improve lexer performance

This commit is contained in:
xensik 2022-01-28 15:58:41 +01:00
parent 7d643a9ae4
commit 8b62635131
45 changed files with 13985 additions and 14377 deletions

View File

@ -28,6 +28,8 @@
{ {
#include "h1.hpp" #include "h1.hpp"
namespace xsk::gsc::h1 { class lexer; } namespace xsk::gsc::h1 { class lexer; }
typedef void *yyscan_t;
#define YY_DECL xsk::gsc::h1::parser::symbol_type yylex(yyscan_t yyscanner, xsk::gsc::location& loc)
} }
%code top %code top
@ -125,6 +127,7 @@ xsk::gsc::h1::parser::symbol_type H1lex(xsk::gsc::h1::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -703,6 +706,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -125,6 +125,7 @@ xsk::gsc::h2::parser::symbol_type H2lex(xsk::gsc::h2::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -703,6 +704,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -124,6 +124,7 @@ xsk::gsc::iw5::parser::symbol_type IW5lex(xsk::gsc::iw5::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -693,6 +694,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -124,6 +124,7 @@ xsk::gsc::iw6::parser::symbol_type IW6lex(xsk::gsc::iw6::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -693,6 +694,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -124,6 +124,7 @@ xsk::gsc::iw7::parser::symbol_type IW7lex(xsk::gsc::iw7::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -693,6 +694,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -127,6 +127,7 @@ xsk::gsc::iw8::parser::symbol_type IW8lex(xsk::gsc::iw8::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -719,6 +720,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -125,6 +125,7 @@ xsk::gsc::s1::parser::symbol_type S1lex(xsk::gsc::s1::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -703,6 +704,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -125,6 +125,7 @@ xsk::gsc::s2::parser::symbol_type S2lex(xsk::gsc::s2::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -703,6 +704,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -127,6 +127,7 @@ xsk::gsc::s4::parser::symbol_type S4lex(xsk::gsc::s4::lexer& lexer);
%token MUL "*" %token MUL "*"
%token DIV "/" %token DIV "/"
%token MOD "%" %token MOD "%"
%token <std::string> FIELD "field"
%token <std::string> PATH "path" %token <std::string> PATH "path"
%token <std::string> IDENTIFIER "identifier" %token <std::string> IDENTIFIER "identifier"
%token <std::string> STRING "string literal" %token <std::string> STRING "string literal"
@ -719,6 +720,8 @@ expr_array
expr_field expr_field
: expr_object DOT expr_identifier : expr_object DOT expr_identifier
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); } { $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::move($3)); }
| expr_object FIELD
{ $$ = std::make_unique<ast::expr_field>(@$, std::move($1), std::make_unique<ast::expr_identifier>(@$, $2)); }
; ;
expr_size expr_size

View File

@ -60,7 +60,6 @@ enum class keyword
KW_true, KW_true,
KW_false, KW_false,
KW_undefined, KW_undefined,
KW_dotsize,
KW_game, KW_game,
KW_self, KW_self,
KW_anim, KW_anim,
@ -68,9 +67,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -80,18 +79,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -139,7 +129,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -185,24 +175,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw h1::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return h1::parser::make_H1EOF(loc_); return h1::parser::make_H1EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -216,7 +210,7 @@ auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return h1::parser::make_DIV(loc_); return h1::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -226,29 +220,27 @@ auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw h1::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return h1::parser::make_DEVBEGIN(loc_); return h1::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw h1::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -262,16 +254,15 @@ auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw h1::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -284,7 +275,7 @@ auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -294,69 +285,43 @@ auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw h1::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return h1::parser::make_DEVEND(loc_); return h1::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return h1::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw h1::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return h1::parser::make_MUL(loc_); return h1::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw h1::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return h1::parser::make_ASSIGN_MUL(loc_); return h1::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return h1::parser::make_LPAREN(loc_); return h1::parser::make_LPAREN(loc_);
case ')': case ')':
@ -376,6 +341,7 @@ auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return h1::parser::make_COLON(loc_); return h1::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return h1::parser::make_DOUBLECOLON(loc_); return h1::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -383,43 +349,70 @@ auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return h1::parser::make_ASSIGN(loc_); return h1::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return h1::parser::make_EQUALITY(loc_); return h1::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return h1::parser::make_ADD(loc_); return h1::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? h1::parser::make_INCREMENT(loc_) : h1::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return h1::parser::make_INCREMENT(loc_);
return h1::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return h1::parser::make_SUB(loc_); return h1::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? h1::parser::make_DECREMENT(loc_) : h1::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return h1::parser::make_DECREMENT(loc_);
return h1::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return h1::parser::make_MOD(loc_); return h1::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return h1::parser::make_ASSIGN_MOD(loc_); return h1::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return h1::parser::make_BITWISE_OR(loc_); return h1::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? h1::parser::make_OR(loc_) : h1::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return h1::parser::make_OR(loc_);
return h1::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return h1::parser::make_BITWISE_AND(loc_); return h1::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? h1::parser::make_AND(loc_) : h1::parser::make_ASSIGN_BW_AND(loc_); return h1::parser::make_AND(loc_);
if (last == '=')
return h1::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return h1::parser::make_BITWISE_EXOR(loc_); return h1::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return h1::parser::make_ASSIGN_BW_EXOR(loc_); return h1::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return h1::parser::make_NOT(loc_); return h1::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return h1::parser::make_INEQUALITY(loc_); return h1::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -427,88 +420,180 @@ auto lexer::lex() -> xsk::gsc::h1::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return h1::parser::make_LESS(loc_); return h1::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? h1::parser::make_ASSIGN_LSHIFT(loc_) : h1::parser::make_LSHIFT(loc_);
}
return h1::parser::make_LESS_EQUAL(loc_); return h1::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return h1::parser::make_LSHIFT(loc_);
reader_.advance();
return h1::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return h1::parser::make_GREATER(loc_); return h1::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? h1::parser::make_ASSIGN_RSHIFT(loc_) : h1::parser::make_RSHIFT(loc_);
}
return h1::parser::make_GREATER_EQUAL(loc_); return h1::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return h1::parser::make_RSHIFT(loc_);
reader_.advance();
return h1::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw h1::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::h1::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw h1::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw h1::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return h1::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return h1::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return h1::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return h1::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::h1::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
{
buffer_.push(first);
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
}
else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if(state_ == state::field)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return h1::parser::make_SIZE(loc_);
}
return h1::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
//return h1::parser::make_PATH(xsk::gsc::h1::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return h1::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
}
//return h1::parser::make_IDENTIFIER(xsk::gsc::h1::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return h1::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
}
lex_number:
if (state_ == state::field)
buffer_.push('.');
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.')) if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'') if ((curr == '.' || curr == 'f') && last == '\'')
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if (curr == '\'') if (curr == '\'')
{ {
@ -516,55 +601,41 @@ auto lexer::read_number(char first) -> xsk::gsc::h1::parser::symbol_type
continue; continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58))) if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'')
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
auto data = buffer_.data; if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
auto len = buffer_.length; throw comp_error(loc_, "invalid number literal");
auto dot = true;
for (auto i = 1; i < len; i++) if (state_ == state::field || dot || flt)
{ return h1::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
if (data[i] == '.')
{ return h1::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
if (dot)
throw h1::parser::syntax_error(loc_, "invalid number '.'");
} }
if (data[i] == 'f' && i != len - 1) else if (curr == 'o')
throw h1::parser::syntax_error(loc_, "invalid number 'f'");
}
return h1::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{ {
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'o')) if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (curr == 'o' && last == '\'')
throw h1::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -581,30 +652,24 @@ auto lexer::read_number(char first) -> xsk::gsc::h1::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length <= 0)
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (buffer_.length < 3) return h1::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
throw error("gsc lexer: invalid octal literal!");
return h1::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
} }
else if (first == '0' && curr == 'b') else if (curr == 'b')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('b'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'b')) if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (curr == 'b' && last == '\'')
throw h1::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -616,35 +681,29 @@ auto lexer::read_number(char first) -> xsk::gsc::h1::parser::symbol_type
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return h1::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_); return h1::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
} }
else if (first == '0' && curr == 'x') else if (curr == 'x')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('x'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'x')) if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == 'x' && last == '\'')
throw h1::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -661,161 +720,13 @@ auto lexer::read_number(char first) -> xsk::gsc::h1::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw h1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return h1::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_); return h1::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
} }
else // cant get here!
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw h1::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw h1::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw h1::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw h1::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw h1::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return h1::parser::make_FLOAT(std::string(data, len), loc_);
return h1::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::h1::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (reader_.last_byte == '\\')
throw h1::parser::syntax_error(loc_, "invalid path '\\\\'");
path = true;
curr = '/';
}
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw h1::parser::syntax_error(loc_, "invalid path end '\\'");
return h1::parser::make_PATH(xsk::gsc::h1::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
return h1::parser::make_IDENTIFIER(xsk::gsc::h1::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
auto lexer::read_dotsize() -> xsk::gsc::h1::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
}
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok)
{
curr = reader_.current_byte;
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return h1::parser::make_SIZE(loc_);
}
reader_.state = save.state;
reader_.bytes_remaining = save.bytes_remaining;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return h1::parser::make_DOT(loc_);
}
else return h1::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::h1::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::h1::parser::symbol_type
@ -890,8 +801,6 @@ auto lexer::keyword_token(keyword k) -> xsk::gsc::h1::parser::symbol_type
return h1::parser::make_FALSE(loc_); return h1::parser::make_FALSE(loc_);
case keyword::KW_undefined: case keyword::KW_undefined:
return h1::parser::make_UNDEFINED(loc_); return h1::parser::make_UNDEFINED(loc_);
case keyword::KW_dotsize:
return h1::parser::make_SIZE(loc_);
case keyword::KW_game: case keyword::KW_game:
return h1::parser::make_GAME(loc_); return h1::parser::make_GAME(loc_);
case keyword::KW_self: case keyword::KW_self:
@ -980,7 +889,6 @@ std::unordered_map<std::string_view, keyword> lexer::keywords
{ "true", keyword::KW_true }, { "true", keyword::KW_true },
{ "false", keyword::KW_false }, { "false", keyword::KW_false },
{ "undefined", keyword::KW_undefined }, { "undefined", keyword::KW_undefined },
{ ".size", keyword::KW_dotsize },
{ "game", keyword::KW_game }, { "game", keyword::KW_game },
{ "self", keyword::KW_self }, { "self", keyword::KW_self },
{ "anim", keyword::KW_anim }, { "anim", keyword::KW_anim },

View File

@ -12,11 +12,12 @@ namespace xsk::gsc::h1
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -25,13 +26,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -47,15 +48,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -65,10 +68,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::h1::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::h1::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::h1::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::h1::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::h1::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::h1::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -49,8 +49,10 @@
#include "h1.hpp" #include "h1.hpp"
namespace xsk::gsc::h1 { class lexer; } namespace xsk::gsc::h1 { class lexer; }
typedef void *yyscan_t;
#define YY_DECL xsk::gsc::h1::parser::symbol_type yylex(yyscan_t yyscanner, xsk::gsc::location& loc)
#line 54 "parser.hpp" #line 56 "parser.hpp"
# include <cassert> # include <cassert>
# include <cstdlib> // std::abort # include <cstdlib> // std::abort
@ -193,7 +195,7 @@ namespace xsk::gsc::h1 { class lexer; }
#line 13 "parser.ypp" #line 13 "parser.ypp"
namespace xsk { namespace gsc { namespace h1 { namespace xsk { namespace gsc { namespace h1 {
#line 197 "parser.hpp" #line 199 "parser.hpp"
@ -617,6 +619,7 @@ namespace xsk { namespace gsc { namespace h1 {
// stmt_while // stmt_while
char dummy66[sizeof (ast::stmt_while::ptr)]; char dummy66[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -762,22 +765,23 @@ namespace xsk { namespace gsc { namespace h1 {
MUL = 86, // "*" MUL = 86, // "*"
DIV = 87, // "/" DIV = 87, // "/"
MOD = 88, // "%" MOD = 88, // "%"
PATH = 89, // "path" FIELD = 89, // "field"
IDENTIFIER = 90, // "identifier" PATH = 90, // "path"
STRING = 91, // "string literal" IDENTIFIER = 91, // "identifier"
ISTRING = 92, // "localized string" STRING = 92, // "string literal"
COLOR = 93, // "color" ISTRING = 93, // "localized string"
FLOAT = 94, // "float" COLOR = 94, // "color"
INTEGER = 95, // "integer" FLOAT = 95, // "float"
ADD_ARRAY = 96, // ADD_ARRAY INTEGER = 96, // "integer"
THEN = 97, // THEN ADD_ARRAY = 97, // ADD_ARRAY
TERN = 98, // TERN THEN = 98, // THEN
NEG = 99, // NEG TERN = 99, // TERN
ANIMREF = 100, // ANIMREF NEG = 100, // NEG
PREINC = 101, // PREINC ANIMREF = 101, // ANIMREF
PREDEC = 102, // PREDEC PREINC = 102, // PREINC
POSTINC = 103, // POSTINC PREDEC = 103, // PREDEC
POSTDEC = 104 // POSTDEC POSTINC = 104, // POSTINC
POSTDEC = 105 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -794,7 +798,7 @@ namespace xsk { namespace gsc { namespace h1 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 105, ///< Number of tokens. YYNTOKENS = 106, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -885,103 +889,104 @@ namespace xsk { namespace gsc { namespace h1 {
S_MUL = 86, // "*" S_MUL = 86, // "*"
S_DIV = 87, // "/" S_DIV = 87, // "/"
S_MOD = 88, // "%" S_MOD = 88, // "%"
S_PATH = 89, // "path" S_FIELD = 89, // "field"
S_IDENTIFIER = 90, // "identifier" S_PATH = 90, // "path"
S_STRING = 91, // "string literal" S_IDENTIFIER = 91, // "identifier"
S_ISTRING = 92, // "localized string" S_STRING = 92, // "string literal"
S_COLOR = 93, // "color" S_ISTRING = 93, // "localized string"
S_FLOAT = 94, // "float" S_COLOR = 94, // "color"
S_INTEGER = 95, // "integer" S_FLOAT = 95, // "float"
S_ADD_ARRAY = 96, // ADD_ARRAY S_INTEGER = 96, // "integer"
S_THEN = 97, // THEN S_ADD_ARRAY = 97, // ADD_ARRAY
S_TERN = 98, // TERN S_THEN = 98, // THEN
S_NEG = 99, // NEG S_TERN = 99, // TERN
S_ANIMREF = 100, // ANIMREF S_NEG = 100, // NEG
S_PREINC = 101, // PREINC S_ANIMREF = 101, // ANIMREF
S_PREDEC = 102, // PREDEC S_PREINC = 102, // PREINC
S_POSTINC = 103, // POSTINC S_PREDEC = 103, // PREDEC
S_POSTDEC = 104, // POSTDEC S_POSTINC = 104, // POSTINC
S_YYACCEPT = 105, // $accept S_POSTDEC = 105, // POSTDEC
S_root = 106, // root S_YYACCEPT = 106, // $accept
S_program = 107, // program S_root = 107, // root
S_inline = 108, // inline S_program = 108, // program
S_include = 109, // include S_inline = 109, // inline
S_declaration = 110, // declaration S_include = 110, // include
S_decl_usingtree = 111, // decl_usingtree S_declaration = 111, // declaration
S_decl_constant = 112, // decl_constant S_decl_usingtree = 112, // decl_usingtree
S_decl_thread = 113, // decl_thread S_decl_constant = 113, // decl_constant
S_stmt = 114, // stmt S_decl_thread = 114, // decl_thread
S_stmt_dev = 115, // stmt_dev S_stmt = 115, // stmt
S_stmt_block = 116, // stmt_block S_stmt_dev = 116, // stmt_dev
S_stmt_list = 117, // stmt_list S_stmt_block = 117, // stmt_block
S_stmt_expr = 118, // stmt_expr S_stmt_list = 118, // stmt_list
S_stmt_call = 119, // stmt_call S_stmt_expr = 119, // stmt_expr
S_stmt_assign = 120, // stmt_assign S_stmt_call = 120, // stmt_call
S_stmt_endon = 121, // stmt_endon S_stmt_assign = 121, // stmt_assign
S_stmt_notify = 122, // stmt_notify S_stmt_endon = 122, // stmt_endon
S_stmt_wait = 123, // stmt_wait S_stmt_notify = 123, // stmt_notify
S_stmt_waittill = 124, // stmt_waittill S_stmt_wait = 124, // stmt_wait
S_stmt_waittillmatch = 125, // stmt_waittillmatch S_stmt_waittill = 125, // stmt_waittill
S_stmt_waittillframeend = 126, // stmt_waittillframeend S_stmt_waittillmatch = 126, // stmt_waittillmatch
S_stmt_waitframe = 127, // stmt_waitframe S_stmt_waittillframeend = 127, // stmt_waittillframeend
S_stmt_if = 128, // stmt_if S_stmt_waitframe = 128, // stmt_waitframe
S_stmt_ifelse = 129, // stmt_ifelse S_stmt_if = 129, // stmt_if
S_stmt_while = 130, // stmt_while S_stmt_ifelse = 130, // stmt_ifelse
S_stmt_dowhile = 131, // stmt_dowhile S_stmt_while = 131, // stmt_while
S_stmt_for = 132, // stmt_for S_stmt_dowhile = 132, // stmt_dowhile
S_stmt_foreach = 133, // stmt_foreach S_stmt_for = 133, // stmt_for
S_stmt_switch = 134, // stmt_switch S_stmt_foreach = 134, // stmt_foreach
S_stmt_case = 135, // stmt_case S_stmt_switch = 135, // stmt_switch
S_stmt_default = 136, // stmt_default S_stmt_case = 136, // stmt_case
S_stmt_break = 137, // stmt_break S_stmt_default = 137, // stmt_default
S_stmt_continue = 138, // stmt_continue S_stmt_break = 138, // stmt_break
S_stmt_return = 139, // stmt_return S_stmt_continue = 139, // stmt_continue
S_stmt_breakpoint = 140, // stmt_breakpoint S_stmt_return = 140, // stmt_return
S_stmt_prof_begin = 141, // stmt_prof_begin S_stmt_breakpoint = 141, // stmt_breakpoint
S_stmt_prof_end = 142, // stmt_prof_end S_stmt_prof_begin = 142, // stmt_prof_begin
S_expr = 143, // expr S_stmt_prof_end = 143, // stmt_prof_end
S_expr_or_empty = 144, // expr_or_empty S_expr = 144, // expr
S_expr_assign = 145, // expr_assign S_expr_or_empty = 145, // expr_or_empty
S_expr_increment = 146, // expr_increment S_expr_assign = 146, // expr_assign
S_expr_decrement = 147, // expr_decrement S_expr_increment = 147, // expr_increment
S_expr_ternary = 148, // expr_ternary S_expr_decrement = 148, // expr_decrement
S_expr_binary = 149, // expr_binary S_expr_ternary = 149, // expr_ternary
S_expr_primitive = 150, // expr_primitive S_expr_binary = 150, // expr_binary
S_expr_complement = 151, // expr_complement S_expr_primitive = 151, // expr_primitive
S_expr_not = 152, // expr_not S_expr_complement = 152, // expr_complement
S_expr_call = 153, // expr_call S_expr_not = 153, // expr_not
S_expr_method = 154, // expr_method S_expr_call = 154, // expr_call
S_expr_function = 155, // expr_function S_expr_method = 155, // expr_method
S_expr_pointer = 156, // expr_pointer S_expr_function = 156, // expr_function
S_expr_add_array = 157, // expr_add_array S_expr_pointer = 157, // expr_pointer
S_expr_parameters = 158, // expr_parameters S_expr_add_array = 158, // expr_add_array
S_expr_arguments = 159, // expr_arguments S_expr_parameters = 159, // expr_parameters
S_expr_arguments_no_empty = 160, // expr_arguments_no_empty S_expr_arguments = 160, // expr_arguments
S_expr_reference = 161, // expr_reference S_expr_arguments_no_empty = 161, // expr_arguments_no_empty
S_expr_array = 162, // expr_array S_expr_reference = 162, // expr_reference
S_expr_field = 163, // expr_field S_expr_array = 163, // expr_array
S_expr_size = 164, // expr_size S_expr_field = 164, // expr_field
S_expr_paren = 165, // expr_paren S_expr_size = 165, // expr_size
S_expr_object = 166, // expr_object S_expr_paren = 166, // expr_paren
S_expr_thisthread = 167, // expr_thisthread S_expr_object = 167, // expr_object
S_expr_empty_array = 168, // expr_empty_array S_expr_thisthread = 168, // expr_thisthread
S_expr_undefined = 169, // expr_undefined S_expr_empty_array = 169, // expr_empty_array
S_expr_game = 170, // expr_game S_expr_undefined = 170, // expr_undefined
S_expr_self = 171, // expr_self S_expr_game = 171, // expr_game
S_expr_anim = 172, // expr_anim S_expr_self = 172, // expr_self
S_expr_level = 173, // expr_level S_expr_anim = 173, // expr_anim
S_expr_animation = 174, // expr_animation S_expr_level = 174, // expr_level
S_expr_animtree = 175, // expr_animtree S_expr_animation = 175, // expr_animation
S_expr_identifier = 176, // expr_identifier S_expr_animtree = 176, // expr_animtree
S_expr_path = 177, // expr_path S_expr_identifier = 177, // expr_identifier
S_expr_istring = 178, // expr_istring S_expr_path = 178, // expr_path
S_expr_string = 179, // expr_string S_expr_istring = 179, // expr_istring
S_expr_color = 180, // expr_color S_expr_string = 180, // expr_string
S_expr_vector = 181, // expr_vector S_expr_color = 181, // expr_color
S_expr_float = 182, // expr_float S_expr_vector = 182, // expr_vector
S_expr_integer = 183, // expr_integer S_expr_float = 183, // expr_float
S_expr_false = 184, // expr_false S_expr_integer = 184, // expr_integer
S_expr_true = 185 // expr_true S_expr_false = 185, // expr_false
S_expr_true = 186 // expr_true
}; };
}; };
@ -1294,6 +1299,7 @@ namespace xsk { namespace gsc { namespace h1 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2563,6 +2569,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2679,7 +2686,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
H1_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); H1_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4064,6 +4071,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4648,7 +4670,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2191, ///< Last index in yytable_. yylast_ = 2291, ///< Last index in yytable_.
yynnts_ = 81, ///< Number of nonterminal symbols. yynnts_ = 81, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -4952,6 +4974,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5267,6 +5290,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5340,7 +5364,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::h1 } } } // xsk::gsc::h1
#line 5344 "parser.hpp" #line 5368 "parser.hpp"

View File

@ -68,9 +68,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -80,18 +80,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -139,7 +130,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -185,24 +176,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw h2::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return h2::parser::make_H2EOF(loc_); return h2::parser::make_H2EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -216,7 +211,7 @@ auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return h2::parser::make_DIV(loc_); return h2::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -226,29 +221,27 @@ auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw h2::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return h2::parser::make_DEVBEGIN(loc_); return h2::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw h2::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -262,16 +255,15 @@ auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw h2::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -284,7 +276,7 @@ auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -294,69 +286,43 @@ auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw h2::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return h2::parser::make_DEVEND(loc_); return h2::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return h2::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw h2::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return h2::parser::make_MUL(loc_); return h2::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw h2::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return h2::parser::make_ASSIGN_MUL(loc_); return h2::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return h2::parser::make_LPAREN(loc_); return h2::parser::make_LPAREN(loc_);
case ')': case ')':
@ -376,6 +342,7 @@ auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return h2::parser::make_COLON(loc_); return h2::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return h2::parser::make_DOUBLECOLON(loc_); return h2::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -383,43 +350,70 @@ auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return h2::parser::make_ASSIGN(loc_); return h2::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return h2::parser::make_EQUALITY(loc_); return h2::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return h2::parser::make_ADD(loc_); return h2::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? h2::parser::make_INCREMENT(loc_) : h2::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return h2::parser::make_INCREMENT(loc_);
return h2::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return h2::parser::make_SUB(loc_); return h2::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? h2::parser::make_DECREMENT(loc_) : h2::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return h2::parser::make_DECREMENT(loc_);
return h2::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return h2::parser::make_MOD(loc_); return h2::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return h2::parser::make_ASSIGN_MOD(loc_); return h2::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return h2::parser::make_BITWISE_OR(loc_); return h2::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? h2::parser::make_OR(loc_) : h2::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return h2::parser::make_OR(loc_);
return h2::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return h2::parser::make_BITWISE_AND(loc_); return h2::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? h2::parser::make_AND(loc_) : h2::parser::make_ASSIGN_BW_AND(loc_); return h2::parser::make_AND(loc_);
if (last == '=')
return h2::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return h2::parser::make_BITWISE_EXOR(loc_); return h2::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return h2::parser::make_ASSIGN_BW_EXOR(loc_); return h2::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return h2::parser::make_NOT(loc_); return h2::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return h2::parser::make_INEQUALITY(loc_); return h2::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -427,88 +421,180 @@ auto lexer::lex() -> xsk::gsc::h2::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return h2::parser::make_LESS(loc_); return h2::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? h2::parser::make_ASSIGN_LSHIFT(loc_) : h2::parser::make_LSHIFT(loc_);
}
return h2::parser::make_LESS_EQUAL(loc_); return h2::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return h2::parser::make_LSHIFT(loc_);
reader_.advance();
return h2::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return h2::parser::make_GREATER(loc_); return h2::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? h2::parser::make_ASSIGN_RSHIFT(loc_) : h2::parser::make_RSHIFT(loc_);
}
return h2::parser::make_GREATER_EQUAL(loc_); return h2::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return h2::parser::make_RSHIFT(loc_);
reader_.advance();
return h2::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw h2::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::h2::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw h2::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw h2::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return h2::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return h2::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return h2::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return h2::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::h2::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
{
buffer_.push(first);
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
}
else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if(state_ == state::field)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return h2::parser::make_SIZE(loc_);
}
return h2::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
//return h2::parser::make_PATH(xsk::gsc::h2::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return h2::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
}
//return h2::parser::make_IDENTIFIER(xsk::gsc::h2::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return h2::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
}
lex_number:
if (state_ == state::field)
buffer_.push('.');
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.')) if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'') if ((curr == '.' || curr == 'f') && last == '\'')
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if (curr == '\'') if (curr == '\'')
{ {
@ -516,55 +602,41 @@ auto lexer::read_number(char first) -> xsk::gsc::h2::parser::symbol_type
continue; continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58))) if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'')
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
auto data = buffer_.data; if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
auto len = buffer_.length; throw comp_error(loc_, "invalid number literal");
auto dot = true;
for (auto i = 1; i < len; i++) if (state_ == state::field || dot || flt)
{ return h2::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
if (data[i] == '.')
{ return h2::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
if (dot)
throw h2::parser::syntax_error(loc_, "invalid number '.'");
} }
if (data[i] == 'f' && i != len - 1) else if (curr == 'o')
throw h2::parser::syntax_error(loc_, "invalid number 'f'");
}
return h2::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{ {
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'o')) if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (curr == 'o' && last == '\'')
throw h2::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -581,30 +653,24 @@ auto lexer::read_number(char first) -> xsk::gsc::h2::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length <= 0)
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (buffer_.length < 3) return h2::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
throw error("gsc lexer: invalid octal literal!");
return h2::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
} }
else if (first == '0' && curr == 'b') else if (curr == 'b')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('b'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'b')) if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (curr == 'b' && last == '\'')
throw h2::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -616,35 +682,29 @@ auto lexer::read_number(char first) -> xsk::gsc::h2::parser::symbol_type
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return h2::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_); return h2::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
} }
else if (first == '0' && curr == 'x') else if (curr == 'x')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('x'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'x')) if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == 'x' && last == '\'')
throw h2::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -661,161 +721,13 @@ auto lexer::read_number(char first) -> xsk::gsc::h2::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw h2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return h2::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_); return h2::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
} }
else // cant get here!
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw h2::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw h2::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw h2::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw h2::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw h2::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return h2::parser::make_FLOAT(std::string(data, len), loc_);
return h2::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::h2::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (reader_.last_byte == '\\')
throw h2::parser::syntax_error(loc_, "invalid path '\\\\'");
path = true;
curr = '/';
}
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw h2::parser::syntax_error(loc_, "invalid path end '\\'");
return h2::parser::make_PATH(xsk::gsc::h2::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
return h2::parser::make_IDENTIFIER(xsk::gsc::h2::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
auto lexer::read_dotsize() -> xsk::gsc::h2::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
}
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok)
{
curr = reader_.current_byte;
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return h2::parser::make_SIZE(loc_);
}
reader_.state = save.state;
reader_.bytes_remaining = save.bytes_remaining;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return h2::parser::make_DOT(loc_);
}
else return h2::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::h2::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::h2::parser::symbol_type

View File

@ -10,11 +10,12 @@ namespace xsk::gsc::h2
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -23,13 +24,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -45,15 +46,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -63,10 +66,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::h2::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::h2::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::h2::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::h2::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::h2::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::h2::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -617,6 +617,7 @@ namespace xsk { namespace gsc { namespace h2 {
// stmt_while // stmt_while
char dummy66[sizeof (ast::stmt_while::ptr)]; char dummy66[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -762,22 +763,23 @@ namespace xsk { namespace gsc { namespace h2 {
MUL = 86, // "*" MUL = 86, // "*"
DIV = 87, // "/" DIV = 87, // "/"
MOD = 88, // "%" MOD = 88, // "%"
PATH = 89, // "path" FIELD = 89, // "field"
IDENTIFIER = 90, // "identifier" PATH = 90, // "path"
STRING = 91, // "string literal" IDENTIFIER = 91, // "identifier"
ISTRING = 92, // "localized string" STRING = 92, // "string literal"
COLOR = 93, // "color" ISTRING = 93, // "localized string"
FLOAT = 94, // "float" COLOR = 94, // "color"
INTEGER = 95, // "integer" FLOAT = 95, // "float"
ADD_ARRAY = 96, // ADD_ARRAY INTEGER = 96, // "integer"
THEN = 97, // THEN ADD_ARRAY = 97, // ADD_ARRAY
TERN = 98, // TERN THEN = 98, // THEN
NEG = 99, // NEG TERN = 99, // TERN
ANIMREF = 100, // ANIMREF NEG = 100, // NEG
PREINC = 101, // PREINC ANIMREF = 101, // ANIMREF
PREDEC = 102, // PREDEC PREINC = 102, // PREINC
POSTINC = 103, // POSTINC PREDEC = 103, // PREDEC
POSTDEC = 104 // POSTDEC POSTINC = 104, // POSTINC
POSTDEC = 105 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -794,7 +796,7 @@ namespace xsk { namespace gsc { namespace h2 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 105, ///< Number of tokens. YYNTOKENS = 106, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -885,103 +887,104 @@ namespace xsk { namespace gsc { namespace h2 {
S_MUL = 86, // "*" S_MUL = 86, // "*"
S_DIV = 87, // "/" S_DIV = 87, // "/"
S_MOD = 88, // "%" S_MOD = 88, // "%"
S_PATH = 89, // "path" S_FIELD = 89, // "field"
S_IDENTIFIER = 90, // "identifier" S_PATH = 90, // "path"
S_STRING = 91, // "string literal" S_IDENTIFIER = 91, // "identifier"
S_ISTRING = 92, // "localized string" S_STRING = 92, // "string literal"
S_COLOR = 93, // "color" S_ISTRING = 93, // "localized string"
S_FLOAT = 94, // "float" S_COLOR = 94, // "color"
S_INTEGER = 95, // "integer" S_FLOAT = 95, // "float"
S_ADD_ARRAY = 96, // ADD_ARRAY S_INTEGER = 96, // "integer"
S_THEN = 97, // THEN S_ADD_ARRAY = 97, // ADD_ARRAY
S_TERN = 98, // TERN S_THEN = 98, // THEN
S_NEG = 99, // NEG S_TERN = 99, // TERN
S_ANIMREF = 100, // ANIMREF S_NEG = 100, // NEG
S_PREINC = 101, // PREINC S_ANIMREF = 101, // ANIMREF
S_PREDEC = 102, // PREDEC S_PREINC = 102, // PREINC
S_POSTINC = 103, // POSTINC S_PREDEC = 103, // PREDEC
S_POSTDEC = 104, // POSTDEC S_POSTINC = 104, // POSTINC
S_YYACCEPT = 105, // $accept S_POSTDEC = 105, // POSTDEC
S_root = 106, // root S_YYACCEPT = 106, // $accept
S_program = 107, // program S_root = 107, // root
S_inline = 108, // inline S_program = 108, // program
S_include = 109, // include S_inline = 109, // inline
S_declaration = 110, // declaration S_include = 110, // include
S_decl_usingtree = 111, // decl_usingtree S_declaration = 111, // declaration
S_decl_constant = 112, // decl_constant S_decl_usingtree = 112, // decl_usingtree
S_decl_thread = 113, // decl_thread S_decl_constant = 113, // decl_constant
S_stmt = 114, // stmt S_decl_thread = 114, // decl_thread
S_stmt_dev = 115, // stmt_dev S_stmt = 115, // stmt
S_stmt_block = 116, // stmt_block S_stmt_dev = 116, // stmt_dev
S_stmt_list = 117, // stmt_list S_stmt_block = 117, // stmt_block
S_stmt_expr = 118, // stmt_expr S_stmt_list = 118, // stmt_list
S_stmt_call = 119, // stmt_call S_stmt_expr = 119, // stmt_expr
S_stmt_assign = 120, // stmt_assign S_stmt_call = 120, // stmt_call
S_stmt_endon = 121, // stmt_endon S_stmt_assign = 121, // stmt_assign
S_stmt_notify = 122, // stmt_notify S_stmt_endon = 122, // stmt_endon
S_stmt_wait = 123, // stmt_wait S_stmt_notify = 123, // stmt_notify
S_stmt_waittill = 124, // stmt_waittill S_stmt_wait = 124, // stmt_wait
S_stmt_waittillmatch = 125, // stmt_waittillmatch S_stmt_waittill = 125, // stmt_waittill
S_stmt_waittillframeend = 126, // stmt_waittillframeend S_stmt_waittillmatch = 126, // stmt_waittillmatch
S_stmt_waitframe = 127, // stmt_waitframe S_stmt_waittillframeend = 127, // stmt_waittillframeend
S_stmt_if = 128, // stmt_if S_stmt_waitframe = 128, // stmt_waitframe
S_stmt_ifelse = 129, // stmt_ifelse S_stmt_if = 129, // stmt_if
S_stmt_while = 130, // stmt_while S_stmt_ifelse = 130, // stmt_ifelse
S_stmt_dowhile = 131, // stmt_dowhile S_stmt_while = 131, // stmt_while
S_stmt_for = 132, // stmt_for S_stmt_dowhile = 132, // stmt_dowhile
S_stmt_foreach = 133, // stmt_foreach S_stmt_for = 133, // stmt_for
S_stmt_switch = 134, // stmt_switch S_stmt_foreach = 134, // stmt_foreach
S_stmt_case = 135, // stmt_case S_stmt_switch = 135, // stmt_switch
S_stmt_default = 136, // stmt_default S_stmt_case = 136, // stmt_case
S_stmt_break = 137, // stmt_break S_stmt_default = 137, // stmt_default
S_stmt_continue = 138, // stmt_continue S_stmt_break = 138, // stmt_break
S_stmt_return = 139, // stmt_return S_stmt_continue = 139, // stmt_continue
S_stmt_breakpoint = 140, // stmt_breakpoint S_stmt_return = 140, // stmt_return
S_stmt_prof_begin = 141, // stmt_prof_begin S_stmt_breakpoint = 141, // stmt_breakpoint
S_stmt_prof_end = 142, // stmt_prof_end S_stmt_prof_begin = 142, // stmt_prof_begin
S_expr = 143, // expr S_stmt_prof_end = 143, // stmt_prof_end
S_expr_or_empty = 144, // expr_or_empty S_expr = 144, // expr
S_expr_assign = 145, // expr_assign S_expr_or_empty = 145, // expr_or_empty
S_expr_increment = 146, // expr_increment S_expr_assign = 146, // expr_assign
S_expr_decrement = 147, // expr_decrement S_expr_increment = 147, // expr_increment
S_expr_ternary = 148, // expr_ternary S_expr_decrement = 148, // expr_decrement
S_expr_binary = 149, // expr_binary S_expr_ternary = 149, // expr_ternary
S_expr_primitive = 150, // expr_primitive S_expr_binary = 150, // expr_binary
S_expr_complement = 151, // expr_complement S_expr_primitive = 151, // expr_primitive
S_expr_not = 152, // expr_not S_expr_complement = 152, // expr_complement
S_expr_call = 153, // expr_call S_expr_not = 153, // expr_not
S_expr_method = 154, // expr_method S_expr_call = 154, // expr_call
S_expr_function = 155, // expr_function S_expr_method = 155, // expr_method
S_expr_pointer = 156, // expr_pointer S_expr_function = 156, // expr_function
S_expr_add_array = 157, // expr_add_array S_expr_pointer = 157, // expr_pointer
S_expr_parameters = 158, // expr_parameters S_expr_add_array = 158, // expr_add_array
S_expr_arguments = 159, // expr_arguments S_expr_parameters = 159, // expr_parameters
S_expr_arguments_no_empty = 160, // expr_arguments_no_empty S_expr_arguments = 160, // expr_arguments
S_expr_reference = 161, // expr_reference S_expr_arguments_no_empty = 161, // expr_arguments_no_empty
S_expr_array = 162, // expr_array S_expr_reference = 162, // expr_reference
S_expr_field = 163, // expr_field S_expr_array = 163, // expr_array
S_expr_size = 164, // expr_size S_expr_field = 164, // expr_field
S_expr_paren = 165, // expr_paren S_expr_size = 165, // expr_size
S_expr_object = 166, // expr_object S_expr_paren = 166, // expr_paren
S_expr_thisthread = 167, // expr_thisthread S_expr_object = 167, // expr_object
S_expr_empty_array = 168, // expr_empty_array S_expr_thisthread = 168, // expr_thisthread
S_expr_undefined = 169, // expr_undefined S_expr_empty_array = 169, // expr_empty_array
S_expr_game = 170, // expr_game S_expr_undefined = 170, // expr_undefined
S_expr_self = 171, // expr_self S_expr_game = 171, // expr_game
S_expr_anim = 172, // expr_anim S_expr_self = 172, // expr_self
S_expr_level = 173, // expr_level S_expr_anim = 173, // expr_anim
S_expr_animation = 174, // expr_animation S_expr_level = 174, // expr_level
S_expr_animtree = 175, // expr_animtree S_expr_animation = 175, // expr_animation
S_expr_identifier = 176, // expr_identifier S_expr_animtree = 176, // expr_animtree
S_expr_path = 177, // expr_path S_expr_identifier = 177, // expr_identifier
S_expr_istring = 178, // expr_istring S_expr_path = 178, // expr_path
S_expr_string = 179, // expr_string S_expr_istring = 179, // expr_istring
S_expr_color = 180, // expr_color S_expr_string = 180, // expr_string
S_expr_vector = 181, // expr_vector S_expr_color = 181, // expr_color
S_expr_float = 182, // expr_float S_expr_vector = 182, // expr_vector
S_expr_integer = 183, // expr_integer S_expr_float = 183, // expr_float
S_expr_false = 184, // expr_false S_expr_integer = 184, // expr_integer
S_expr_true = 185 // expr_true S_expr_false = 185, // expr_false
S_expr_true = 186 // expr_true
}; };
}; };
@ -1294,6 +1297,7 @@ namespace xsk { namespace gsc { namespace h2 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2563,6 +2567,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2679,7 +2684,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
H2_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); H2_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4064,6 +4069,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4648,7 +4668,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2191, ///< Last index in yytable_. yylast_ = 2291, ///< Last index in yytable_.
yynnts_ = 81, ///< Number of nonterminal symbols. yynnts_ = 81, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -4952,6 +4972,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5267,6 +5288,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5340,7 +5362,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::h2 } } } // xsk::gsc::h2
#line 5344 "parser.hpp" #line 5366 "parser.hpp"

View File

@ -67,9 +67,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -79,18 +79,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -138,7 +129,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -184,24 +175,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw iw5::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return iw5::parser::make_IW5EOF(loc_); return iw5::parser::make_IW5EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -215,7 +210,7 @@ auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return iw5::parser::make_DIV(loc_); return iw5::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -225,29 +220,27 @@ auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw iw5::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return iw5::parser::make_DEVBEGIN(loc_); return iw5::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw iw5::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -261,16 +254,15 @@ auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw iw5::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -283,7 +275,7 @@ auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -293,69 +285,43 @@ auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw iw5::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return iw5::parser::make_DEVEND(loc_); return iw5::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return iw5::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw iw5::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return iw5::parser::make_MUL(loc_); return iw5::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw iw5::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return iw5::parser::make_ASSIGN_MUL(loc_); return iw5::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return iw5::parser::make_LPAREN(loc_); return iw5::parser::make_LPAREN(loc_);
case ')': case ')':
@ -375,6 +341,7 @@ auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return iw5::parser::make_COLON(loc_); return iw5::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return iw5::parser::make_DOUBLECOLON(loc_); return iw5::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -382,43 +349,70 @@ auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return iw5::parser::make_ASSIGN(loc_); return iw5::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return iw5::parser::make_EQUALITY(loc_); return iw5::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return iw5::parser::make_ADD(loc_); return iw5::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? iw5::parser::make_INCREMENT(loc_) : iw5::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return iw5::parser::make_INCREMENT(loc_);
return iw5::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return iw5::parser::make_SUB(loc_); return iw5::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? iw5::parser::make_DECREMENT(loc_) : iw5::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return iw5::parser::make_DECREMENT(loc_);
return iw5::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return iw5::parser::make_MOD(loc_); return iw5::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return iw5::parser::make_ASSIGN_MOD(loc_); return iw5::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return iw5::parser::make_BITWISE_OR(loc_); return iw5::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? iw5::parser::make_OR(loc_) : iw5::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return iw5::parser::make_OR(loc_);
return iw5::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return iw5::parser::make_BITWISE_AND(loc_); return iw5::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? iw5::parser::make_AND(loc_) : iw5::parser::make_ASSIGN_BW_AND(loc_); return iw5::parser::make_AND(loc_);
if (last == '=')
return iw5::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return iw5::parser::make_BITWISE_EXOR(loc_); return iw5::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return iw5::parser::make_ASSIGN_BW_EXOR(loc_); return iw5::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return iw5::parser::make_NOT(loc_); return iw5::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return iw5::parser::make_INEQUALITY(loc_); return iw5::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -426,88 +420,180 @@ auto lexer::lex() -> xsk::gsc::iw5::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return iw5::parser::make_LESS(loc_); return iw5::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? iw5::parser::make_ASSIGN_LSHIFT(loc_) : iw5::parser::make_LSHIFT(loc_);
}
return iw5::parser::make_LESS_EQUAL(loc_); return iw5::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return iw5::parser::make_LSHIFT(loc_);
reader_.advance();
return iw5::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return iw5::parser::make_GREATER(loc_); return iw5::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? iw5::parser::make_ASSIGN_RSHIFT(loc_) : iw5::parser::make_RSHIFT(loc_);
}
return iw5::parser::make_GREATER_EQUAL(loc_); return iw5::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return iw5::parser::make_RSHIFT(loc_);
reader_.advance();
return iw5::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw iw5::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::iw5::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw iw5::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw iw5::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return iw5::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return iw5::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return iw5::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return iw5::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::iw5::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
{
buffer_.push(first);
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
}
else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if(state_ == state::field)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return iw5::parser::make_SIZE(loc_);
}
return iw5::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
//return iw5::parser::make_PATH(xsk::gsc::iw5::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return iw5::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
}
//return iw5::parser::make_IDENTIFIER(xsk::gsc::iw5::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return iw5::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
}
lex_number:
if (state_ == state::field)
buffer_.push('.');
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.')) if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'') if ((curr == '.' || curr == 'f') && last == '\'')
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if (curr == '\'') if (curr == '\'')
{ {
@ -515,55 +601,41 @@ auto lexer::read_number(char first) -> xsk::gsc::iw5::parser::symbol_type
continue; continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58))) if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'')
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
auto data = buffer_.data; if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
auto len = buffer_.length; throw comp_error(loc_, "invalid number literal");
auto dot = true;
for (auto i = 1; i < len; i++) if (state_ == state::field || dot || flt)
{ return iw5::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
if (data[i] == '.')
{ return iw5::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
if (dot)
throw iw5::parser::syntax_error(loc_, "invalid number '.'");
} }
if (data[i] == 'f' && i != len - 1) else if (curr == 'o')
throw iw5::parser::syntax_error(loc_, "invalid number 'f'");
}
return iw5::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{ {
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'o')) if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (curr == 'o' && last == '\'')
throw iw5::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -580,30 +652,24 @@ auto lexer::read_number(char first) -> xsk::gsc::iw5::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length <= 0)
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (buffer_.length < 3) return iw5::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
throw error("gsc lexer: invalid octal literal!");
return iw5::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
} }
else if (first == '0' && curr == 'b') else if (curr == 'b')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('b'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'b')) if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (curr == 'b' && last == '\'')
throw iw5::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -615,35 +681,29 @@ auto lexer::read_number(char first) -> xsk::gsc::iw5::parser::symbol_type
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return iw5::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_); return iw5::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
} }
else if (first == '0' && curr == 'x') else if (curr == 'x')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('x'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'x')) if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == 'x' && last == '\'')
throw iw5::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -660,161 +720,13 @@ auto lexer::read_number(char first) -> xsk::gsc::iw5::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw iw5::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return iw5::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_); return iw5::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
} }
else // cant get here!
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw iw5::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw iw5::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw iw5::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw iw5::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw iw5::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return iw5::parser::make_FLOAT(std::string(data, len), loc_);
return iw5::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::iw5::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (reader_.last_byte == '\\')
throw iw5::parser::syntax_error(loc_, "invalid path '\\\\'");
path = true;
curr = '/';
}
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw iw5::parser::syntax_error(loc_, "invalid path end '\\'");
return iw5::parser::make_PATH(xsk::gsc::iw5::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
return iw5::parser::make_IDENTIFIER(xsk::gsc::iw5::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
auto lexer::read_dotsize() -> xsk::gsc::iw5::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
}
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok)
{
curr = reader_.current_byte;
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return iw5::parser::make_SIZE(loc_);
}
reader_.state = save.state;
reader_.bytes_remaining = save.bytes_remaining;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return iw5::parser::make_DOT(loc_);
}
else return iw5::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::iw5::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::iw5::parser::symbol_type

View File

@ -12,11 +12,12 @@ namespace xsk::gsc::iw5
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -25,13 +26,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -47,15 +48,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -65,10 +68,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::iw5::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::iw5::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::iw5::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::iw5::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::iw5::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::iw5::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -614,6 +614,7 @@ namespace xsk { namespace gsc { namespace iw5 {
// stmt_while // stmt_while
char dummy65[sizeof (ast::stmt_while::ptr)]; char dummy65[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -758,22 +759,23 @@ namespace xsk { namespace gsc { namespace iw5 {
MUL = 85, // "*" MUL = 85, // "*"
DIV = 86, // "/" DIV = 86, // "/"
MOD = 87, // "%" MOD = 87, // "%"
PATH = 88, // "path" FIELD = 88, // "field"
IDENTIFIER = 89, // "identifier" PATH = 89, // "path"
STRING = 90, // "string literal" IDENTIFIER = 90, // "identifier"
ISTRING = 91, // "localized string" STRING = 91, // "string literal"
COLOR = 92, // "color" ISTRING = 92, // "localized string"
FLOAT = 93, // "float" COLOR = 93, // "color"
INTEGER = 94, // "integer" FLOAT = 94, // "float"
ADD_ARRAY = 95, // ADD_ARRAY INTEGER = 95, // "integer"
THEN = 96, // THEN ADD_ARRAY = 96, // ADD_ARRAY
TERN = 97, // TERN THEN = 97, // THEN
NEG = 98, // NEG TERN = 98, // TERN
ANIMREF = 99, // ANIMREF NEG = 99, // NEG
PREINC = 100, // PREINC ANIMREF = 100, // ANIMREF
PREDEC = 101, // PREDEC PREINC = 101, // PREINC
POSTINC = 102, // POSTINC PREDEC = 102, // PREDEC
POSTDEC = 103 // POSTDEC POSTINC = 103, // POSTINC
POSTDEC = 104 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -790,7 +792,7 @@ namespace xsk { namespace gsc { namespace iw5 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 104, ///< Number of tokens. YYNTOKENS = 105, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -880,102 +882,103 @@ namespace xsk { namespace gsc { namespace iw5 {
S_MUL = 85, // "*" S_MUL = 85, // "*"
S_DIV = 86, // "/" S_DIV = 86, // "/"
S_MOD = 87, // "%" S_MOD = 87, // "%"
S_PATH = 88, // "path" S_FIELD = 88, // "field"
S_IDENTIFIER = 89, // "identifier" S_PATH = 89, // "path"
S_STRING = 90, // "string literal" S_IDENTIFIER = 90, // "identifier"
S_ISTRING = 91, // "localized string" S_STRING = 91, // "string literal"
S_COLOR = 92, // "color" S_ISTRING = 92, // "localized string"
S_FLOAT = 93, // "float" S_COLOR = 93, // "color"
S_INTEGER = 94, // "integer" S_FLOAT = 94, // "float"
S_ADD_ARRAY = 95, // ADD_ARRAY S_INTEGER = 95, // "integer"
S_THEN = 96, // THEN S_ADD_ARRAY = 96, // ADD_ARRAY
S_TERN = 97, // TERN S_THEN = 97, // THEN
S_NEG = 98, // NEG S_TERN = 98, // TERN
S_ANIMREF = 99, // ANIMREF S_NEG = 99, // NEG
S_PREINC = 100, // PREINC S_ANIMREF = 100, // ANIMREF
S_PREDEC = 101, // PREDEC S_PREINC = 101, // PREINC
S_POSTINC = 102, // POSTINC S_PREDEC = 102, // PREDEC
S_POSTDEC = 103, // POSTDEC S_POSTINC = 103, // POSTINC
S_YYACCEPT = 104, // $accept S_POSTDEC = 104, // POSTDEC
S_root = 105, // root S_YYACCEPT = 105, // $accept
S_program = 106, // program S_root = 106, // root
S_inline = 107, // inline S_program = 107, // program
S_include = 108, // include S_inline = 108, // inline
S_declaration = 109, // declaration S_include = 109, // include
S_decl_usingtree = 110, // decl_usingtree S_declaration = 110, // declaration
S_decl_constant = 111, // decl_constant S_decl_usingtree = 111, // decl_usingtree
S_decl_thread = 112, // decl_thread S_decl_constant = 112, // decl_constant
S_stmt = 113, // stmt S_decl_thread = 113, // decl_thread
S_stmt_dev = 114, // stmt_dev S_stmt = 114, // stmt
S_stmt_block = 115, // stmt_block S_stmt_dev = 115, // stmt_dev
S_stmt_list = 116, // stmt_list S_stmt_block = 116, // stmt_block
S_stmt_expr = 117, // stmt_expr S_stmt_list = 117, // stmt_list
S_stmt_call = 118, // stmt_call S_stmt_expr = 118, // stmt_expr
S_stmt_assign = 119, // stmt_assign S_stmt_call = 119, // stmt_call
S_stmt_endon = 120, // stmt_endon S_stmt_assign = 120, // stmt_assign
S_stmt_notify = 121, // stmt_notify S_stmt_endon = 121, // stmt_endon
S_stmt_wait = 122, // stmt_wait S_stmt_notify = 122, // stmt_notify
S_stmt_waittill = 123, // stmt_waittill S_stmt_wait = 123, // stmt_wait
S_stmt_waittillmatch = 124, // stmt_waittillmatch S_stmt_waittill = 124, // stmt_waittill
S_stmt_waittillframeend = 125, // stmt_waittillframeend S_stmt_waittillmatch = 125, // stmt_waittillmatch
S_stmt_if = 126, // stmt_if S_stmt_waittillframeend = 126, // stmt_waittillframeend
S_stmt_ifelse = 127, // stmt_ifelse S_stmt_if = 127, // stmt_if
S_stmt_while = 128, // stmt_while S_stmt_ifelse = 128, // stmt_ifelse
S_stmt_dowhile = 129, // stmt_dowhile S_stmt_while = 129, // stmt_while
S_stmt_for = 130, // stmt_for S_stmt_dowhile = 130, // stmt_dowhile
S_stmt_foreach = 131, // stmt_foreach S_stmt_for = 131, // stmt_for
S_stmt_switch = 132, // stmt_switch S_stmt_foreach = 132, // stmt_foreach
S_stmt_case = 133, // stmt_case S_stmt_switch = 133, // stmt_switch
S_stmt_default = 134, // stmt_default S_stmt_case = 134, // stmt_case
S_stmt_break = 135, // stmt_break S_stmt_default = 135, // stmt_default
S_stmt_continue = 136, // stmt_continue S_stmt_break = 136, // stmt_break
S_stmt_return = 137, // stmt_return S_stmt_continue = 137, // stmt_continue
S_stmt_breakpoint = 138, // stmt_breakpoint S_stmt_return = 138, // stmt_return
S_stmt_prof_begin = 139, // stmt_prof_begin S_stmt_breakpoint = 139, // stmt_breakpoint
S_stmt_prof_end = 140, // stmt_prof_end S_stmt_prof_begin = 140, // stmt_prof_begin
S_expr = 141, // expr S_stmt_prof_end = 141, // stmt_prof_end
S_expr_or_empty = 142, // expr_or_empty S_expr = 142, // expr
S_expr_assign = 143, // expr_assign S_expr_or_empty = 143, // expr_or_empty
S_expr_increment = 144, // expr_increment S_expr_assign = 144, // expr_assign
S_expr_decrement = 145, // expr_decrement S_expr_increment = 145, // expr_increment
S_expr_ternary = 146, // expr_ternary S_expr_decrement = 146, // expr_decrement
S_expr_binary = 147, // expr_binary S_expr_ternary = 147, // expr_ternary
S_expr_primitive = 148, // expr_primitive S_expr_binary = 148, // expr_binary
S_expr_complement = 149, // expr_complement S_expr_primitive = 149, // expr_primitive
S_expr_not = 150, // expr_not S_expr_complement = 150, // expr_complement
S_expr_call = 151, // expr_call S_expr_not = 151, // expr_not
S_expr_method = 152, // expr_method S_expr_call = 152, // expr_call
S_expr_function = 153, // expr_function S_expr_method = 153, // expr_method
S_expr_pointer = 154, // expr_pointer S_expr_function = 154, // expr_function
S_expr_add_array = 155, // expr_add_array S_expr_pointer = 155, // expr_pointer
S_expr_parameters = 156, // expr_parameters S_expr_add_array = 156, // expr_add_array
S_expr_arguments = 157, // expr_arguments S_expr_parameters = 157, // expr_parameters
S_expr_arguments_no_empty = 158, // expr_arguments_no_empty S_expr_arguments = 158, // expr_arguments
S_expr_reference = 159, // expr_reference S_expr_arguments_no_empty = 159, // expr_arguments_no_empty
S_expr_array = 160, // expr_array S_expr_reference = 160, // expr_reference
S_expr_field = 161, // expr_field S_expr_array = 161, // expr_array
S_expr_size = 162, // expr_size S_expr_field = 162, // expr_field
S_expr_paren = 163, // expr_paren S_expr_size = 163, // expr_size
S_expr_object = 164, // expr_object S_expr_paren = 164, // expr_paren
S_expr_thisthread = 165, // expr_thisthread S_expr_object = 165, // expr_object
S_expr_empty_array = 166, // expr_empty_array S_expr_thisthread = 166, // expr_thisthread
S_expr_undefined = 167, // expr_undefined S_expr_empty_array = 167, // expr_empty_array
S_expr_game = 168, // expr_game S_expr_undefined = 168, // expr_undefined
S_expr_self = 169, // expr_self S_expr_game = 169, // expr_game
S_expr_anim = 170, // expr_anim S_expr_self = 170, // expr_self
S_expr_level = 171, // expr_level S_expr_anim = 171, // expr_anim
S_expr_animation = 172, // expr_animation S_expr_level = 172, // expr_level
S_expr_animtree = 173, // expr_animtree S_expr_animation = 173, // expr_animation
S_expr_identifier = 174, // expr_identifier S_expr_animtree = 174, // expr_animtree
S_expr_path = 175, // expr_path S_expr_identifier = 175, // expr_identifier
S_expr_istring = 176, // expr_istring S_expr_path = 176, // expr_path
S_expr_string = 177, // expr_string S_expr_istring = 177, // expr_istring
S_expr_color = 178, // expr_color S_expr_string = 178, // expr_string
S_expr_vector = 179, // expr_vector S_expr_color = 179, // expr_color
S_expr_float = 180, // expr_float S_expr_vector = 180, // expr_vector
S_expr_integer = 181, // expr_integer S_expr_float = 181, // expr_float
S_expr_false = 182, // expr_false S_expr_integer = 182, // expr_integer
S_expr_true = 183 // expr_true S_expr_false = 183, // expr_false
S_expr_true = 184 // expr_true
}; };
}; };
@ -1284,6 +1287,7 @@ namespace xsk { namespace gsc { namespace iw5 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2535,6 +2539,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2651,7 +2656,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
IW5_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); IW5_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4021,6 +4026,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4605,7 +4625,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2293, ///< Last index in yytable_. yylast_ = 2249, ///< Last index in yytable_.
yynnts_ = 80, ///< Number of nonterminal symbols. yynnts_ = 80, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -4905,6 +4925,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5216,6 +5237,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5289,7 +5311,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::iw5 } } } // xsk::gsc::iw5
#line 5293 "parser.hpp" #line 5315 "parser.hpp"

View File

@ -67,9 +67,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -79,18 +79,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -138,7 +129,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -184,24 +175,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw iw6::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return iw6::parser::make_IW6EOF(loc_); return iw6::parser::make_IW6EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -215,7 +210,7 @@ auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return iw6::parser::make_DIV(loc_); return iw6::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -225,29 +220,27 @@ auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw iw6::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return iw6::parser::make_DEVBEGIN(loc_); return iw6::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw iw6::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -261,16 +254,15 @@ auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw iw6::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -283,7 +275,7 @@ auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -293,69 +285,43 @@ auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw iw6::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return iw6::parser::make_DEVEND(loc_); return iw6::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return iw6::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw iw6::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return iw6::parser::make_MUL(loc_); return iw6::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw iw6::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return iw6::parser::make_ASSIGN_MUL(loc_); return iw6::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return iw6::parser::make_LPAREN(loc_); return iw6::parser::make_LPAREN(loc_);
case ')': case ')':
@ -375,6 +341,7 @@ auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return iw6::parser::make_COLON(loc_); return iw6::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return iw6::parser::make_DOUBLECOLON(loc_); return iw6::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -382,43 +349,70 @@ auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return iw6::parser::make_ASSIGN(loc_); return iw6::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return iw6::parser::make_EQUALITY(loc_); return iw6::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return iw6::parser::make_ADD(loc_); return iw6::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? iw6::parser::make_INCREMENT(loc_) : iw6::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return iw6::parser::make_INCREMENT(loc_);
return iw6::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return iw6::parser::make_SUB(loc_); return iw6::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? iw6::parser::make_DECREMENT(loc_) : iw6::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return iw6::parser::make_DECREMENT(loc_);
return iw6::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return iw6::parser::make_MOD(loc_); return iw6::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return iw6::parser::make_ASSIGN_MOD(loc_); return iw6::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return iw6::parser::make_BITWISE_OR(loc_); return iw6::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? iw6::parser::make_OR(loc_) : iw6::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return iw6::parser::make_OR(loc_);
return iw6::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return iw6::parser::make_BITWISE_AND(loc_); return iw6::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? iw6::parser::make_AND(loc_) : iw6::parser::make_ASSIGN_BW_AND(loc_); return iw6::parser::make_AND(loc_);
if (last == '=')
return iw6::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return iw6::parser::make_BITWISE_EXOR(loc_); return iw6::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return iw6::parser::make_ASSIGN_BW_EXOR(loc_); return iw6::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return iw6::parser::make_NOT(loc_); return iw6::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return iw6::parser::make_INEQUALITY(loc_); return iw6::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -426,88 +420,180 @@ auto lexer::lex() -> xsk::gsc::iw6::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return iw6::parser::make_LESS(loc_); return iw6::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? iw6::parser::make_ASSIGN_LSHIFT(loc_) : iw6::parser::make_LSHIFT(loc_);
}
return iw6::parser::make_LESS_EQUAL(loc_); return iw6::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return iw6::parser::make_LSHIFT(loc_);
reader_.advance();
return iw6::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return iw6::parser::make_GREATER(loc_); return iw6::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? iw6::parser::make_ASSIGN_RSHIFT(loc_) : iw6::parser::make_RSHIFT(loc_);
}
return iw6::parser::make_GREATER_EQUAL(loc_); return iw6::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return iw6::parser::make_RSHIFT(loc_);
reader_.advance();
return iw6::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw iw6::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::iw6::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw iw6::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw iw6::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return iw6::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return iw6::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return iw6::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return iw6::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::iw6::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
{
buffer_.push(first);
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
}
else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if(state_ == state::field)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return iw6::parser::make_SIZE(loc_);
}
return iw6::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
//return iw6::parser::make_PATH(xsk::gsc::iw6::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return iw6::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
}
//return iw6::parser::make_IDENTIFIER(xsk::gsc::iw6::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return iw6::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
}
lex_number:
if (state_ == state::field)
buffer_.push('.');
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.')) if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'') if ((curr == '.' || curr == 'f') && last == '\'')
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if (curr == '\'') if (curr == '\'')
{ {
@ -515,55 +601,41 @@ auto lexer::read_number(char first) -> xsk::gsc::iw6::parser::symbol_type
continue; continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58))) if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'')
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
auto data = buffer_.data; if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
auto len = buffer_.length; throw comp_error(loc_, "invalid number literal");
auto dot = true;
for (auto i = 1; i < len; i++) if (state_ == state::field || dot || flt)
{ return iw6::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
if (data[i] == '.')
{ return iw6::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
if (dot)
throw iw6::parser::syntax_error(loc_, "invalid number '.'");
} }
if (data[i] == 'f' && i != len - 1) else if (curr == 'o')
throw iw6::parser::syntax_error(loc_, "invalid number 'f'");
}
return iw6::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{ {
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'o')) if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (curr == 'o' && last == '\'')
throw iw6::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -580,30 +652,24 @@ auto lexer::read_number(char first) -> xsk::gsc::iw6::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length <= 0)
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (buffer_.length < 3) return iw6::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
throw error("gsc lexer: invalid octal literal!");
return iw6::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
} }
else if (first == '0' && curr == 'b') else if (curr == 'b')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('b'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'b')) if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (curr == 'b' && last == '\'')
throw iw6::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -615,35 +681,29 @@ auto lexer::read_number(char first) -> xsk::gsc::iw6::parser::symbol_type
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return iw6::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_); return iw6::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
} }
else if (first == '0' && curr == 'x') else if (curr == 'x')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('x'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'x')) if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == 'x' && last == '\'')
throw iw6::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -660,161 +720,13 @@ auto lexer::read_number(char first) -> xsk::gsc::iw6::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw iw6::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return iw6::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_); return iw6::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
} }
else // cant get here!
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw iw6::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw iw6::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw iw6::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw iw6::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw iw6::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return iw6::parser::make_FLOAT(std::string(data, len), loc_);
return iw6::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::iw6::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (reader_.last_byte == '\\')
throw iw6::parser::syntax_error(loc_, "invalid path '\\\\'");
path = true;
curr = '/';
}
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw iw6::parser::syntax_error(loc_, "invalid path end '\\'");
return iw6::parser::make_PATH(xsk::gsc::iw6::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
return iw6::parser::make_IDENTIFIER(xsk::gsc::iw6::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
auto lexer::read_dotsize() -> xsk::gsc::iw6::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
}
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok)
{
curr = reader_.current_byte;
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return iw6::parser::make_SIZE(loc_);
}
reader_.state = save.state;
reader_.bytes_remaining = save.bytes_remaining;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return iw6::parser::make_DOT(loc_);
}
else return iw6::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::iw6::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::iw6::parser::symbol_type

View File

@ -12,11 +12,12 @@ namespace xsk::gsc::iw6
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -25,13 +26,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -47,15 +48,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -65,10 +68,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::iw6::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::iw6::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::iw6::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::iw6::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::iw6::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::iw6::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -614,6 +614,7 @@ namespace xsk { namespace gsc { namespace iw6 {
// stmt_while // stmt_while
char dummy65[sizeof (ast::stmt_while::ptr)]; char dummy65[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -758,22 +759,23 @@ namespace xsk { namespace gsc { namespace iw6 {
MUL = 85, // "*" MUL = 85, // "*"
DIV = 86, // "/" DIV = 86, // "/"
MOD = 87, // "%" MOD = 87, // "%"
PATH = 88, // "path" FIELD = 88, // "field"
IDENTIFIER = 89, // "identifier" PATH = 89, // "path"
STRING = 90, // "string literal" IDENTIFIER = 90, // "identifier"
ISTRING = 91, // "localized string" STRING = 91, // "string literal"
COLOR = 92, // "color" ISTRING = 92, // "localized string"
FLOAT = 93, // "float" COLOR = 93, // "color"
INTEGER = 94, // "integer" FLOAT = 94, // "float"
ADD_ARRAY = 95, // ADD_ARRAY INTEGER = 95, // "integer"
THEN = 96, // THEN ADD_ARRAY = 96, // ADD_ARRAY
TERN = 97, // TERN THEN = 97, // THEN
NEG = 98, // NEG TERN = 98, // TERN
ANIMREF = 99, // ANIMREF NEG = 99, // NEG
PREINC = 100, // PREINC ANIMREF = 100, // ANIMREF
PREDEC = 101, // PREDEC PREINC = 101, // PREINC
POSTINC = 102, // POSTINC PREDEC = 102, // PREDEC
POSTDEC = 103 // POSTDEC POSTINC = 103, // POSTINC
POSTDEC = 104 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -790,7 +792,7 @@ namespace xsk { namespace gsc { namespace iw6 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 104, ///< Number of tokens. YYNTOKENS = 105, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -880,102 +882,103 @@ namespace xsk { namespace gsc { namespace iw6 {
S_MUL = 85, // "*" S_MUL = 85, // "*"
S_DIV = 86, // "/" S_DIV = 86, // "/"
S_MOD = 87, // "%" S_MOD = 87, // "%"
S_PATH = 88, // "path" S_FIELD = 88, // "field"
S_IDENTIFIER = 89, // "identifier" S_PATH = 89, // "path"
S_STRING = 90, // "string literal" S_IDENTIFIER = 90, // "identifier"
S_ISTRING = 91, // "localized string" S_STRING = 91, // "string literal"
S_COLOR = 92, // "color" S_ISTRING = 92, // "localized string"
S_FLOAT = 93, // "float" S_COLOR = 93, // "color"
S_INTEGER = 94, // "integer" S_FLOAT = 94, // "float"
S_ADD_ARRAY = 95, // ADD_ARRAY S_INTEGER = 95, // "integer"
S_THEN = 96, // THEN S_ADD_ARRAY = 96, // ADD_ARRAY
S_TERN = 97, // TERN S_THEN = 97, // THEN
S_NEG = 98, // NEG S_TERN = 98, // TERN
S_ANIMREF = 99, // ANIMREF S_NEG = 99, // NEG
S_PREINC = 100, // PREINC S_ANIMREF = 100, // ANIMREF
S_PREDEC = 101, // PREDEC S_PREINC = 101, // PREINC
S_POSTINC = 102, // POSTINC S_PREDEC = 102, // PREDEC
S_POSTDEC = 103, // POSTDEC S_POSTINC = 103, // POSTINC
S_YYACCEPT = 104, // $accept S_POSTDEC = 104, // POSTDEC
S_root = 105, // root S_YYACCEPT = 105, // $accept
S_program = 106, // program S_root = 106, // root
S_inline = 107, // inline S_program = 107, // program
S_include = 108, // include S_inline = 108, // inline
S_declaration = 109, // declaration S_include = 109, // include
S_decl_usingtree = 110, // decl_usingtree S_declaration = 110, // declaration
S_decl_constant = 111, // decl_constant S_decl_usingtree = 111, // decl_usingtree
S_decl_thread = 112, // decl_thread S_decl_constant = 112, // decl_constant
S_stmt = 113, // stmt S_decl_thread = 113, // decl_thread
S_stmt_dev = 114, // stmt_dev S_stmt = 114, // stmt
S_stmt_block = 115, // stmt_block S_stmt_dev = 115, // stmt_dev
S_stmt_list = 116, // stmt_list S_stmt_block = 116, // stmt_block
S_stmt_expr = 117, // stmt_expr S_stmt_list = 117, // stmt_list
S_stmt_call = 118, // stmt_call S_stmt_expr = 118, // stmt_expr
S_stmt_assign = 119, // stmt_assign S_stmt_call = 119, // stmt_call
S_stmt_endon = 120, // stmt_endon S_stmt_assign = 120, // stmt_assign
S_stmt_notify = 121, // stmt_notify S_stmt_endon = 121, // stmt_endon
S_stmt_wait = 122, // stmt_wait S_stmt_notify = 122, // stmt_notify
S_stmt_waittill = 123, // stmt_waittill S_stmt_wait = 123, // stmt_wait
S_stmt_waittillmatch = 124, // stmt_waittillmatch S_stmt_waittill = 124, // stmt_waittill
S_stmt_waittillframeend = 125, // stmt_waittillframeend S_stmt_waittillmatch = 125, // stmt_waittillmatch
S_stmt_if = 126, // stmt_if S_stmt_waittillframeend = 126, // stmt_waittillframeend
S_stmt_ifelse = 127, // stmt_ifelse S_stmt_if = 127, // stmt_if
S_stmt_while = 128, // stmt_while S_stmt_ifelse = 128, // stmt_ifelse
S_stmt_dowhile = 129, // stmt_dowhile S_stmt_while = 129, // stmt_while
S_stmt_for = 130, // stmt_for S_stmt_dowhile = 130, // stmt_dowhile
S_stmt_foreach = 131, // stmt_foreach S_stmt_for = 131, // stmt_for
S_stmt_switch = 132, // stmt_switch S_stmt_foreach = 132, // stmt_foreach
S_stmt_case = 133, // stmt_case S_stmt_switch = 133, // stmt_switch
S_stmt_default = 134, // stmt_default S_stmt_case = 134, // stmt_case
S_stmt_break = 135, // stmt_break S_stmt_default = 135, // stmt_default
S_stmt_continue = 136, // stmt_continue S_stmt_break = 136, // stmt_break
S_stmt_return = 137, // stmt_return S_stmt_continue = 137, // stmt_continue
S_stmt_breakpoint = 138, // stmt_breakpoint S_stmt_return = 138, // stmt_return
S_stmt_prof_begin = 139, // stmt_prof_begin S_stmt_breakpoint = 139, // stmt_breakpoint
S_stmt_prof_end = 140, // stmt_prof_end S_stmt_prof_begin = 140, // stmt_prof_begin
S_expr = 141, // expr S_stmt_prof_end = 141, // stmt_prof_end
S_expr_or_empty = 142, // expr_or_empty S_expr = 142, // expr
S_expr_assign = 143, // expr_assign S_expr_or_empty = 143, // expr_or_empty
S_expr_increment = 144, // expr_increment S_expr_assign = 144, // expr_assign
S_expr_decrement = 145, // expr_decrement S_expr_increment = 145, // expr_increment
S_expr_ternary = 146, // expr_ternary S_expr_decrement = 146, // expr_decrement
S_expr_binary = 147, // expr_binary S_expr_ternary = 147, // expr_ternary
S_expr_primitive = 148, // expr_primitive S_expr_binary = 148, // expr_binary
S_expr_complement = 149, // expr_complement S_expr_primitive = 149, // expr_primitive
S_expr_not = 150, // expr_not S_expr_complement = 150, // expr_complement
S_expr_call = 151, // expr_call S_expr_not = 151, // expr_not
S_expr_method = 152, // expr_method S_expr_call = 152, // expr_call
S_expr_function = 153, // expr_function S_expr_method = 153, // expr_method
S_expr_pointer = 154, // expr_pointer S_expr_function = 154, // expr_function
S_expr_add_array = 155, // expr_add_array S_expr_pointer = 155, // expr_pointer
S_expr_parameters = 156, // expr_parameters S_expr_add_array = 156, // expr_add_array
S_expr_arguments = 157, // expr_arguments S_expr_parameters = 157, // expr_parameters
S_expr_arguments_no_empty = 158, // expr_arguments_no_empty S_expr_arguments = 158, // expr_arguments
S_expr_reference = 159, // expr_reference S_expr_arguments_no_empty = 159, // expr_arguments_no_empty
S_expr_array = 160, // expr_array S_expr_reference = 160, // expr_reference
S_expr_field = 161, // expr_field S_expr_array = 161, // expr_array
S_expr_size = 162, // expr_size S_expr_field = 162, // expr_field
S_expr_paren = 163, // expr_paren S_expr_size = 163, // expr_size
S_expr_object = 164, // expr_object S_expr_paren = 164, // expr_paren
S_expr_thisthread = 165, // expr_thisthread S_expr_object = 165, // expr_object
S_expr_empty_array = 166, // expr_empty_array S_expr_thisthread = 166, // expr_thisthread
S_expr_undefined = 167, // expr_undefined S_expr_empty_array = 167, // expr_empty_array
S_expr_game = 168, // expr_game S_expr_undefined = 168, // expr_undefined
S_expr_self = 169, // expr_self S_expr_game = 169, // expr_game
S_expr_anim = 170, // expr_anim S_expr_self = 170, // expr_self
S_expr_level = 171, // expr_level S_expr_anim = 171, // expr_anim
S_expr_animation = 172, // expr_animation S_expr_level = 172, // expr_level
S_expr_animtree = 173, // expr_animtree S_expr_animation = 173, // expr_animation
S_expr_identifier = 174, // expr_identifier S_expr_animtree = 174, // expr_animtree
S_expr_path = 175, // expr_path S_expr_identifier = 175, // expr_identifier
S_expr_istring = 176, // expr_istring S_expr_path = 176, // expr_path
S_expr_string = 177, // expr_string S_expr_istring = 177, // expr_istring
S_expr_color = 178, // expr_color S_expr_string = 178, // expr_string
S_expr_vector = 179, // expr_vector S_expr_color = 179, // expr_color
S_expr_float = 180, // expr_float S_expr_vector = 180, // expr_vector
S_expr_integer = 181, // expr_integer S_expr_float = 181, // expr_float
S_expr_false = 182, // expr_false S_expr_integer = 182, // expr_integer
S_expr_true = 183 // expr_true S_expr_false = 183, // expr_false
S_expr_true = 184 // expr_true
}; };
}; };
@ -1284,6 +1287,7 @@ namespace xsk { namespace gsc { namespace iw6 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2535,6 +2539,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2651,7 +2656,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
IW6_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); IW6_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4021,6 +4026,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4605,7 +4625,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2293, ///< Last index in yytable_. yylast_ = 2249, ///< Last index in yytable_.
yynnts_ = 80, ///< Number of nonterminal symbols. yynnts_ = 80, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -4905,6 +4925,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5216,6 +5237,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5289,7 +5311,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::iw6 } } } // xsk::gsc::iw6
#line 5293 "parser.hpp" #line 5315 "parser.hpp"

View File

@ -67,9 +67,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -79,18 +79,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -138,7 +129,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -184,24 +175,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw iw7::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return iw7::parser::make_IW7EOF(loc_); return iw7::parser::make_IW7EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -215,7 +210,7 @@ auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return iw7::parser::make_DIV(loc_); return iw7::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -225,29 +220,27 @@ auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw iw7::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return iw7::parser::make_DEVBEGIN(loc_); return iw7::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw iw7::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -261,16 +254,15 @@ auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw iw7::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -283,7 +275,7 @@ auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -293,69 +285,43 @@ auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw iw7::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return iw7::parser::make_DEVEND(loc_); return iw7::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return iw7::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw iw7::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return iw7::parser::make_MUL(loc_); return iw7::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw iw7::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return iw7::parser::make_ASSIGN_MUL(loc_); return iw7::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return iw7::parser::make_LPAREN(loc_); return iw7::parser::make_LPAREN(loc_);
case ')': case ')':
@ -375,6 +341,7 @@ auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return iw7::parser::make_COLON(loc_); return iw7::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return iw7::parser::make_DOUBLECOLON(loc_); return iw7::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -382,43 +349,70 @@ auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return iw7::parser::make_ASSIGN(loc_); return iw7::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return iw7::parser::make_EQUALITY(loc_); return iw7::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return iw7::parser::make_ADD(loc_); return iw7::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? iw7::parser::make_INCREMENT(loc_) : iw7::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return iw7::parser::make_INCREMENT(loc_);
return iw7::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return iw7::parser::make_SUB(loc_); return iw7::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? iw7::parser::make_DECREMENT(loc_) : iw7::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return iw7::parser::make_DECREMENT(loc_);
return iw7::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return iw7::parser::make_MOD(loc_); return iw7::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return iw7::parser::make_ASSIGN_MOD(loc_); return iw7::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return iw7::parser::make_BITWISE_OR(loc_); return iw7::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? iw7::parser::make_OR(loc_) : iw7::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return iw7::parser::make_OR(loc_);
return iw7::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return iw7::parser::make_BITWISE_AND(loc_); return iw7::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? iw7::parser::make_AND(loc_) : iw7::parser::make_ASSIGN_BW_AND(loc_); return iw7::parser::make_AND(loc_);
if (last == '=')
return iw7::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return iw7::parser::make_BITWISE_EXOR(loc_); return iw7::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return iw7::parser::make_ASSIGN_BW_EXOR(loc_); return iw7::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return iw7::parser::make_NOT(loc_); return iw7::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return iw7::parser::make_INEQUALITY(loc_); return iw7::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -426,88 +420,180 @@ auto lexer::lex() -> xsk::gsc::iw7::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return iw7::parser::make_LESS(loc_); return iw7::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? iw7::parser::make_ASSIGN_LSHIFT(loc_) : iw7::parser::make_LSHIFT(loc_);
}
return iw7::parser::make_LESS_EQUAL(loc_); return iw7::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return iw7::parser::make_LSHIFT(loc_);
reader_.advance();
return iw7::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return iw7::parser::make_GREATER(loc_); return iw7::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? iw7::parser::make_ASSIGN_RSHIFT(loc_) : iw7::parser::make_RSHIFT(loc_);
}
return iw7::parser::make_GREATER_EQUAL(loc_); return iw7::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return iw7::parser::make_RSHIFT(loc_);
reader_.advance();
return iw7::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw iw7::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::iw7::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw iw7::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw iw7::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return iw7::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return iw7::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return iw7::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return iw7::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::iw7::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
{
buffer_.push(first);
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
}
else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if(state_ == state::field)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return iw7::parser::make_SIZE(loc_);
}
return iw7::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
//return iw7::parser::make_PATH(xsk::gsc::iw7::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return iw7::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
}
//return iw7::parser::make_IDENTIFIER(xsk::gsc::iw7::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return iw7::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
}
lex_number:
if (state_ == state::field)
buffer_.push('.');
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.')) if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'') if ((curr == '.' || curr == 'f') && last == '\'')
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if (curr == '\'') if (curr == '\'')
{ {
@ -515,55 +601,41 @@ auto lexer::read_number(char first) -> xsk::gsc::iw7::parser::symbol_type
continue; continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58))) if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'')
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
auto data = buffer_.data; if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
auto len = buffer_.length; throw comp_error(loc_, "invalid number literal");
auto dot = true;
for (auto i = 1; i < len; i++) if (state_ == state::field || dot || flt)
{ return iw7::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
if (data[i] == '.')
{ return iw7::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
if (dot)
throw iw7::parser::syntax_error(loc_, "invalid number '.'");
} }
if (data[i] == 'f' && i != len - 1) else if (curr == 'o')
throw iw7::parser::syntax_error(loc_, "invalid number 'f'");
}
return iw7::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{ {
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'o')) if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (curr == 'o' && last == '\'')
throw iw7::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -580,30 +652,24 @@ auto lexer::read_number(char first) -> xsk::gsc::iw7::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length <= 0)
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (buffer_.length < 3) return iw7::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
throw error("gsc lexer: invalid octal literal!");
return iw7::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
} }
else if (first == '0' && curr == 'b') else if (curr == 'b')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('b'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'b')) if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (curr == 'b' && last == '\'')
throw iw7::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -615,35 +681,29 @@ auto lexer::read_number(char first) -> xsk::gsc::iw7::parser::symbol_type
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return iw7::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_); return iw7::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
} }
else if (first == '0' && curr == 'x') else if (curr == 'x')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('x'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'x')) if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == 'x' && last == '\'')
throw iw7::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -660,161 +720,13 @@ auto lexer::read_number(char first) -> xsk::gsc::iw7::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw iw7::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return iw7::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_); return iw7::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
} }
else // cant get here!
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw iw7::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw iw7::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw iw7::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw iw7::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw iw7::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return iw7::parser::make_FLOAT(std::string(data, len), loc_);
return iw7::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::iw7::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (reader_.last_byte == '\\')
throw iw7::parser::syntax_error(loc_, "invalid path '\\\\'");
path = true;
curr = '/';
}
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw iw7::parser::syntax_error(loc_, "invalid path end '\\'");
return iw7::parser::make_PATH(xsk::gsc::iw7::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
return iw7::parser::make_IDENTIFIER(xsk::gsc::iw7::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
auto lexer::read_dotsize() -> xsk::gsc::iw7::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
}
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok)
{
curr = reader_.current_byte;
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return iw7::parser::make_SIZE(loc_);
}
reader_.state = save.state;
reader_.bytes_remaining = save.bytes_remaining;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return iw7::parser::make_DOT(loc_);
}
else return iw7::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::iw7::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::iw7::parser::symbol_type

View File

@ -12,11 +12,12 @@ namespace xsk::gsc::iw7
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -25,13 +26,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -47,15 +48,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -65,10 +68,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::iw7::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::iw7::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::iw7::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::iw7::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::iw7::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::iw7::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -614,6 +614,7 @@ namespace xsk { namespace gsc { namespace iw7 {
// stmt_while // stmt_while
char dummy65[sizeof (ast::stmt_while::ptr)]; char dummy65[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -758,22 +759,23 @@ namespace xsk { namespace gsc { namespace iw7 {
MUL = 85, // "*" MUL = 85, // "*"
DIV = 86, // "/" DIV = 86, // "/"
MOD = 87, // "%" MOD = 87, // "%"
PATH = 88, // "path" FIELD = 88, // "field"
IDENTIFIER = 89, // "identifier" PATH = 89, // "path"
STRING = 90, // "string literal" IDENTIFIER = 90, // "identifier"
ISTRING = 91, // "localized string" STRING = 91, // "string literal"
COLOR = 92, // "color" ISTRING = 92, // "localized string"
FLOAT = 93, // "float" COLOR = 93, // "color"
INTEGER = 94, // "integer" FLOAT = 94, // "float"
ADD_ARRAY = 95, // ADD_ARRAY INTEGER = 95, // "integer"
THEN = 96, // THEN ADD_ARRAY = 96, // ADD_ARRAY
TERN = 97, // TERN THEN = 97, // THEN
NEG = 98, // NEG TERN = 98, // TERN
ANIMREF = 99, // ANIMREF NEG = 99, // NEG
PREINC = 100, // PREINC ANIMREF = 100, // ANIMREF
PREDEC = 101, // PREDEC PREINC = 101, // PREINC
POSTINC = 102, // POSTINC PREDEC = 102, // PREDEC
POSTDEC = 103 // POSTDEC POSTINC = 103, // POSTINC
POSTDEC = 104 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -790,7 +792,7 @@ namespace xsk { namespace gsc { namespace iw7 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 104, ///< Number of tokens. YYNTOKENS = 105, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -880,102 +882,103 @@ namespace xsk { namespace gsc { namespace iw7 {
S_MUL = 85, // "*" S_MUL = 85, // "*"
S_DIV = 86, // "/" S_DIV = 86, // "/"
S_MOD = 87, // "%" S_MOD = 87, // "%"
S_PATH = 88, // "path" S_FIELD = 88, // "field"
S_IDENTIFIER = 89, // "identifier" S_PATH = 89, // "path"
S_STRING = 90, // "string literal" S_IDENTIFIER = 90, // "identifier"
S_ISTRING = 91, // "localized string" S_STRING = 91, // "string literal"
S_COLOR = 92, // "color" S_ISTRING = 92, // "localized string"
S_FLOAT = 93, // "float" S_COLOR = 93, // "color"
S_INTEGER = 94, // "integer" S_FLOAT = 94, // "float"
S_ADD_ARRAY = 95, // ADD_ARRAY S_INTEGER = 95, // "integer"
S_THEN = 96, // THEN S_ADD_ARRAY = 96, // ADD_ARRAY
S_TERN = 97, // TERN S_THEN = 97, // THEN
S_NEG = 98, // NEG S_TERN = 98, // TERN
S_ANIMREF = 99, // ANIMREF S_NEG = 99, // NEG
S_PREINC = 100, // PREINC S_ANIMREF = 100, // ANIMREF
S_PREDEC = 101, // PREDEC S_PREINC = 101, // PREINC
S_POSTINC = 102, // POSTINC S_PREDEC = 102, // PREDEC
S_POSTDEC = 103, // POSTDEC S_POSTINC = 103, // POSTINC
S_YYACCEPT = 104, // $accept S_POSTDEC = 104, // POSTDEC
S_root = 105, // root S_YYACCEPT = 105, // $accept
S_program = 106, // program S_root = 106, // root
S_inline = 107, // inline S_program = 107, // program
S_include = 108, // include S_inline = 108, // inline
S_declaration = 109, // declaration S_include = 109, // include
S_decl_usingtree = 110, // decl_usingtree S_declaration = 110, // declaration
S_decl_constant = 111, // decl_constant S_decl_usingtree = 111, // decl_usingtree
S_decl_thread = 112, // decl_thread S_decl_constant = 112, // decl_constant
S_stmt = 113, // stmt S_decl_thread = 113, // decl_thread
S_stmt_dev = 114, // stmt_dev S_stmt = 114, // stmt
S_stmt_block = 115, // stmt_block S_stmt_dev = 115, // stmt_dev
S_stmt_list = 116, // stmt_list S_stmt_block = 116, // stmt_block
S_stmt_expr = 117, // stmt_expr S_stmt_list = 117, // stmt_list
S_stmt_call = 118, // stmt_call S_stmt_expr = 118, // stmt_expr
S_stmt_assign = 119, // stmt_assign S_stmt_call = 119, // stmt_call
S_stmt_endon = 120, // stmt_endon S_stmt_assign = 120, // stmt_assign
S_stmt_notify = 121, // stmt_notify S_stmt_endon = 121, // stmt_endon
S_stmt_wait = 122, // stmt_wait S_stmt_notify = 122, // stmt_notify
S_stmt_waittill = 123, // stmt_waittill S_stmt_wait = 123, // stmt_wait
S_stmt_waittillmatch = 124, // stmt_waittillmatch S_stmt_waittill = 124, // stmt_waittill
S_stmt_waittillframeend = 125, // stmt_waittillframeend S_stmt_waittillmatch = 125, // stmt_waittillmatch
S_stmt_if = 126, // stmt_if S_stmt_waittillframeend = 126, // stmt_waittillframeend
S_stmt_ifelse = 127, // stmt_ifelse S_stmt_if = 127, // stmt_if
S_stmt_while = 128, // stmt_while S_stmt_ifelse = 128, // stmt_ifelse
S_stmt_dowhile = 129, // stmt_dowhile S_stmt_while = 129, // stmt_while
S_stmt_for = 130, // stmt_for S_stmt_dowhile = 130, // stmt_dowhile
S_stmt_foreach = 131, // stmt_foreach S_stmt_for = 131, // stmt_for
S_stmt_switch = 132, // stmt_switch S_stmt_foreach = 132, // stmt_foreach
S_stmt_case = 133, // stmt_case S_stmt_switch = 133, // stmt_switch
S_stmt_default = 134, // stmt_default S_stmt_case = 134, // stmt_case
S_stmt_break = 135, // stmt_break S_stmt_default = 135, // stmt_default
S_stmt_continue = 136, // stmt_continue S_stmt_break = 136, // stmt_break
S_stmt_return = 137, // stmt_return S_stmt_continue = 137, // stmt_continue
S_stmt_breakpoint = 138, // stmt_breakpoint S_stmt_return = 138, // stmt_return
S_stmt_prof_begin = 139, // stmt_prof_begin S_stmt_breakpoint = 139, // stmt_breakpoint
S_stmt_prof_end = 140, // stmt_prof_end S_stmt_prof_begin = 140, // stmt_prof_begin
S_expr = 141, // expr S_stmt_prof_end = 141, // stmt_prof_end
S_expr_or_empty = 142, // expr_or_empty S_expr = 142, // expr
S_expr_assign = 143, // expr_assign S_expr_or_empty = 143, // expr_or_empty
S_expr_increment = 144, // expr_increment S_expr_assign = 144, // expr_assign
S_expr_decrement = 145, // expr_decrement S_expr_increment = 145, // expr_increment
S_expr_ternary = 146, // expr_ternary S_expr_decrement = 146, // expr_decrement
S_expr_binary = 147, // expr_binary S_expr_ternary = 147, // expr_ternary
S_expr_primitive = 148, // expr_primitive S_expr_binary = 148, // expr_binary
S_expr_complement = 149, // expr_complement S_expr_primitive = 149, // expr_primitive
S_expr_not = 150, // expr_not S_expr_complement = 150, // expr_complement
S_expr_call = 151, // expr_call S_expr_not = 151, // expr_not
S_expr_method = 152, // expr_method S_expr_call = 152, // expr_call
S_expr_function = 153, // expr_function S_expr_method = 153, // expr_method
S_expr_pointer = 154, // expr_pointer S_expr_function = 154, // expr_function
S_expr_add_array = 155, // expr_add_array S_expr_pointer = 155, // expr_pointer
S_expr_parameters = 156, // expr_parameters S_expr_add_array = 156, // expr_add_array
S_expr_arguments = 157, // expr_arguments S_expr_parameters = 157, // expr_parameters
S_expr_arguments_no_empty = 158, // expr_arguments_no_empty S_expr_arguments = 158, // expr_arguments
S_expr_reference = 159, // expr_reference S_expr_arguments_no_empty = 159, // expr_arguments_no_empty
S_expr_array = 160, // expr_array S_expr_reference = 160, // expr_reference
S_expr_field = 161, // expr_field S_expr_array = 161, // expr_array
S_expr_size = 162, // expr_size S_expr_field = 162, // expr_field
S_expr_paren = 163, // expr_paren S_expr_size = 163, // expr_size
S_expr_object = 164, // expr_object S_expr_paren = 164, // expr_paren
S_expr_thisthread = 165, // expr_thisthread S_expr_object = 165, // expr_object
S_expr_empty_array = 166, // expr_empty_array S_expr_thisthread = 166, // expr_thisthread
S_expr_undefined = 167, // expr_undefined S_expr_empty_array = 167, // expr_empty_array
S_expr_game = 168, // expr_game S_expr_undefined = 168, // expr_undefined
S_expr_self = 169, // expr_self S_expr_game = 169, // expr_game
S_expr_anim = 170, // expr_anim S_expr_self = 170, // expr_self
S_expr_level = 171, // expr_level S_expr_anim = 171, // expr_anim
S_expr_animation = 172, // expr_animation S_expr_level = 172, // expr_level
S_expr_animtree = 173, // expr_animtree S_expr_animation = 173, // expr_animation
S_expr_identifier = 174, // expr_identifier S_expr_animtree = 174, // expr_animtree
S_expr_path = 175, // expr_path S_expr_identifier = 175, // expr_identifier
S_expr_istring = 176, // expr_istring S_expr_path = 176, // expr_path
S_expr_string = 177, // expr_string S_expr_istring = 177, // expr_istring
S_expr_color = 178, // expr_color S_expr_string = 178, // expr_string
S_expr_vector = 179, // expr_vector S_expr_color = 179, // expr_color
S_expr_float = 180, // expr_float S_expr_vector = 180, // expr_vector
S_expr_integer = 181, // expr_integer S_expr_float = 181, // expr_float
S_expr_false = 182, // expr_false S_expr_integer = 182, // expr_integer
S_expr_true = 183 // expr_true S_expr_false = 183, // expr_false
S_expr_true = 184 // expr_true
}; };
}; };
@ -1284,6 +1287,7 @@ namespace xsk { namespace gsc { namespace iw7 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2535,6 +2539,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2651,7 +2656,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
IW7_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); IW7_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4021,6 +4026,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4605,7 +4625,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2293, ///< Last index in yytable_. yylast_ = 2249, ///< Last index in yytable_.
yynnts_ = 80, ///< Number of nonterminal symbols. yynnts_ = 80, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -4905,6 +4925,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5216,6 +5237,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5289,7 +5311,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::iw7 } } } // xsk::gsc::iw7
#line 5293 "parser.hpp" #line 5315 "parser.hpp"

View File

@ -70,9 +70,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -82,18 +82,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -141,7 +132,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -187,24 +178,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw iw8::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return iw8::parser::make_IW8EOF(loc_); return iw8::parser::make_IW8EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -218,7 +213,7 @@ auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return iw8::parser::make_DIV(loc_); return iw8::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -228,29 +223,27 @@ auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw iw8::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return iw8::parser::make_DEVBEGIN(loc_); return iw8::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw iw8::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -264,16 +257,15 @@ auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw iw8::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -286,7 +278,7 @@ auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -296,69 +288,43 @@ auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw iw8::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return iw8::parser::make_DEVEND(loc_); return iw8::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return iw8::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw iw8::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return iw8::parser::make_MUL(loc_); return iw8::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw iw8::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return iw8::parser::make_ASSIGN_MUL(loc_); return iw8::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return iw8::parser::make_LPAREN(loc_); return iw8::parser::make_LPAREN(loc_);
case ')': case ')':
@ -378,6 +344,7 @@ auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return iw8::parser::make_COLON(loc_); return iw8::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return iw8::parser::make_DOUBLECOLON(loc_); return iw8::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -385,43 +352,70 @@ auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return iw8::parser::make_ASSIGN(loc_); return iw8::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return iw8::parser::make_EQUALITY(loc_); return iw8::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return iw8::parser::make_ADD(loc_); return iw8::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? iw8::parser::make_INCREMENT(loc_) : iw8::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return iw8::parser::make_INCREMENT(loc_);
return iw8::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return iw8::parser::make_SUB(loc_); return iw8::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? iw8::parser::make_DECREMENT(loc_) : iw8::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return iw8::parser::make_DECREMENT(loc_);
return iw8::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return iw8::parser::make_MOD(loc_); return iw8::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return iw8::parser::make_ASSIGN_MOD(loc_); return iw8::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return iw8::parser::make_BITWISE_OR(loc_); return iw8::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? iw8::parser::make_OR(loc_) : iw8::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return iw8::parser::make_OR(loc_);
return iw8::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return iw8::parser::make_BITWISE_AND(loc_); return iw8::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? iw8::parser::make_AND(loc_) : iw8::parser::make_ASSIGN_BW_AND(loc_); return iw8::parser::make_AND(loc_);
if (last == '=')
return iw8::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return iw8::parser::make_BITWISE_EXOR(loc_); return iw8::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return iw8::parser::make_ASSIGN_BW_EXOR(loc_); return iw8::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return iw8::parser::make_NOT(loc_); return iw8::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return iw8::parser::make_INEQUALITY(loc_); return iw8::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -429,337 +423,144 @@ auto lexer::lex() -> xsk::gsc::iw8::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return iw8::parser::make_LESS(loc_); return iw8::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? iw8::parser::make_ASSIGN_LSHIFT(loc_) : iw8::parser::make_LSHIFT(loc_);
}
return iw8::parser::make_LESS_EQUAL(loc_); return iw8::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return iw8::parser::make_LSHIFT(loc_);
reader_.advance();
return iw8::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return iw8::parser::make_GREATER(loc_); return iw8::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? iw8::parser::make_ASSIGN_RSHIFT(loc_) : iw8::parser::make_RSHIFT(loc_);
}
return iw8::parser::make_GREATER_EQUAL(loc_); return iw8::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return iw8::parser::make_RSHIFT(loc_);
reader_.advance();
return iw8::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw iw8::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::iw8::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw iw8::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw iw8::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return iw8::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return iw8::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return iw8::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return iw8::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::iw8::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
while (true)
{ {
buffer_.push(first); if (state == reader::end)
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break; break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = true;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw iw8::parser::syntax_error(loc_, "invalid number '.'");
}
if (data[i] == 'f' && i != len - 1)
throw iw8::parser::syntax_error(loc_, "invalid number 'f'");
}
return iw8::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'o'))
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (curr == 'o' && last == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!(curr > 47 && curr < 56))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (buffer_.length < 3)
throw error("gsc lexer: invalid octal literal!");
return iw8::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
}
else if (first == '0' && curr == 'b')
{
buffer_.push(first);
buffer_.push('b');
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'b'))
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (curr == 'b' && last == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (curr != '0' && curr != '1')
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return iw8::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
}
else if (first == '0' && curr == 'x')
{
buffer_.push(first);
buffer_.push('x');
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'x'))
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (curr == 'x' && last == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!((curr > 47 && curr < 58) || (curr > 64 && curr < 71) || (curr > 96 && curr < 103)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return iw8::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
}
else
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw iw8::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw iw8::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw iw8::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return iw8::parser::make_FLOAT(std::string(data, len), loc_);
return iw8::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::iw8::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58))) if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break; break;
if (curr == '\\') if (curr == '\\')
{ {
if (reader_.last_byte == '\\') if (last == '\\')
throw iw8::parser::syntax_error(loc_, "invalid path '\\\\'"); throw comp_error(loc_, "invalid path '\\\\'");
path = true; path = true;
curr = '/'; if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
} }
else if (!buffer_.push(curr))
if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded");
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
} }
if(buffer_.data[0] != '_') if(state_ == state::field)
{ {
for(auto i = 0; i < buffer_.length; i++) if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return iw8::parser::make_SIZE(loc_);
}
return iw8::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
if (buffer_.data[0] != '_')
{
for (auto i = 0; i < buffer_.length; i++)
{ {
auto c = buffer_.data[i]; auto c = buffer_.data[i];
@ -776,36 +577,87 @@ auto lexer::read_word(char first) -> xsk::gsc::iw8::parser::symbol_type
if (path) if (path)
{ {
if (buffer_.data[buffer_.length - 1] == '/') if (buffer_.data[buffer_.length - 1] == '/')
throw iw8::parser::syntax_error(loc_, "invalid path end '\\'"); throw comp_error(loc_, "invalid path end '\\'");
return iw8::parser::make_PATH(xsk::gsc::iw8::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_); //return iw8::parser::make_PATH(xsk::gsc::iw8::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return iw8::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
} }
return iw8::parser::make_IDENTIFIER(xsk::gsc::iw8::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_); //return iw8::parser::make_IDENTIFIER(xsk::gsc::iw8::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
} return iw8::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
auto lexer::read_dotsize() -> xsk::gsc::iw8::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
} }
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok) lex_number:
{ if (state_ == state::field)
curr = reader_.current_byte; buffer_.push('.');
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123))) if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'')
throw comp_error(loc_, "invalid number literal");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break;
if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded");
reader_.advance();
}
if (last == '\'')
throw comp_error(loc_, "invalid number literal");
if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
throw comp_error(loc_, "invalid number literal");
if (state_ == state::field || dot || flt)
return iw8::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
return iw8::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
}
else if (curr == 'o')
{
reader_.advance();
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw comp_error(loc_, "invalid octal literal");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!(curr > 47 && curr < 56))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
@ -814,21 +666,81 @@ auto lexer::read_dotsize() -> xsk::gsc::iw8::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (std::string_view(buffer_.data, buffer_.length) == "size") if (last == '\'' || buffer_.length <= 0)
throw comp_error(loc_, "invalid octal literal");
return iw8::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
}
else if (curr == 'b')
{ {
return iw8::parser::make_SIZE(loc_); buffer_.push(last);
buffer_.push(curr);
reader_.advance();
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw comp_error(loc_, "invalid binary literal");
if (curr == '\'')
{
reader_.advance();
continue;
} }
reader_.state = save.state; if (curr != '0' && curr != '1')
reader_.bytes_remaining = save.bytes_remaining; break;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return iw8::parser::make_DOT(loc_); if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded");
reader_.advance();
}
if (last == '\'' || buffer_.length < 3)
throw comp_error(loc_, "invalid binary literal");
return iw8::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
}
else if (curr == 'x')
{
buffer_.push(last);
buffer_.push(curr);
reader_.advance();
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!((curr > 47 && curr < 58) || (curr > 64 && curr < 71) || (curr > 96 && curr < 103)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (last == '\'' || buffer_.length < 3)
throw comp_error(loc_, "invalid hexadecimal literal");
return iw8::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
}
// cant get here!
} }
else return iw8::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::iw8::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::iw8::parser::symbol_type

View File

@ -12,11 +12,12 @@ namespace xsk::gsc::iw8
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -25,13 +26,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -47,15 +48,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -65,10 +68,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::iw8::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::iw8::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::iw8::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::iw8::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::iw8::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::iw8::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -623,6 +623,7 @@ namespace xsk { namespace gsc { namespace iw8 {
// stmt_while // stmt_while
char dummy68[sizeof (ast::stmt_while::ptr)]; char dummy68[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -770,22 +771,23 @@ namespace xsk { namespace gsc { namespace iw8 {
MUL = 88, // "*" MUL = 88, // "*"
DIV = 89, // "/" DIV = 89, // "/"
MOD = 90, // "%" MOD = 90, // "%"
PATH = 91, // "path" FIELD = 91, // "field"
IDENTIFIER = 92, // "identifier" PATH = 92, // "path"
STRING = 93, // "string literal" IDENTIFIER = 93, // "identifier"
ISTRING = 94, // "localized string" STRING = 94, // "string literal"
COLOR = 95, // "color" ISTRING = 95, // "localized string"
FLOAT = 96, // "float" COLOR = 96, // "color"
INTEGER = 97, // "integer" FLOAT = 97, // "float"
ADD_ARRAY = 98, // ADD_ARRAY INTEGER = 98, // "integer"
THEN = 99, // THEN ADD_ARRAY = 99, // ADD_ARRAY
TERN = 100, // TERN THEN = 100, // THEN
NEG = 101, // NEG TERN = 101, // TERN
ANIMREF = 102, // ANIMREF NEG = 102, // NEG
PREINC = 103, // PREINC ANIMREF = 103, // ANIMREF
PREDEC = 104, // PREDEC PREINC = 104, // PREINC
POSTINC = 105, // POSTINC PREDEC = 105, // PREDEC
POSTDEC = 106 // POSTDEC POSTINC = 106, // POSTINC
POSTDEC = 107 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -802,7 +804,7 @@ namespace xsk { namespace gsc { namespace iw8 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 107, ///< Number of tokens. YYNTOKENS = 108, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -895,105 +897,106 @@ namespace xsk { namespace gsc { namespace iw8 {
S_MUL = 88, // "*" S_MUL = 88, // "*"
S_DIV = 89, // "/" S_DIV = 89, // "/"
S_MOD = 90, // "%" S_MOD = 90, // "%"
S_PATH = 91, // "path" S_FIELD = 91, // "field"
S_IDENTIFIER = 92, // "identifier" S_PATH = 92, // "path"
S_STRING = 93, // "string literal" S_IDENTIFIER = 93, // "identifier"
S_ISTRING = 94, // "localized string" S_STRING = 94, // "string literal"
S_COLOR = 95, // "color" S_ISTRING = 95, // "localized string"
S_FLOAT = 96, // "float" S_COLOR = 96, // "color"
S_INTEGER = 97, // "integer" S_FLOAT = 97, // "float"
S_ADD_ARRAY = 98, // ADD_ARRAY S_INTEGER = 98, // "integer"
S_THEN = 99, // THEN S_ADD_ARRAY = 99, // ADD_ARRAY
S_TERN = 100, // TERN S_THEN = 100, // THEN
S_NEG = 101, // NEG S_TERN = 101, // TERN
S_ANIMREF = 102, // ANIMREF S_NEG = 102, // NEG
S_PREINC = 103, // PREINC S_ANIMREF = 103, // ANIMREF
S_PREDEC = 104, // PREDEC S_PREINC = 104, // PREINC
S_POSTINC = 105, // POSTINC S_PREDEC = 105, // PREDEC
S_POSTDEC = 106, // POSTDEC S_POSTINC = 106, // POSTINC
S_YYACCEPT = 107, // $accept S_POSTDEC = 107, // POSTDEC
S_root = 108, // root S_YYACCEPT = 108, // $accept
S_program = 109, // program S_root = 109, // root
S_inline = 110, // inline S_program = 110, // program
S_include = 111, // include S_inline = 111, // inline
S_declaration = 112, // declaration S_include = 112, // include
S_decl_usingtree = 113, // decl_usingtree S_declaration = 113, // declaration
S_decl_constant = 114, // decl_constant S_decl_usingtree = 114, // decl_usingtree
S_decl_thread = 115, // decl_thread S_decl_constant = 115, // decl_constant
S_stmt = 116, // stmt S_decl_thread = 116, // decl_thread
S_stmt_dev = 117, // stmt_dev S_stmt = 117, // stmt
S_stmt_block = 118, // stmt_block S_stmt_dev = 118, // stmt_dev
S_stmt_list = 119, // stmt_list S_stmt_block = 119, // stmt_block
S_stmt_expr = 120, // stmt_expr S_stmt_list = 120, // stmt_list
S_stmt_call = 121, // stmt_call S_stmt_expr = 121, // stmt_expr
S_stmt_assign = 122, // stmt_assign S_stmt_call = 122, // stmt_call
S_stmt_endon = 123, // stmt_endon S_stmt_assign = 123, // stmt_assign
S_stmt_notify = 124, // stmt_notify S_stmt_endon = 124, // stmt_endon
S_stmt_wait = 125, // stmt_wait S_stmt_notify = 125, // stmt_notify
S_stmt_waittill = 126, // stmt_waittill S_stmt_wait = 126, // stmt_wait
S_stmt_waittillmatch = 127, // stmt_waittillmatch S_stmt_waittill = 127, // stmt_waittill
S_stmt_waittillframeend = 128, // stmt_waittillframeend S_stmt_waittillmatch = 128, // stmt_waittillmatch
S_stmt_waitframe = 129, // stmt_waitframe S_stmt_waittillframeend = 129, // stmt_waittillframeend
S_stmt_if = 130, // stmt_if S_stmt_waitframe = 130, // stmt_waitframe
S_stmt_ifelse = 131, // stmt_ifelse S_stmt_if = 131, // stmt_if
S_stmt_while = 132, // stmt_while S_stmt_ifelse = 132, // stmt_ifelse
S_stmt_dowhile = 133, // stmt_dowhile S_stmt_while = 133, // stmt_while
S_stmt_for = 134, // stmt_for S_stmt_dowhile = 134, // stmt_dowhile
S_stmt_foreach = 135, // stmt_foreach S_stmt_for = 135, // stmt_for
S_stmt_switch = 136, // stmt_switch S_stmt_foreach = 136, // stmt_foreach
S_stmt_case = 137, // stmt_case S_stmt_switch = 137, // stmt_switch
S_stmt_default = 138, // stmt_default S_stmt_case = 138, // stmt_case
S_stmt_break = 139, // stmt_break S_stmt_default = 139, // stmt_default
S_stmt_continue = 140, // stmt_continue S_stmt_break = 140, // stmt_break
S_stmt_return = 141, // stmt_return S_stmt_continue = 141, // stmt_continue
S_stmt_breakpoint = 142, // stmt_breakpoint S_stmt_return = 142, // stmt_return
S_stmt_prof_begin = 143, // stmt_prof_begin S_stmt_breakpoint = 143, // stmt_breakpoint
S_stmt_prof_end = 144, // stmt_prof_end S_stmt_prof_begin = 144, // stmt_prof_begin
S_expr = 145, // expr S_stmt_prof_end = 145, // stmt_prof_end
S_expr_or_empty = 146, // expr_or_empty S_expr = 146, // expr
S_expr_assign = 147, // expr_assign S_expr_or_empty = 147, // expr_or_empty
S_expr_increment = 148, // expr_increment S_expr_assign = 148, // expr_assign
S_expr_decrement = 149, // expr_decrement S_expr_increment = 149, // expr_increment
S_expr_ternary = 150, // expr_ternary S_expr_decrement = 150, // expr_decrement
S_expr_binary = 151, // expr_binary S_expr_ternary = 151, // expr_ternary
S_expr_primitive = 152, // expr_primitive S_expr_binary = 152, // expr_binary
S_expr_complement = 153, // expr_complement S_expr_primitive = 153, // expr_primitive
S_expr_not = 154, // expr_not S_expr_complement = 154, // expr_complement
S_expr_call = 155, // expr_call S_expr_not = 155, // expr_not
S_expr_method = 156, // expr_method S_expr_call = 156, // expr_call
S_expr_function = 157, // expr_function S_expr_method = 157, // expr_method
S_expr_pointer = 158, // expr_pointer S_expr_function = 158, // expr_function
S_expr_add_array = 159, // expr_add_array S_expr_pointer = 159, // expr_pointer
S_expr_parameters = 160, // expr_parameters S_expr_add_array = 160, // expr_add_array
S_expr_arguments = 161, // expr_arguments S_expr_parameters = 161, // expr_parameters
S_expr_arguments_no_empty = 162, // expr_arguments_no_empty S_expr_arguments = 162, // expr_arguments
S_expr_isdefined = 163, // expr_isdefined S_expr_arguments_no_empty = 163, // expr_arguments_no_empty
S_expr_istrue = 164, // expr_istrue S_expr_isdefined = 164, // expr_isdefined
S_expr_reference = 165, // expr_reference S_expr_istrue = 165, // expr_istrue
S_expr_array = 166, // expr_array S_expr_reference = 166, // expr_reference
S_expr_field = 167, // expr_field S_expr_array = 167, // expr_array
S_expr_size = 168, // expr_size S_expr_field = 168, // expr_field
S_expr_paren = 169, // expr_paren S_expr_size = 169, // expr_size
S_expr_object = 170, // expr_object S_expr_paren = 170, // expr_paren
S_expr_thisthread = 171, // expr_thisthread S_expr_object = 171, // expr_object
S_expr_empty_array = 172, // expr_empty_array S_expr_thisthread = 172, // expr_thisthread
S_expr_undefined = 173, // expr_undefined S_expr_empty_array = 173, // expr_empty_array
S_expr_game = 174, // expr_game S_expr_undefined = 174, // expr_undefined
S_expr_self = 175, // expr_self S_expr_game = 175, // expr_game
S_expr_anim = 176, // expr_anim S_expr_self = 176, // expr_self
S_expr_level = 177, // expr_level S_expr_anim = 177, // expr_anim
S_expr_animation = 178, // expr_animation S_expr_level = 178, // expr_level
S_expr_animtree = 179, // expr_animtree S_expr_animation = 179, // expr_animation
S_expr_identifier = 180, // expr_identifier S_expr_animtree = 180, // expr_animtree
S_expr_path = 181, // expr_path S_expr_identifier = 181, // expr_identifier
S_expr_istring = 182, // expr_istring S_expr_path = 182, // expr_path
S_expr_string = 183, // expr_string S_expr_istring = 183, // expr_istring
S_expr_color = 184, // expr_color S_expr_string = 184, // expr_string
S_expr_vector = 185, // expr_vector S_expr_color = 185, // expr_color
S_expr_float = 186, // expr_float S_expr_vector = 186, // expr_vector
S_expr_integer = 187, // expr_integer S_expr_float = 187, // expr_float
S_expr_false = 188, // expr_false S_expr_integer = 188, // expr_integer
S_expr_true = 189 // expr_true S_expr_false = 189, // expr_false
S_expr_true = 190 // expr_true
}; };
}; };
@ -1314,6 +1317,7 @@ namespace xsk { namespace gsc { namespace iw8 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2619,6 +2623,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2735,7 +2740,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
IW8_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); IW8_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4150,6 +4155,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4734,7 +4754,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2336, ///< Last index in yytable_. yylast_ = 2425, ///< Last index in yytable_.
yynnts_ = 83, ///< Number of nonterminal symbols. yynnts_ = 83, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -5046,6 +5066,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5369,6 +5390,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5442,7 +5464,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::iw8 } } } // xsk::gsc::iw8
#line 5446 "parser.hpp" #line 5468 "parser.hpp"

View File

@ -68,9 +68,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -80,18 +80,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -139,7 +130,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -185,24 +176,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw s1::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return s1::parser::make_S1EOF(loc_); return s1::parser::make_S1EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -216,7 +211,7 @@ auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return s1::parser::make_DIV(loc_); return s1::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -226,29 +221,27 @@ auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw s1::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return s1::parser::make_DEVBEGIN(loc_); return s1::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw s1::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -262,16 +255,15 @@ auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw s1::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -284,7 +276,7 @@ auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -294,69 +286,43 @@ auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw s1::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return s1::parser::make_DEVEND(loc_); return s1::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return s1::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw s1::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return s1::parser::make_MUL(loc_); return s1::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw s1::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return s1::parser::make_ASSIGN_MUL(loc_); return s1::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return s1::parser::make_LPAREN(loc_); return s1::parser::make_LPAREN(loc_);
case ')': case ')':
@ -376,6 +342,7 @@ auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return s1::parser::make_COLON(loc_); return s1::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return s1::parser::make_DOUBLECOLON(loc_); return s1::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -383,43 +350,70 @@ auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return s1::parser::make_ASSIGN(loc_); return s1::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return s1::parser::make_EQUALITY(loc_); return s1::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return s1::parser::make_ADD(loc_); return s1::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? s1::parser::make_INCREMENT(loc_) : s1::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return s1::parser::make_INCREMENT(loc_);
return s1::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return s1::parser::make_SUB(loc_); return s1::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? s1::parser::make_DECREMENT(loc_) : s1::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return s1::parser::make_DECREMENT(loc_);
return s1::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return s1::parser::make_MOD(loc_); return s1::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return s1::parser::make_ASSIGN_MOD(loc_); return s1::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return s1::parser::make_BITWISE_OR(loc_); return s1::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? s1::parser::make_OR(loc_) : s1::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return s1::parser::make_OR(loc_);
return s1::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return s1::parser::make_BITWISE_AND(loc_); return s1::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? s1::parser::make_AND(loc_) : s1::parser::make_ASSIGN_BW_AND(loc_); return s1::parser::make_AND(loc_);
if (last == '=')
return s1::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return s1::parser::make_BITWISE_EXOR(loc_); return s1::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return s1::parser::make_ASSIGN_BW_EXOR(loc_); return s1::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return s1::parser::make_NOT(loc_); return s1::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return s1::parser::make_INEQUALITY(loc_); return s1::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -427,88 +421,180 @@ auto lexer::lex() -> xsk::gsc::s1::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return s1::parser::make_LESS(loc_); return s1::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? s1::parser::make_ASSIGN_LSHIFT(loc_) : s1::parser::make_LSHIFT(loc_);
}
return s1::parser::make_LESS_EQUAL(loc_); return s1::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return s1::parser::make_LSHIFT(loc_);
reader_.advance();
return s1::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return s1::parser::make_GREATER(loc_); return s1::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? s1::parser::make_ASSIGN_RSHIFT(loc_) : s1::parser::make_RSHIFT(loc_);
}
return s1::parser::make_GREATER_EQUAL(loc_); return s1::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return s1::parser::make_RSHIFT(loc_);
reader_.advance();
return s1::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw s1::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::s1::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw s1::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw s1::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return s1::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return s1::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return s1::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return s1::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::s1::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
{
buffer_.push(first);
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
}
else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if(state_ == state::field)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return s1::parser::make_SIZE(loc_);
}
return s1::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
//return s1::parser::make_PATH(xsk::gsc::s1::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return s1::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
}
//return s1::parser::make_IDENTIFIER(xsk::gsc::s1::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return s1::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
}
lex_number:
if (state_ == state::field)
buffer_.push('.');
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.')) if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'') if ((curr == '.' || curr == 'f') && last == '\'')
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if (curr == '\'') if (curr == '\'')
{ {
@ -516,55 +602,41 @@ auto lexer::read_number(char first) -> xsk::gsc::s1::parser::symbol_type
continue; continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58))) if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'')
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
auto data = buffer_.data; if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
auto len = buffer_.length; throw comp_error(loc_, "invalid number literal");
auto dot = true;
for (auto i = 1; i < len; i++) if (state_ == state::field || dot || flt)
{ return s1::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
if (data[i] == '.')
{ return s1::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
if (dot)
throw s1::parser::syntax_error(loc_, "invalid number '.'");
} }
if (data[i] == 'f' && i != len - 1) else if (curr == 'o')
throw s1::parser::syntax_error(loc_, "invalid number 'f'");
}
return s1::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{ {
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'o')) if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (curr == 'o' && last == '\'')
throw s1::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -581,30 +653,24 @@ auto lexer::read_number(char first) -> xsk::gsc::s1::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length <= 0)
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (buffer_.length < 3) return s1::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
throw error("gsc lexer: invalid octal literal!");
return s1::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
} }
else if (first == '0' && curr == 'b') else if (curr == 'b')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('b'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'b')) if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (curr == 'b' && last == '\'')
throw s1::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -616,35 +682,29 @@ auto lexer::read_number(char first) -> xsk::gsc::s1::parser::symbol_type
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return s1::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_); return s1::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
} }
else if (first == '0' && curr == 'x') else if (curr == 'x')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('x'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'x')) if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == 'x' && last == '\'')
throw s1::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -661,161 +721,13 @@ auto lexer::read_number(char first) -> xsk::gsc::s1::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw s1::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return s1::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_); return s1::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
} }
else // cant get here!
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw s1::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw s1::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw s1::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw s1::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw s1::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return s1::parser::make_FLOAT(std::string(data, len), loc_);
return s1::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::s1::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (reader_.last_byte == '\\')
throw s1::parser::syntax_error(loc_, "invalid path '\\\\'");
path = true;
curr = '/';
}
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw s1::parser::syntax_error(loc_, "invalid path end '\\'");
return s1::parser::make_PATH(xsk::gsc::s1::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
return s1::parser::make_IDENTIFIER(xsk::gsc::s1::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
auto lexer::read_dotsize() -> xsk::gsc::s1::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
}
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok)
{
curr = reader_.current_byte;
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return s1::parser::make_SIZE(loc_);
}
reader_.state = save.state;
reader_.bytes_remaining = save.bytes_remaining;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return s1::parser::make_DOT(loc_);
}
else return s1::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::s1::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::s1::parser::symbol_type

View File

@ -12,11 +12,12 @@ namespace xsk::gsc::s1
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -25,13 +26,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -47,15 +48,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -65,10 +68,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::s1::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::s1::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::s1::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::s1::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::s1::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::s1::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -617,6 +617,7 @@ namespace xsk { namespace gsc { namespace s1 {
// stmt_while // stmt_while
char dummy66[sizeof (ast::stmt_while::ptr)]; char dummy66[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -762,22 +763,23 @@ namespace xsk { namespace gsc { namespace s1 {
MUL = 86, // "*" MUL = 86, // "*"
DIV = 87, // "/" DIV = 87, // "/"
MOD = 88, // "%" MOD = 88, // "%"
PATH = 89, // "path" FIELD = 89, // "field"
IDENTIFIER = 90, // "identifier" PATH = 90, // "path"
STRING = 91, // "string literal" IDENTIFIER = 91, // "identifier"
ISTRING = 92, // "localized string" STRING = 92, // "string literal"
COLOR = 93, // "color" ISTRING = 93, // "localized string"
FLOAT = 94, // "float" COLOR = 94, // "color"
INTEGER = 95, // "integer" FLOAT = 95, // "float"
ADD_ARRAY = 96, // ADD_ARRAY INTEGER = 96, // "integer"
THEN = 97, // THEN ADD_ARRAY = 97, // ADD_ARRAY
TERN = 98, // TERN THEN = 98, // THEN
NEG = 99, // NEG TERN = 99, // TERN
ANIMREF = 100, // ANIMREF NEG = 100, // NEG
PREINC = 101, // PREINC ANIMREF = 101, // ANIMREF
PREDEC = 102, // PREDEC PREINC = 102, // PREINC
POSTINC = 103, // POSTINC PREDEC = 103, // PREDEC
POSTDEC = 104 // POSTDEC POSTINC = 104, // POSTINC
POSTDEC = 105 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -794,7 +796,7 @@ namespace xsk { namespace gsc { namespace s1 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 105, ///< Number of tokens. YYNTOKENS = 106, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -885,103 +887,104 @@ namespace xsk { namespace gsc { namespace s1 {
S_MUL = 86, // "*" S_MUL = 86, // "*"
S_DIV = 87, // "/" S_DIV = 87, // "/"
S_MOD = 88, // "%" S_MOD = 88, // "%"
S_PATH = 89, // "path" S_FIELD = 89, // "field"
S_IDENTIFIER = 90, // "identifier" S_PATH = 90, // "path"
S_STRING = 91, // "string literal" S_IDENTIFIER = 91, // "identifier"
S_ISTRING = 92, // "localized string" S_STRING = 92, // "string literal"
S_COLOR = 93, // "color" S_ISTRING = 93, // "localized string"
S_FLOAT = 94, // "float" S_COLOR = 94, // "color"
S_INTEGER = 95, // "integer" S_FLOAT = 95, // "float"
S_ADD_ARRAY = 96, // ADD_ARRAY S_INTEGER = 96, // "integer"
S_THEN = 97, // THEN S_ADD_ARRAY = 97, // ADD_ARRAY
S_TERN = 98, // TERN S_THEN = 98, // THEN
S_NEG = 99, // NEG S_TERN = 99, // TERN
S_ANIMREF = 100, // ANIMREF S_NEG = 100, // NEG
S_PREINC = 101, // PREINC S_ANIMREF = 101, // ANIMREF
S_PREDEC = 102, // PREDEC S_PREINC = 102, // PREINC
S_POSTINC = 103, // POSTINC S_PREDEC = 103, // PREDEC
S_POSTDEC = 104, // POSTDEC S_POSTINC = 104, // POSTINC
S_YYACCEPT = 105, // $accept S_POSTDEC = 105, // POSTDEC
S_root = 106, // root S_YYACCEPT = 106, // $accept
S_program = 107, // program S_root = 107, // root
S_inline = 108, // inline S_program = 108, // program
S_include = 109, // include S_inline = 109, // inline
S_declaration = 110, // declaration S_include = 110, // include
S_decl_usingtree = 111, // decl_usingtree S_declaration = 111, // declaration
S_decl_constant = 112, // decl_constant S_decl_usingtree = 112, // decl_usingtree
S_decl_thread = 113, // decl_thread S_decl_constant = 113, // decl_constant
S_stmt = 114, // stmt S_decl_thread = 114, // decl_thread
S_stmt_dev = 115, // stmt_dev S_stmt = 115, // stmt
S_stmt_block = 116, // stmt_block S_stmt_dev = 116, // stmt_dev
S_stmt_list = 117, // stmt_list S_stmt_block = 117, // stmt_block
S_stmt_expr = 118, // stmt_expr S_stmt_list = 118, // stmt_list
S_stmt_call = 119, // stmt_call S_stmt_expr = 119, // stmt_expr
S_stmt_assign = 120, // stmt_assign S_stmt_call = 120, // stmt_call
S_stmt_endon = 121, // stmt_endon S_stmt_assign = 121, // stmt_assign
S_stmt_notify = 122, // stmt_notify S_stmt_endon = 122, // stmt_endon
S_stmt_wait = 123, // stmt_wait S_stmt_notify = 123, // stmt_notify
S_stmt_waittill = 124, // stmt_waittill S_stmt_wait = 124, // stmt_wait
S_stmt_waittillmatch = 125, // stmt_waittillmatch S_stmt_waittill = 125, // stmt_waittill
S_stmt_waittillframeend = 126, // stmt_waittillframeend S_stmt_waittillmatch = 126, // stmt_waittillmatch
S_stmt_waitframe = 127, // stmt_waitframe S_stmt_waittillframeend = 127, // stmt_waittillframeend
S_stmt_if = 128, // stmt_if S_stmt_waitframe = 128, // stmt_waitframe
S_stmt_ifelse = 129, // stmt_ifelse S_stmt_if = 129, // stmt_if
S_stmt_while = 130, // stmt_while S_stmt_ifelse = 130, // stmt_ifelse
S_stmt_dowhile = 131, // stmt_dowhile S_stmt_while = 131, // stmt_while
S_stmt_for = 132, // stmt_for S_stmt_dowhile = 132, // stmt_dowhile
S_stmt_foreach = 133, // stmt_foreach S_stmt_for = 133, // stmt_for
S_stmt_switch = 134, // stmt_switch S_stmt_foreach = 134, // stmt_foreach
S_stmt_case = 135, // stmt_case S_stmt_switch = 135, // stmt_switch
S_stmt_default = 136, // stmt_default S_stmt_case = 136, // stmt_case
S_stmt_break = 137, // stmt_break S_stmt_default = 137, // stmt_default
S_stmt_continue = 138, // stmt_continue S_stmt_break = 138, // stmt_break
S_stmt_return = 139, // stmt_return S_stmt_continue = 139, // stmt_continue
S_stmt_breakpoint = 140, // stmt_breakpoint S_stmt_return = 140, // stmt_return
S_stmt_prof_begin = 141, // stmt_prof_begin S_stmt_breakpoint = 141, // stmt_breakpoint
S_stmt_prof_end = 142, // stmt_prof_end S_stmt_prof_begin = 142, // stmt_prof_begin
S_expr = 143, // expr S_stmt_prof_end = 143, // stmt_prof_end
S_expr_or_empty = 144, // expr_or_empty S_expr = 144, // expr
S_expr_assign = 145, // expr_assign S_expr_or_empty = 145, // expr_or_empty
S_expr_increment = 146, // expr_increment S_expr_assign = 146, // expr_assign
S_expr_decrement = 147, // expr_decrement S_expr_increment = 147, // expr_increment
S_expr_ternary = 148, // expr_ternary S_expr_decrement = 148, // expr_decrement
S_expr_binary = 149, // expr_binary S_expr_ternary = 149, // expr_ternary
S_expr_primitive = 150, // expr_primitive S_expr_binary = 150, // expr_binary
S_expr_complement = 151, // expr_complement S_expr_primitive = 151, // expr_primitive
S_expr_not = 152, // expr_not S_expr_complement = 152, // expr_complement
S_expr_call = 153, // expr_call S_expr_not = 153, // expr_not
S_expr_method = 154, // expr_method S_expr_call = 154, // expr_call
S_expr_function = 155, // expr_function S_expr_method = 155, // expr_method
S_expr_pointer = 156, // expr_pointer S_expr_function = 156, // expr_function
S_expr_add_array = 157, // expr_add_array S_expr_pointer = 157, // expr_pointer
S_expr_parameters = 158, // expr_parameters S_expr_add_array = 158, // expr_add_array
S_expr_arguments = 159, // expr_arguments S_expr_parameters = 159, // expr_parameters
S_expr_arguments_no_empty = 160, // expr_arguments_no_empty S_expr_arguments = 160, // expr_arguments
S_expr_reference = 161, // expr_reference S_expr_arguments_no_empty = 161, // expr_arguments_no_empty
S_expr_array = 162, // expr_array S_expr_reference = 162, // expr_reference
S_expr_field = 163, // expr_field S_expr_array = 163, // expr_array
S_expr_size = 164, // expr_size S_expr_field = 164, // expr_field
S_expr_paren = 165, // expr_paren S_expr_size = 165, // expr_size
S_expr_object = 166, // expr_object S_expr_paren = 166, // expr_paren
S_expr_thisthread = 167, // expr_thisthread S_expr_object = 167, // expr_object
S_expr_empty_array = 168, // expr_empty_array S_expr_thisthread = 168, // expr_thisthread
S_expr_undefined = 169, // expr_undefined S_expr_empty_array = 169, // expr_empty_array
S_expr_game = 170, // expr_game S_expr_undefined = 170, // expr_undefined
S_expr_self = 171, // expr_self S_expr_game = 171, // expr_game
S_expr_anim = 172, // expr_anim S_expr_self = 172, // expr_self
S_expr_level = 173, // expr_level S_expr_anim = 173, // expr_anim
S_expr_animation = 174, // expr_animation S_expr_level = 174, // expr_level
S_expr_animtree = 175, // expr_animtree S_expr_animation = 175, // expr_animation
S_expr_identifier = 176, // expr_identifier S_expr_animtree = 176, // expr_animtree
S_expr_path = 177, // expr_path S_expr_identifier = 177, // expr_identifier
S_expr_istring = 178, // expr_istring S_expr_path = 178, // expr_path
S_expr_string = 179, // expr_string S_expr_istring = 179, // expr_istring
S_expr_color = 180, // expr_color S_expr_string = 180, // expr_string
S_expr_vector = 181, // expr_vector S_expr_color = 181, // expr_color
S_expr_float = 182, // expr_float S_expr_vector = 182, // expr_vector
S_expr_integer = 183, // expr_integer S_expr_float = 183, // expr_float
S_expr_false = 184, // expr_false S_expr_integer = 184, // expr_integer
S_expr_true = 185 // expr_true S_expr_false = 185, // expr_false
S_expr_true = 186 // expr_true
}; };
}; };
@ -1294,6 +1297,7 @@ namespace xsk { namespace gsc { namespace s1 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2563,6 +2567,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2679,7 +2684,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
S1_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); S1_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4064,6 +4069,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4648,7 +4668,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2191, ///< Last index in yytable_. yylast_ = 2291, ///< Last index in yytable_.
yynnts_ = 81, ///< Number of nonterminal symbols. yynnts_ = 81, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -4952,6 +4972,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5267,6 +5288,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5340,7 +5362,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::s1 } } } // xsk::gsc::s1
#line 5344 "parser.hpp" #line 5366 "parser.hpp"

View File

@ -68,9 +68,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -80,18 +80,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -139,7 +130,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -185,24 +176,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw s2::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return s2::parser::make_S2EOF(loc_); return s2::parser::make_S2EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -216,7 +211,7 @@ auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return s2::parser::make_DIV(loc_); return s2::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -226,29 +221,27 @@ auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw s2::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return s2::parser::make_DEVBEGIN(loc_); return s2::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw s2::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -262,16 +255,15 @@ auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw s2::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -284,7 +276,7 @@ auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -294,69 +286,43 @@ auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw s2::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return s2::parser::make_DEVEND(loc_); return s2::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return s2::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw s2::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return s2::parser::make_MUL(loc_); return s2::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw s2::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return s2::parser::make_ASSIGN_MUL(loc_); return s2::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return s2::parser::make_LPAREN(loc_); return s2::parser::make_LPAREN(loc_);
case ')': case ')':
@ -376,6 +342,7 @@ auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return s2::parser::make_COLON(loc_); return s2::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return s2::parser::make_DOUBLECOLON(loc_); return s2::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -383,43 +350,70 @@ auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return s2::parser::make_ASSIGN(loc_); return s2::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return s2::parser::make_EQUALITY(loc_); return s2::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return s2::parser::make_ADD(loc_); return s2::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? s2::parser::make_INCREMENT(loc_) : s2::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return s2::parser::make_INCREMENT(loc_);
return s2::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return s2::parser::make_SUB(loc_); return s2::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? s2::parser::make_DECREMENT(loc_) : s2::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return s2::parser::make_DECREMENT(loc_);
return s2::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return s2::parser::make_MOD(loc_); return s2::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return s2::parser::make_ASSIGN_MOD(loc_); return s2::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return s2::parser::make_BITWISE_OR(loc_); return s2::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? s2::parser::make_OR(loc_) : s2::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return s2::parser::make_OR(loc_);
return s2::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return s2::parser::make_BITWISE_AND(loc_); return s2::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? s2::parser::make_AND(loc_) : s2::parser::make_ASSIGN_BW_AND(loc_); return s2::parser::make_AND(loc_);
if (last == '=')
return s2::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return s2::parser::make_BITWISE_EXOR(loc_); return s2::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return s2::parser::make_ASSIGN_BW_EXOR(loc_); return s2::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return s2::parser::make_NOT(loc_); return s2::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return s2::parser::make_INEQUALITY(loc_); return s2::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -427,88 +421,180 @@ auto lexer::lex() -> xsk::gsc::s2::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return s2::parser::make_LESS(loc_); return s2::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? s2::parser::make_ASSIGN_LSHIFT(loc_) : s2::parser::make_LSHIFT(loc_);
}
return s2::parser::make_LESS_EQUAL(loc_); return s2::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return s2::parser::make_LSHIFT(loc_);
reader_.advance();
return s2::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return s2::parser::make_GREATER(loc_); return s2::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? s2::parser::make_ASSIGN_RSHIFT(loc_) : s2::parser::make_RSHIFT(loc_);
}
return s2::parser::make_GREATER_EQUAL(loc_); return s2::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return s2::parser::make_RSHIFT(loc_);
reader_.advance();
return s2::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw s2::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::s2::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw s2::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw s2::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return s2::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return s2::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return s2::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return s2::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::s2::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
{
buffer_.push(first);
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
}
else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if(state_ == state::field)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return s2::parser::make_SIZE(loc_);
}
return s2::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
//return s2::parser::make_PATH(xsk::gsc::s2::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return s2::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
}
//return s2::parser::make_IDENTIFIER(xsk::gsc::s2::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return s2::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
}
lex_number:
if (state_ == state::field)
buffer_.push('.');
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.')) if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'') if ((curr == '.' || curr == 'f') && last == '\'')
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
if (curr == '\'') if (curr == '\'')
{ {
@ -516,55 +602,41 @@ auto lexer::read_number(char first) -> xsk::gsc::s2::parser::symbol_type
continue; continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58))) if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'')
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid number literal");
auto data = buffer_.data; if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
auto len = buffer_.length; throw comp_error(loc_, "invalid number literal");
auto dot = true;
for (auto i = 1; i < len; i++) if (state_ == state::field || dot || flt)
{ return s2::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
if (data[i] == '.')
{ return s2::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
if (dot)
throw s2::parser::syntax_error(loc_, "invalid number '.'");
} }
if (data[i] == 'f' && i != len - 1) else if (curr == 'o')
throw s2::parser::syntax_error(loc_, "invalid number 'f'");
}
return s2::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{ {
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'o')) if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (curr == 'o' && last == '\'')
throw s2::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -581,30 +653,24 @@ auto lexer::read_number(char first) -> xsk::gsc::s2::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length <= 0)
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid octal literal");
if (buffer_.length < 3) return s2::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
throw error("gsc lexer: invalid octal literal!");
return s2::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
} }
else if (first == '0' && curr == 'b') else if (curr == 'b')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('b'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'b')) if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (curr == 'b' && last == '\'')
throw s2::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -616,35 +682,29 @@ auto lexer::read_number(char first) -> xsk::gsc::s2::parser::symbol_type
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "number literal size exceeded");
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid binary literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return s2::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_); return s2::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
} }
else if (first == '0' && curr == 'x') else if (curr == 'x')
{ {
buffer_.push(first); buffer_.push(last);
buffer_.push('x'); buffer_.push(curr);
reader_.advance(); reader_.advance();
while (reader_.state == reader::ok) while (true)
{ {
auto last = reader_.last_byte; if (state == reader::end)
auto curr = reader_.current_byte; break;
if (curr == '\'' && (last == '\'' || last == 'x')) if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == 'x' && last == '\'')
throw s2::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'') if (curr == '\'')
{ {
@ -661,161 +721,13 @@ auto lexer::read_number(char first) -> xsk::gsc::s2::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (reader_.last_byte == '\'') if (last == '\'' || buffer_.length < 3)
throw s2::parser::syntax_error(loc_, "invalid number '\''"); throw comp_error(loc_, "invalid hexadecimal literal");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return s2::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_); return s2::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
} }
else // cant get here!
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw s2::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw s2::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
} }
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw s2::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw s2::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw s2::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return s2::parser::make_FLOAT(std::string(data, len), loc_);
return s2::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::s2::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (curr == '\\')
{
if (reader_.last_byte == '\\')
throw s2::parser::syntax_error(loc_, "invalid path '\\\\'");
path = true;
curr = '/';
}
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
if (path)
{
if (buffer_.data[buffer_.length - 1] == '/')
throw s2::parser::syntax_error(loc_, "invalid path end '\\'");
return s2::parser::make_PATH(xsk::gsc::s2::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
return s2::parser::make_IDENTIFIER(xsk::gsc::s2::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
}
auto lexer::read_dotsize() -> xsk::gsc::s2::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
}
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok)
{
curr = reader_.current_byte;
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return s2::parser::make_SIZE(loc_);
}
reader_.state = save.state;
reader_.bytes_remaining = save.bytes_remaining;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return s2::parser::make_DOT(loc_);
}
else return s2::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::s2::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::s2::parser::symbol_type

View File

@ -12,11 +12,12 @@ namespace xsk::gsc::s2
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -25,13 +26,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -47,15 +48,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -65,10 +68,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::s2::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::s2::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::s2::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::s2::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::s2::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::s2::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -617,6 +617,7 @@ namespace xsk { namespace gsc { namespace s2 {
// stmt_while // stmt_while
char dummy66[sizeof (ast::stmt_while::ptr)]; char dummy66[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -762,22 +763,23 @@ namespace xsk { namespace gsc { namespace s2 {
MUL = 86, // "*" MUL = 86, // "*"
DIV = 87, // "/" DIV = 87, // "/"
MOD = 88, // "%" MOD = 88, // "%"
PATH = 89, // "path" FIELD = 89, // "field"
IDENTIFIER = 90, // "identifier" PATH = 90, // "path"
STRING = 91, // "string literal" IDENTIFIER = 91, // "identifier"
ISTRING = 92, // "localized string" STRING = 92, // "string literal"
COLOR = 93, // "color" ISTRING = 93, // "localized string"
FLOAT = 94, // "float" COLOR = 94, // "color"
INTEGER = 95, // "integer" FLOAT = 95, // "float"
ADD_ARRAY = 96, // ADD_ARRAY INTEGER = 96, // "integer"
THEN = 97, // THEN ADD_ARRAY = 97, // ADD_ARRAY
TERN = 98, // TERN THEN = 98, // THEN
NEG = 99, // NEG TERN = 99, // TERN
ANIMREF = 100, // ANIMREF NEG = 100, // NEG
PREINC = 101, // PREINC ANIMREF = 101, // ANIMREF
PREDEC = 102, // PREDEC PREINC = 102, // PREINC
POSTINC = 103, // POSTINC PREDEC = 103, // PREDEC
POSTDEC = 104 // POSTDEC POSTINC = 104, // POSTINC
POSTDEC = 105 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -794,7 +796,7 @@ namespace xsk { namespace gsc { namespace s2 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 105, ///< Number of tokens. YYNTOKENS = 106, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -885,103 +887,104 @@ namespace xsk { namespace gsc { namespace s2 {
S_MUL = 86, // "*" S_MUL = 86, // "*"
S_DIV = 87, // "/" S_DIV = 87, // "/"
S_MOD = 88, // "%" S_MOD = 88, // "%"
S_PATH = 89, // "path" S_FIELD = 89, // "field"
S_IDENTIFIER = 90, // "identifier" S_PATH = 90, // "path"
S_STRING = 91, // "string literal" S_IDENTIFIER = 91, // "identifier"
S_ISTRING = 92, // "localized string" S_STRING = 92, // "string literal"
S_COLOR = 93, // "color" S_ISTRING = 93, // "localized string"
S_FLOAT = 94, // "float" S_COLOR = 94, // "color"
S_INTEGER = 95, // "integer" S_FLOAT = 95, // "float"
S_ADD_ARRAY = 96, // ADD_ARRAY S_INTEGER = 96, // "integer"
S_THEN = 97, // THEN S_ADD_ARRAY = 97, // ADD_ARRAY
S_TERN = 98, // TERN S_THEN = 98, // THEN
S_NEG = 99, // NEG S_TERN = 99, // TERN
S_ANIMREF = 100, // ANIMREF S_NEG = 100, // NEG
S_PREINC = 101, // PREINC S_ANIMREF = 101, // ANIMREF
S_PREDEC = 102, // PREDEC S_PREINC = 102, // PREINC
S_POSTINC = 103, // POSTINC S_PREDEC = 103, // PREDEC
S_POSTDEC = 104, // POSTDEC S_POSTINC = 104, // POSTINC
S_YYACCEPT = 105, // $accept S_POSTDEC = 105, // POSTDEC
S_root = 106, // root S_YYACCEPT = 106, // $accept
S_program = 107, // program S_root = 107, // root
S_inline = 108, // inline S_program = 108, // program
S_include = 109, // include S_inline = 109, // inline
S_declaration = 110, // declaration S_include = 110, // include
S_decl_usingtree = 111, // decl_usingtree S_declaration = 111, // declaration
S_decl_constant = 112, // decl_constant S_decl_usingtree = 112, // decl_usingtree
S_decl_thread = 113, // decl_thread S_decl_constant = 113, // decl_constant
S_stmt = 114, // stmt S_decl_thread = 114, // decl_thread
S_stmt_dev = 115, // stmt_dev S_stmt = 115, // stmt
S_stmt_block = 116, // stmt_block S_stmt_dev = 116, // stmt_dev
S_stmt_list = 117, // stmt_list S_stmt_block = 117, // stmt_block
S_stmt_expr = 118, // stmt_expr S_stmt_list = 118, // stmt_list
S_stmt_call = 119, // stmt_call S_stmt_expr = 119, // stmt_expr
S_stmt_assign = 120, // stmt_assign S_stmt_call = 120, // stmt_call
S_stmt_endon = 121, // stmt_endon S_stmt_assign = 121, // stmt_assign
S_stmt_notify = 122, // stmt_notify S_stmt_endon = 122, // stmt_endon
S_stmt_wait = 123, // stmt_wait S_stmt_notify = 123, // stmt_notify
S_stmt_waittill = 124, // stmt_waittill S_stmt_wait = 124, // stmt_wait
S_stmt_waittillmatch = 125, // stmt_waittillmatch S_stmt_waittill = 125, // stmt_waittill
S_stmt_waittillframeend = 126, // stmt_waittillframeend S_stmt_waittillmatch = 126, // stmt_waittillmatch
S_stmt_waitframe = 127, // stmt_waitframe S_stmt_waittillframeend = 127, // stmt_waittillframeend
S_stmt_if = 128, // stmt_if S_stmt_waitframe = 128, // stmt_waitframe
S_stmt_ifelse = 129, // stmt_ifelse S_stmt_if = 129, // stmt_if
S_stmt_while = 130, // stmt_while S_stmt_ifelse = 130, // stmt_ifelse
S_stmt_dowhile = 131, // stmt_dowhile S_stmt_while = 131, // stmt_while
S_stmt_for = 132, // stmt_for S_stmt_dowhile = 132, // stmt_dowhile
S_stmt_foreach = 133, // stmt_foreach S_stmt_for = 133, // stmt_for
S_stmt_switch = 134, // stmt_switch S_stmt_foreach = 134, // stmt_foreach
S_stmt_case = 135, // stmt_case S_stmt_switch = 135, // stmt_switch
S_stmt_default = 136, // stmt_default S_stmt_case = 136, // stmt_case
S_stmt_break = 137, // stmt_break S_stmt_default = 137, // stmt_default
S_stmt_continue = 138, // stmt_continue S_stmt_break = 138, // stmt_break
S_stmt_return = 139, // stmt_return S_stmt_continue = 139, // stmt_continue
S_stmt_breakpoint = 140, // stmt_breakpoint S_stmt_return = 140, // stmt_return
S_stmt_prof_begin = 141, // stmt_prof_begin S_stmt_breakpoint = 141, // stmt_breakpoint
S_stmt_prof_end = 142, // stmt_prof_end S_stmt_prof_begin = 142, // stmt_prof_begin
S_expr = 143, // expr S_stmt_prof_end = 143, // stmt_prof_end
S_expr_or_empty = 144, // expr_or_empty S_expr = 144, // expr
S_expr_assign = 145, // expr_assign S_expr_or_empty = 145, // expr_or_empty
S_expr_increment = 146, // expr_increment S_expr_assign = 146, // expr_assign
S_expr_decrement = 147, // expr_decrement S_expr_increment = 147, // expr_increment
S_expr_ternary = 148, // expr_ternary S_expr_decrement = 148, // expr_decrement
S_expr_binary = 149, // expr_binary S_expr_ternary = 149, // expr_ternary
S_expr_primitive = 150, // expr_primitive S_expr_binary = 150, // expr_binary
S_expr_complement = 151, // expr_complement S_expr_primitive = 151, // expr_primitive
S_expr_not = 152, // expr_not S_expr_complement = 152, // expr_complement
S_expr_call = 153, // expr_call S_expr_not = 153, // expr_not
S_expr_method = 154, // expr_method S_expr_call = 154, // expr_call
S_expr_function = 155, // expr_function S_expr_method = 155, // expr_method
S_expr_pointer = 156, // expr_pointer S_expr_function = 156, // expr_function
S_expr_add_array = 157, // expr_add_array S_expr_pointer = 157, // expr_pointer
S_expr_parameters = 158, // expr_parameters S_expr_add_array = 158, // expr_add_array
S_expr_arguments = 159, // expr_arguments S_expr_parameters = 159, // expr_parameters
S_expr_arguments_no_empty = 160, // expr_arguments_no_empty S_expr_arguments = 160, // expr_arguments
S_expr_reference = 161, // expr_reference S_expr_arguments_no_empty = 161, // expr_arguments_no_empty
S_expr_array = 162, // expr_array S_expr_reference = 162, // expr_reference
S_expr_field = 163, // expr_field S_expr_array = 163, // expr_array
S_expr_size = 164, // expr_size S_expr_field = 164, // expr_field
S_expr_paren = 165, // expr_paren S_expr_size = 165, // expr_size
S_expr_object = 166, // expr_object S_expr_paren = 166, // expr_paren
S_expr_thisthread = 167, // expr_thisthread S_expr_object = 167, // expr_object
S_expr_empty_array = 168, // expr_empty_array S_expr_thisthread = 168, // expr_thisthread
S_expr_undefined = 169, // expr_undefined S_expr_empty_array = 169, // expr_empty_array
S_expr_game = 170, // expr_game S_expr_undefined = 170, // expr_undefined
S_expr_self = 171, // expr_self S_expr_game = 171, // expr_game
S_expr_anim = 172, // expr_anim S_expr_self = 172, // expr_self
S_expr_level = 173, // expr_level S_expr_anim = 173, // expr_anim
S_expr_animation = 174, // expr_animation S_expr_level = 174, // expr_level
S_expr_animtree = 175, // expr_animtree S_expr_animation = 175, // expr_animation
S_expr_identifier = 176, // expr_identifier S_expr_animtree = 176, // expr_animtree
S_expr_path = 177, // expr_path S_expr_identifier = 177, // expr_identifier
S_expr_istring = 178, // expr_istring S_expr_path = 178, // expr_path
S_expr_string = 179, // expr_string S_expr_istring = 179, // expr_istring
S_expr_color = 180, // expr_color S_expr_string = 180, // expr_string
S_expr_vector = 181, // expr_vector S_expr_color = 181, // expr_color
S_expr_float = 182, // expr_float S_expr_vector = 182, // expr_vector
S_expr_integer = 183, // expr_integer S_expr_float = 183, // expr_float
S_expr_false = 184, // expr_false S_expr_integer = 184, // expr_integer
S_expr_true = 185 // expr_true S_expr_false = 185, // expr_false
S_expr_true = 186 // expr_true
}; };
}; };
@ -1294,6 +1297,7 @@ namespace xsk { namespace gsc { namespace s2 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2563,6 +2567,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2679,7 +2684,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
S2_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); S2_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4064,6 +4069,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
/// Factory for a "field" token symbol (token::FIELD) carrying the field's
/// text @a v and its source location @a l.
// Two variants are provided; the YY_CPLUSPLUS check selects the by-value
// signature when the language level is at least 201103L.
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
// Arguments were taken by value, so they are moved into the new symbol.
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
// Fallback variant: arguments are taken by const reference and passed to the
// symbol_type constructor as-is.
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4648,7 +4668,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2191, ///< Last index in yytable_. yylast_ = 2291, ///< Last index in yytable_.
yynnts_ = 81, ///< Number of nonterminal symbols. yynnts_ = 81, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -4952,6 +4972,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5267,6 +5288,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5340,7 +5362,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::s2 } } } // xsk::gsc::s2
#line 5344 "parser.hpp" #line 5366 "parser.hpp"

View File

@ -70,9 +70,9 @@ enum class keyword
KW_INVALID, KW_INVALID,
}; };
buffer::buffer() : size(1024), length(0) buffer::buffer() : length(0)
{ {
data = static_cast<char*>(std::malloc(size)); data = static_cast<char*>(std::malloc(max_buf_size));
} }
buffer::~buffer() buffer::~buffer()
@ -82,18 +82,9 @@ buffer::~buffer()
bool buffer::push(char c) bool buffer::push(char c)
{ {
if(length >= size) if(length >= max_buf_size)
{ return false;
auto nsize = size * 2;
auto ndata = reinterpret_cast<char*>(std::malloc(nsize));
if(!ndata) return false;
std::memmove(ndata, data, size);
std::free(data);
size = nsize;
data = ndata;
}
data[length++] = c; data[length++] = c;
return true; return true;
} }
@ -141,7 +132,7 @@ void reader::advance()
} }
} }
lexer::lexer(const std::string& name, const char* data, size_t size) : in_dev_state_(false), loc_(xsk::gsc::location(&name)), lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(xsk::gsc::location(&name)),
mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>()) mode_(build::dev), header_top_(0), locs_(std::stack<location>()), readers_(std::stack<reader>())
{ {
reader_.init(data, size); reader_.init(data, size);
@ -187,24 +178,28 @@ void lexer::restrict_header(const xsk::gsc::location& loc)
auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
{ {
buffer_.length = 0; buffer_.length = 0;
state_ = state::start;
loc_.step(); loc_.step();
while (true) while (true)
{ {
if (reader_.state == reader::end) const auto& state = reader_.state;
{ auto& last = reader_.last_byte;
if (in_dev_state_) auto& curr = reader_.current_byte;
throw s4::parser::syntax_error(loc_, "unmatched devblock start ('/#')"); auto path = false;
if(header_top_ > 0) if (state == reader::end)
{
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (header_top_ > 0)
pop_header(); pop_header();
else else
return s4::parser::make_S4EOF(loc_); return s4::parser::make_S4EOF(loc_);
} }
reader_.advance(); reader_.advance();
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
switch (last) switch (last)
{ {
@ -218,7 +213,7 @@ auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
loc_.step(); loc_.step();
continue; continue;
case '/': case '/':
if(reader_.state == reader::end || (curr != '/' && curr != '*' && curr != '#' && curr != '=')) if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
return s4::parser::make_DIV(loc_); return s4::parser::make_DIV(loc_);
reader_.advance(); reader_.advance();
@ -228,29 +223,27 @@ auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
if (last == '#') if (last == '#')
{ {
if (in_dev_state_) if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
if (mode_ == xsk::gsc::build::dev)
{ {
throw s4::parser::syntax_error(loc_, "cannot recurse devblock ('/#')"); indev_ = true;
}
else if (mode_ == xsk::gsc::build::dev)
{
in_dev_state_ = true;
return s4::parser::make_DEVBEGIN(loc_); return s4::parser::make_DEVBEGIN(loc_);
} }
else else
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched devblock start ('/#')");
throw s4::parser::syntax_error(loc_, "unmatched devblock start ('/#')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '#' && curr == '/') else if (last == '#' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -264,16 +257,15 @@ auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end) if (state == reader::end)
{ throw comp_error(loc_, "unmatched multiline comment start ('/*')");
throw s4::parser::syntax_error(loc_, "unmatched multiline comment start ('/*')");
} if (curr == '\n')
else if (curr == '\n')
{ {
loc_.lines(); loc_.lines();
loc_.step(); loc_.step();
} }
else if (reader_.state == reader::ok && last == '*' && curr == '/') else if (last == '*' && curr == '/')
{ {
reader_.advance(); reader_.advance();
break; break;
@ -286,7 +278,7 @@ auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
{ {
while (true) while (true)
{ {
if (reader_.state == reader::end || curr == '\n') if (state == reader::end || curr == '\n')
break; break;
reader_.advance(); reader_.advance();
@ -296,69 +288,43 @@ auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
case '#': case '#':
if (curr == '/') if (curr == '/')
{ {
if (!in_dev_state_) if (!indev_)
throw s4::parser::syntax_error(loc_, "unmatched devblock end ('#/')"); throw comp_error(loc_, "unmatched devblock end ('#/')");
in_dev_state_ = false; indev_ = false;
reader_.advance(); reader_.advance();
return s4::parser::make_DEVEND(loc_); return s4::parser::make_DEVEND(loc_);
} }
buffer_.push(last); buffer_.push(last);
while (reader_.state == reader::ok)
{
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
}
{ if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
if (auto len = buffer_.length; len == 4 || len == 7) throw comp_error(loc_, "unterminated preprocessor directive ('#')");
{
auto data = buffer_.data;
auto color = true;
for (auto i = 1; i < len; i++)
{
if ((data[i] < 48 || data[i] > 57) && (data[i] < 65 || data[i] > 70) && (data[i] < 97 || data[i] > 102))
{
color = false;
break;
}
}
if (color) return s4::parser::make_COLOR(std::string(++data, --len), loc_); state_ = state::preprocessor;
} goto lex_name;
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key == keyword::KW_INVALID)
throw s4::parser::syntax_error(loc_, utils::string::va("unknown preprocessor directive ('%s')", "#"));
if (keyword_is_token(key))
return keyword_token(key);
// call preprocessor(key);
}
continue;
case '*': case '*':
if (reader_.state == reader::end || (curr != '/' && curr != '=')) if (curr != '/' && curr != '=')
return s4::parser::make_MUL(loc_); return s4::parser::make_MUL(loc_);
reader_.advance(); reader_.advance();
if (curr == '/') if (last == '=')
throw s4::parser::syntax_error(loc_, "unmatched multiline comment end ('*/')");
return s4::parser::make_ASSIGN_MUL(loc_); return s4::parser::make_ASSIGN_MUL(loc_);
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
case '"': case '"':
case '\'': state_ = state::string;
return read_string(last, false); goto lex_string;
case '.': case '.':
return read_dotsize(); reader_.advance();
if(state == reader::end)
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
case '(': case '(':
return s4::parser::make_LPAREN(loc_); return s4::parser::make_LPAREN(loc_);
case ')': case ')':
@ -378,6 +344,7 @@ auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
case ':': case ':':
if (curr != ':') if (curr != ':')
return s4::parser::make_COLON(loc_); return s4::parser::make_COLON(loc_);
reader_.advance(); reader_.advance();
return s4::parser::make_DOUBLECOLON(loc_); return s4::parser::make_DOUBLECOLON(loc_);
case '?': case '?':
@ -385,43 +352,70 @@ auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
case '=': case '=':
if (curr != '=') if (curr != '=')
return s4::parser::make_ASSIGN(loc_); return s4::parser::make_ASSIGN(loc_);
reader_.advance(); reader_.advance();
return s4::parser::make_EQUALITY(loc_); return s4::parser::make_EQUALITY(loc_);
case '+': case '+':
if (curr != '+' && curr != '=') if (curr != '+' && curr != '=')
return s4::parser::make_ADD(loc_); return s4::parser::make_ADD(loc_);
reader_.advance(); reader_.advance();
return (last == '+') ? s4::parser::make_INCREMENT(loc_) : s4::parser::make_ASSIGN_ADD(loc_);
if (last == '+')
return s4::parser::make_INCREMENT(loc_);
return s4::parser::make_ASSIGN_ADD(loc_);
case '-': case '-':
if (curr != '-' && curr != '=') if (curr != '-' && curr != '=')
return s4::parser::make_SUB(loc_); return s4::parser::make_SUB(loc_);
reader_.advance(); reader_.advance();
return (last == '-') ? s4::parser::make_DECREMENT(loc_) : s4::parser::make_ASSIGN_SUB(loc_);
if (last == '-')
return s4::parser::make_DECREMENT(loc_);
return s4::parser::make_ASSIGN_SUB(loc_);
case '%': case '%':
if (curr != '=') if (curr != '=')
return s4::parser::make_MOD(loc_); return s4::parser::make_MOD(loc_);
reader_.advance(); reader_.advance();
return s4::parser::make_ASSIGN_MOD(loc_); return s4::parser::make_ASSIGN_MOD(loc_);
case '|': case '|':
if (curr != '|' && curr != '=') if (curr != '|' && curr != '=')
return s4::parser::make_BITWISE_OR(loc_); return s4::parser::make_BITWISE_OR(loc_);
reader_.advance(); reader_.advance();
return (last == '|') ? s4::parser::make_OR(loc_) : s4::parser::make_ASSIGN_BW_OR(loc_);
if (last == '|')
return s4::parser::make_OR(loc_);
return s4::parser::make_ASSIGN_BW_OR(loc_);
case '&': case '&':
if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') if (curr != '&' && curr != '=' && curr != '"' && curr != '\'')
return s4::parser::make_BITWISE_AND(loc_); return s4::parser::make_BITWISE_AND(loc_);
reader_.advance(); reader_.advance();
if (last == '"' || last == '\'')
return read_string(last, true); if (last == '&')
return (last == '&') ? s4::parser::make_AND(loc_) : s4::parser::make_ASSIGN_BW_AND(loc_); return s4::parser::make_AND(loc_);
if (last == '=')
return s4::parser::make_ASSIGN_BW_AND(loc_);
state_ = state::localize;
goto lex_string;
case '^': case '^':
if (curr != '=') if (curr != '=')
return s4::parser::make_BITWISE_EXOR(loc_); return s4::parser::make_BITWISE_EXOR(loc_);
reader_.advance(); reader_.advance();
return s4::parser::make_ASSIGN_BW_EXOR(loc_); return s4::parser::make_ASSIGN_BW_EXOR(loc_);
case '!': case '!':
if (curr != '=') if (curr != '=')
return s4::parser::make_NOT(loc_); return s4::parser::make_NOT(loc_);
reader_.advance(); reader_.advance();
return s4::parser::make_INEQUALITY(loc_); return s4::parser::make_INEQUALITY(loc_);
case '~': case '~':
@ -429,338 +423,144 @@ auto lexer::lex() -> xsk::gsc::s4::parser::symbol_type
case '<': case '<':
if (curr != '<' && curr != '=') if (curr != '<' && curr != '=')
return s4::parser::make_LESS(loc_); return s4::parser::make_LESS(loc_);
reader_.advance(); reader_.advance();
if (last == '<') if (last == '=')
{
reader_.advance();
return (last == '=') ? s4::parser::make_ASSIGN_LSHIFT(loc_) : s4::parser::make_LSHIFT(loc_);
}
return s4::parser::make_LESS_EQUAL(loc_); return s4::parser::make_LESS_EQUAL(loc_);
if (curr != '=')
return s4::parser::make_LSHIFT(loc_);
reader_.advance();
return s4::parser::make_ASSIGN_LSHIFT(loc_);
case '>': case '>':
if (curr != '>' && curr != '=') if (curr != '>' && curr != '=')
return s4::parser::make_GREATER(loc_); return s4::parser::make_GREATER(loc_);
reader_.advance(); reader_.advance();
if (last == '>')
{ if (last == '=')
reader_.advance();
return (last == '=') ? s4::parser::make_ASSIGN_RSHIFT(loc_) : s4::parser::make_RSHIFT(loc_);
}
return s4::parser::make_GREATER_EQUAL(loc_); return s4::parser::make_GREATER_EQUAL(loc_);
if (curr != '=')
return s4::parser::make_RSHIFT(loc_);
reader_.advance();
return s4::parser::make_ASSIGN_RSHIFT(loc_);
default: default:
lex_name_or_number:
if (last >= '0' && last <= '9') if (last >= '0' && last <= '9')
return lexer::read_number(last); goto lex_number;
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z') else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
return lexer::read_word(last); goto lex_name;
throw s4::parser::syntax_error(loc_, utils::string::va("bad token: \'%c\'", last)); throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
} }
}
}
auto lexer::read_string(char quote, bool localize) -> xsk::gsc::s4::parser::symbol_type lex_string:
{ if (state == reader::end)
if (localize) throw comp_error(loc_, "unmatched string start ('\"')");
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
reader_.advance(); reader_.advance();
if (last == '\n') while (true)
throw s4::parser::syntax_error(loc_, "unterminated string");
if (last == '\\') // process scapes
{ {
// TODO: if (last == '"')
}
if (last != '\\' && curr == quote)
break; break;
if (!buffer_.push(curr)) if (last == '\n')
throw error("gsc lexer: out of memory!"); throw comp_error(loc_, "unterminated string literal");
}
if (reader_.state == reader::end) if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (last == '\\')
{ {
throw s4::parser::syntax_error(loc_, utils::string::va("unmatched string start ('%s')", (quote == '"') ? "\"" : "\\'")); char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
} }
if (localize) if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
else if (!buffer_.push(last))
throw comp_error(loc_, "max string size exceeded");
reader_.advance();
}
if (state_ == state::localize)
return s4::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_); return s4::parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
return s4::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_); return s4::parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
}
auto lexer::read_number(char first) -> xsk::gsc::s4::parser::symbol_type lex_name:
{ buffer_.push(last);
if (first == '.')
while (true)
{ {
buffer_.push(first); if (state == reader::end)
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break; break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = true;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw s4::parser::syntax_error(loc_, "invalid number '.'");
}
if (data[i] == 'f' && i != len - 1)
throw s4::parser::syntax_error(loc_, "invalid number 'f'");
}
return s4::parser::make_FLOAT(std::string(data, len), loc_);
}
else
{
auto curr = reader_.current_byte;
if (first == '0' && curr == 'o')
{
buffer_.push(first);
buffer_.push('o');
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'o'))
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (curr == 'o' && last == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!(curr > 47 && curr < 56))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (buffer_.length < 3)
throw error("gsc lexer: invalid octal literal!");
return s4::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data + 2), loc_);
}
else if (first == '0' && curr == 'b')
{
buffer_.push(first);
buffer_.push('b');
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'b'))
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (curr == 'b' && last == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (curr != '0' && curr != '1')
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (buffer_.length < 3)
throw error("gsc lexer: invalid binary literal!");
return s4::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
}
else if (first == '0' && curr == 'x')
{
buffer_.push(first);
buffer_.push('x');
reader_.advance();
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'x'))
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (curr == 'x' && last == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!((curr > 47 && curr < 58) || (curr > 64 && curr < 71) || (curr > 96 && curr < 103)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (buffer_.length < 3)
throw error("gsc lexer: invalid hexadecimal literal!");
return s4::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
}
else
{
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto last = reader_.last_byte;
auto curr = reader_.current_byte;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if ((curr == '.' || curr == 'f') && last == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!(curr == 'f' || curr == '.' || (curr > 47 && curr < 58)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (reader_.last_byte == '\'')
throw s4::parser::syntax_error(loc_, "invalid number '\''");
auto data = buffer_.data;
auto len = buffer_.length;
auto dot = false;
for (auto i = 1; i < len; i++)
{
if (data[i] == '.')
{
if (dot)
throw s4::parser::syntax_error(loc_, "invalid number '.'");
dot = true;
}
if (data[i] == 'f' && i != len - 1)
throw s4::parser::syntax_error(loc_, "invalid number 'f'");
}
if (dot || data[len - 1] == 'f')
return s4::parser::make_FLOAT(std::string(data, len), loc_);
return s4::parser::make_INTEGER(std::string(data, len), loc_);
}
}
}
auto lexer::read_word(char first) -> xsk::gsc::s4::parser::symbol_type
{
auto path = false;
buffer_.push(first);
while (reader_.state == reader::ok)
{
auto curr = reader_.current_byte;
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58))) if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break; break;
if (curr == '\\') if (curr == '\\')
{ {
if (reader_.last_byte == '\\') if (last == '\\')
throw s4::parser::syntax_error(loc_, "invalid path '\\\\'"); throw comp_error(loc_, "invalid path '\\\\'");
path = true; path = true;
curr = '/'; if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
} }
else if (!buffer_.push(curr))
if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded");
throw error("gsc lexer: out of memory!");
reader_.advance(); reader_.advance();
} }
if(state_ == state::field)
if(buffer_.data[0] != '_')
{ {
for(auto i = 0; i < buffer_.length; i++) if (path)
throw comp_error(loc_, "invalid field token '\\'");
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
return s4::parser::make_SIZE(loc_);
}
return s4::parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
}
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
auto key = get_keyword(std::string_view(buffer_.data, buffer_.length));
if (key != keyword::KW_INVALID)
return keyword_token(key);
// TODO: call preprocessor(key);
throw comp_error(loc_, "unknown preprocessor directive");
state_ = state::start;
continue;
}
else
{
if (buffer_.data[0] != '_')
{
for (auto i = 0; i < buffer_.length; i++)
{ {
auto c = buffer_.data[i]; auto c = buffer_.data[i];
@ -777,36 +577,87 @@ auto lexer::read_word(char first) -> xsk::gsc::s4::parser::symbol_type
if (path) if (path)
{ {
if (buffer_.data[buffer_.length - 1] == '/') if (buffer_.data[buffer_.length - 1] == '/')
throw s4::parser::syntax_error(loc_, "invalid path end '\\'"); throw comp_error(loc_, "invalid path end '\\'");
return s4::parser::make_PATH(xsk::gsc::s4::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_); //return s4::parser::make_PATH(xsk::gsc::s4::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
return s4::parser::make_PATH(std::string(buffer_.data, buffer_.length), loc_);
} }
return s4::parser::make_IDENTIFIER(xsk::gsc::s4::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_); //return s4::parser::make_IDENTIFIER(xsk::gsc::s4::resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
} return s4::parser::make_IDENTIFIER(std::string(buffer_.data, buffer_.length), loc_);
auto lexer::read_dotsize() -> xsk::gsc::s4::parser::symbol_type
{
auto curr = reader_.current_byte;
if (curr > 47 && curr < 58)
{
return lexer::read_number('.');
} }
else if (curr == '_' || curr > 64 && curr < 91 || curr > 96 && curr < 123)
{
reader save;
save.state = reader_.state;
save.bytes_remaining = reader_.bytes_remaining;
save.buffer_pos = reader_.buffer_pos;
save.last_byte = reader_.last_byte;
save.current_byte = reader_.current_byte;
while (reader_.state == reader::ok) lex_number:
{ if (state_ == state::field)
curr = reader_.current_byte; buffer_.push('.');
if (!(curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123))) if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
{
buffer_.push(last);
auto dot = 0;
auto flt = 0;
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw comp_error(loc_, "invalid number literal");
if ((curr == '.' || curr == 'f') && last == '\'')
throw comp_error(loc_, "invalid number literal");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
break;
if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded");
reader_.advance();
}
if (last == '\'')
throw comp_error(loc_, "invalid number literal");
if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
throw comp_error(loc_, "invalid number literal");
if (state_ == state::field || dot || flt)
return s4::parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
return s4::parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
}
else if (curr == 'o')
{
reader_.advance();
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw comp_error(loc_, "invalid octal literal");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!(curr > 47 && curr < 56))
break; break;
if (!buffer_.push(curr)) if (!buffer_.push(curr))
@ -815,21 +666,81 @@ auto lexer::read_dotsize() -> xsk::gsc::s4::parser::symbol_type
reader_.advance(); reader_.advance();
} }
if (std::string_view(buffer_.data, buffer_.length) == "size") if (last == '\'' || buffer_.length <= 0)
throw comp_error(loc_, "invalid octal literal");
return s4::parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
}
else if (curr == 'b')
{ {
return s4::parser::make_SIZE(loc_); buffer_.push(last);
buffer_.push(curr);
reader_.advance();
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw comp_error(loc_, "invalid binary literal");
if (curr == '\'')
{
reader_.advance();
continue;
} }
reader_.state = save.state; if (curr != '0' && curr != '1')
reader_.bytes_remaining = save.bytes_remaining; break;
reader_.buffer_pos = save.buffer_pos;
reader_.last_byte = save.last_byte;
reader_.current_byte = save.current_byte;
return s4::parser::make_DOT(loc_); if (!buffer_.push(curr))
throw comp_error(loc_, "number literal size exceeded");
reader_.advance();
}
if (last == '\'' || buffer_.length < 3)
throw comp_error(loc_, "invalid binary literal");
return s4::parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
}
else if (curr == 'x')
{
buffer_.push(last);
buffer_.push(curr);
reader_.advance();
while (true)
{
if (state == reader::end)
break;
if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw comp_error(loc_, "invalid hexadecimal literal");
if (curr == '\'')
{
reader_.advance();
continue;
}
if (!((curr > 47 && curr < 58) || (curr > 64 && curr < 71) || (curr > 96 && curr < 103)))
break;
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
reader_.advance();
}
if (last == '\'' || buffer_.length < 3)
throw comp_error(loc_, "invalid hexadecimal literal");
return s4::parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
}
// cant get here!
} }
else return s4::parser::make_DOT(loc_);
} }
auto lexer::keyword_token(keyword k) -> xsk::gsc::s4::parser::symbol_type auto lexer::keyword_token(keyword k) -> xsk::gsc::s4::parser::symbol_type

View File

@ -12,11 +12,12 @@ namespace xsk::gsc::s4
enum class keyword; enum class keyword;
constexpr size_t max_buf_size = 0x2000;
struct buffer struct buffer
{ {
int size;
int length;
char* data; char* data;
int length;
buffer(); buffer();
~buffer(); ~buffer();
@ -25,13 +26,13 @@ struct buffer
struct reader struct reader
{ {
enum states { end, ok }; enum state_type : std::uint8_t { end, ok };
states state;
int bytes_remaining;
const char* buffer_pos; const char* buffer_pos;
std::uint32_t bytes_remaining;
char last_byte; char last_byte;
char current_byte; char current_byte;
state_type state;
reader(); reader();
@ -47,15 +48,17 @@ struct reader
class lexer class lexer
{ {
private: enum class state : std::uint8_t { start, string, localize, field, preprocessor };
reader reader_; reader reader_;
buffer buffer_; buffer buffer_;
location loc_; location loc_;
build mode_; build mode_;
bool in_dev_state_;
std::stack<location> locs_; std::stack<location> locs_;
std::stack<reader> readers_; std::stack<reader> readers_;
std::uint32_t header_top_; std::uint32_t header_top_;
state state_;
bool indev_;
public: public:
lexer(const std::string& name, const char* data, size_t size); lexer(const std::string& name, const char* data, size_t size);
@ -65,10 +68,6 @@ public:
void restrict_header(const xsk::gsc::location& loc); void restrict_header(const xsk::gsc::location& loc);
private: private:
auto read_string(char quote, bool localize) -> xsk::gsc::s4::parser::symbol_type;
auto read_number(char first) -> xsk::gsc::s4::parser::symbol_type;
auto read_word(char first) -> xsk::gsc::s4::parser::symbol_type;
auto read_dotsize() -> xsk::gsc::s4::parser::symbol_type;
auto keyword_token(keyword k) -> xsk::gsc::s4::parser::symbol_type; auto keyword_token(keyword k) -> xsk::gsc::s4::parser::symbol_type;
static auto keyword_is_token(keyword k) -> bool; static auto keyword_is_token(keyword k) -> bool;
static auto get_keyword(std::string_view str) -> keyword; static auto get_keyword(std::string_view str) -> keyword;

File diff suppressed because it is too large Load Diff

View File

@ -623,6 +623,7 @@ namespace xsk { namespace gsc { namespace s4 {
// stmt_while // stmt_while
char dummy68[sizeof (ast::stmt_while::ptr)]; char dummy68[sizeof (ast::stmt_while::ptr)];
// "field"
// "path" // "path"
// "identifier" // "identifier"
// "string literal" // "string literal"
@ -770,22 +771,23 @@ namespace xsk { namespace gsc { namespace s4 {
MUL = 88, // "*" MUL = 88, // "*"
DIV = 89, // "/" DIV = 89, // "/"
MOD = 90, // "%" MOD = 90, // "%"
PATH = 91, // "path" FIELD = 91, // "field"
IDENTIFIER = 92, // "identifier" PATH = 92, // "path"
STRING = 93, // "string literal" IDENTIFIER = 93, // "identifier"
ISTRING = 94, // "localized string" STRING = 94, // "string literal"
COLOR = 95, // "color" ISTRING = 95, // "localized string"
FLOAT = 96, // "float" COLOR = 96, // "color"
INTEGER = 97, // "integer" FLOAT = 97, // "float"
ADD_ARRAY = 98, // ADD_ARRAY INTEGER = 98, // "integer"
THEN = 99, // THEN ADD_ARRAY = 99, // ADD_ARRAY
TERN = 100, // TERN THEN = 100, // THEN
NEG = 101, // NEG TERN = 101, // TERN
ANIMREF = 102, // ANIMREF NEG = 102, // NEG
PREINC = 103, // PREINC ANIMREF = 103, // ANIMREF
PREDEC = 104, // PREDEC PREINC = 104, // PREINC
POSTINC = 105, // POSTINC PREDEC = 105, // PREDEC
POSTDEC = 106 // POSTDEC POSTINC = 106, // POSTINC
POSTDEC = 107 // POSTDEC
}; };
/// Backward compatibility alias (Bison 3.6). /// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype; typedef token_kind_type yytokentype;
@ -802,7 +804,7 @@ namespace xsk { namespace gsc { namespace s4 {
{ {
enum symbol_kind_type enum symbol_kind_type
{ {
YYNTOKENS = 107, ///< Number of tokens. YYNTOKENS = 108, ///< Number of tokens.
S_YYEMPTY = -2, S_YYEMPTY = -2,
S_YYEOF = 0, // "end of file" S_YYEOF = 0, // "end of file"
S_YYerror = 1, // error S_YYerror = 1, // error
@ -895,105 +897,106 @@ namespace xsk { namespace gsc { namespace s4 {
S_MUL = 88, // "*" S_MUL = 88, // "*"
S_DIV = 89, // "/" S_DIV = 89, // "/"
S_MOD = 90, // "%" S_MOD = 90, // "%"
S_PATH = 91, // "path" S_FIELD = 91, // "field"
S_IDENTIFIER = 92, // "identifier" S_PATH = 92, // "path"
S_STRING = 93, // "string literal" S_IDENTIFIER = 93, // "identifier"
S_ISTRING = 94, // "localized string" S_STRING = 94, // "string literal"
S_COLOR = 95, // "color" S_ISTRING = 95, // "localized string"
S_FLOAT = 96, // "float" S_COLOR = 96, // "color"
S_INTEGER = 97, // "integer" S_FLOAT = 97, // "float"
S_ADD_ARRAY = 98, // ADD_ARRAY S_INTEGER = 98, // "integer"
S_THEN = 99, // THEN S_ADD_ARRAY = 99, // ADD_ARRAY
S_TERN = 100, // TERN S_THEN = 100, // THEN
S_NEG = 101, // NEG S_TERN = 101, // TERN
S_ANIMREF = 102, // ANIMREF S_NEG = 102, // NEG
S_PREINC = 103, // PREINC S_ANIMREF = 103, // ANIMREF
S_PREDEC = 104, // PREDEC S_PREINC = 104, // PREINC
S_POSTINC = 105, // POSTINC S_PREDEC = 105, // PREDEC
S_POSTDEC = 106, // POSTDEC S_POSTINC = 106, // POSTINC
S_YYACCEPT = 107, // $accept S_POSTDEC = 107, // POSTDEC
S_root = 108, // root S_YYACCEPT = 108, // $accept
S_program = 109, // program S_root = 109, // root
S_inline = 110, // inline S_program = 110, // program
S_include = 111, // include S_inline = 111, // inline
S_declaration = 112, // declaration S_include = 112, // include
S_decl_usingtree = 113, // decl_usingtree S_declaration = 113, // declaration
S_decl_constant = 114, // decl_constant S_decl_usingtree = 114, // decl_usingtree
S_decl_thread = 115, // decl_thread S_decl_constant = 115, // decl_constant
S_stmt = 116, // stmt S_decl_thread = 116, // decl_thread
S_stmt_dev = 117, // stmt_dev S_stmt = 117, // stmt
S_stmt_block = 118, // stmt_block S_stmt_dev = 118, // stmt_dev
S_stmt_list = 119, // stmt_list S_stmt_block = 119, // stmt_block
S_stmt_expr = 120, // stmt_expr S_stmt_list = 120, // stmt_list
S_stmt_call = 121, // stmt_call S_stmt_expr = 121, // stmt_expr
S_stmt_assign = 122, // stmt_assign S_stmt_call = 122, // stmt_call
S_stmt_endon = 123, // stmt_endon S_stmt_assign = 123, // stmt_assign
S_stmt_notify = 124, // stmt_notify S_stmt_endon = 124, // stmt_endon
S_stmt_wait = 125, // stmt_wait S_stmt_notify = 125, // stmt_notify
S_stmt_waittill = 126, // stmt_waittill S_stmt_wait = 126, // stmt_wait
S_stmt_waittillmatch = 127, // stmt_waittillmatch S_stmt_waittill = 127, // stmt_waittill
S_stmt_waittillframeend = 128, // stmt_waittillframeend S_stmt_waittillmatch = 128, // stmt_waittillmatch
S_stmt_waitframe = 129, // stmt_waitframe S_stmt_waittillframeend = 129, // stmt_waittillframeend
S_stmt_if = 130, // stmt_if S_stmt_waitframe = 130, // stmt_waitframe
S_stmt_ifelse = 131, // stmt_ifelse S_stmt_if = 131, // stmt_if
S_stmt_while = 132, // stmt_while S_stmt_ifelse = 132, // stmt_ifelse
S_stmt_dowhile = 133, // stmt_dowhile S_stmt_while = 133, // stmt_while
S_stmt_for = 134, // stmt_for S_stmt_dowhile = 134, // stmt_dowhile
S_stmt_foreach = 135, // stmt_foreach S_stmt_for = 135, // stmt_for
S_stmt_switch = 136, // stmt_switch S_stmt_foreach = 136, // stmt_foreach
S_stmt_case = 137, // stmt_case S_stmt_switch = 137, // stmt_switch
S_stmt_default = 138, // stmt_default S_stmt_case = 138, // stmt_case
S_stmt_break = 139, // stmt_break S_stmt_default = 139, // stmt_default
S_stmt_continue = 140, // stmt_continue S_stmt_break = 140, // stmt_break
S_stmt_return = 141, // stmt_return S_stmt_continue = 141, // stmt_continue
S_stmt_breakpoint = 142, // stmt_breakpoint S_stmt_return = 142, // stmt_return
S_stmt_prof_begin = 143, // stmt_prof_begin S_stmt_breakpoint = 143, // stmt_breakpoint
S_stmt_prof_end = 144, // stmt_prof_end S_stmt_prof_begin = 144, // stmt_prof_begin
S_expr = 145, // expr S_stmt_prof_end = 145, // stmt_prof_end
S_expr_or_empty = 146, // expr_or_empty S_expr = 146, // expr
S_expr_assign = 147, // expr_assign S_expr_or_empty = 147, // expr_or_empty
S_expr_increment = 148, // expr_increment S_expr_assign = 148, // expr_assign
S_expr_decrement = 149, // expr_decrement S_expr_increment = 149, // expr_increment
S_expr_ternary = 150, // expr_ternary S_expr_decrement = 150, // expr_decrement
S_expr_binary = 151, // expr_binary S_expr_ternary = 151, // expr_ternary
S_expr_primitive = 152, // expr_primitive S_expr_binary = 152, // expr_binary
S_expr_complement = 153, // expr_complement S_expr_primitive = 153, // expr_primitive
S_expr_not = 154, // expr_not S_expr_complement = 154, // expr_complement
S_expr_call = 155, // expr_call S_expr_not = 155, // expr_not
S_expr_method = 156, // expr_method S_expr_call = 156, // expr_call
S_expr_function = 157, // expr_function S_expr_method = 157, // expr_method
S_expr_pointer = 158, // expr_pointer S_expr_function = 158, // expr_function
S_expr_add_array = 159, // expr_add_array S_expr_pointer = 159, // expr_pointer
S_expr_parameters = 160, // expr_parameters S_expr_add_array = 160, // expr_add_array
S_expr_arguments = 161, // expr_arguments S_expr_parameters = 161, // expr_parameters
S_expr_arguments_no_empty = 162, // expr_arguments_no_empty S_expr_arguments = 162, // expr_arguments
S_expr_isdefined = 163, // expr_isdefined S_expr_arguments_no_empty = 163, // expr_arguments_no_empty
S_expr_istrue = 164, // expr_istrue S_expr_isdefined = 164, // expr_isdefined
S_expr_reference = 165, // expr_reference S_expr_istrue = 165, // expr_istrue
S_expr_array = 166, // expr_array S_expr_reference = 166, // expr_reference
S_expr_field = 167, // expr_field S_expr_array = 167, // expr_array
S_expr_size = 168, // expr_size S_expr_field = 168, // expr_field
S_expr_paren = 169, // expr_paren S_expr_size = 169, // expr_size
S_expr_object = 170, // expr_object S_expr_paren = 170, // expr_paren
S_expr_thisthread = 171, // expr_thisthread S_expr_object = 171, // expr_object
S_expr_empty_array = 172, // expr_empty_array S_expr_thisthread = 172, // expr_thisthread
S_expr_undefined = 173, // expr_undefined S_expr_empty_array = 173, // expr_empty_array
S_expr_game = 174, // expr_game S_expr_undefined = 174, // expr_undefined
S_expr_self = 175, // expr_self S_expr_game = 175, // expr_game
S_expr_anim = 176, // expr_anim S_expr_self = 176, // expr_self
S_expr_level = 177, // expr_level S_expr_anim = 177, // expr_anim
S_expr_animation = 178, // expr_animation S_expr_level = 178, // expr_level
S_expr_animtree = 179, // expr_animtree S_expr_animation = 179, // expr_animation
S_expr_identifier = 180, // expr_identifier S_expr_animtree = 180, // expr_animtree
S_expr_path = 181, // expr_path S_expr_identifier = 181, // expr_identifier
S_expr_istring = 182, // expr_istring S_expr_path = 182, // expr_path
S_expr_string = 183, // expr_string S_expr_istring = 183, // expr_istring
S_expr_color = 184, // expr_color S_expr_string = 184, // expr_string
S_expr_vector = 185, // expr_vector S_expr_color = 185, // expr_color
S_expr_float = 186, // expr_float S_expr_vector = 186, // expr_vector
S_expr_integer = 187, // expr_integer S_expr_float = 187, // expr_float
S_expr_false = 188, // expr_false S_expr_integer = 188, // expr_integer
S_expr_true = 189 // expr_true S_expr_false = 189, // expr_false
S_expr_true = 190 // expr_true
}; };
}; };
@ -1314,6 +1317,7 @@ namespace xsk { namespace gsc { namespace s4 {
value.move< ast::stmt_while::ptr > (std::move (that.value)); value.move< ast::stmt_while::ptr > (std::move (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2619,6 +2623,7 @@ switch (yykind)
value.template destroy< ast::stmt_while::ptr > (); value.template destroy< ast::stmt_while::ptr > ();
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -2735,7 +2740,7 @@ switch (yykind)
: super_type(token_type (tok), v, l) : super_type(token_type (tok), v, l)
#endif #endif
{ {
S4_ASSERT ((token::PATH <= tok && tok <= token::INTEGER)); S4_ASSERT ((token::FIELD <= tok && tok <= token::INTEGER));
} }
}; };
@ -4150,6 +4155,21 @@ switch (yykind)
return symbol_type (token::MOD, l); return symbol_type (token::MOD, l);
} }
#endif #endif
#if 201103L <= YY_CPLUSPLUS
static
symbol_type
make_FIELD (std::string v, location_type l)
{
return symbol_type (token::FIELD, std::move (v), std::move (l));
}
#else
static
symbol_type
make_FIELD (const std::string& v, const location_type& l)
{
return symbol_type (token::FIELD, v, l);
}
#endif
#if 201103L <= YY_CPLUSPLUS #if 201103L <= YY_CPLUSPLUS
static static
symbol_type symbol_type
@ -4734,7 +4754,7 @@ switch (yykind)
/// Constants. /// Constants.
enum enum
{ {
yylast_ = 2336, ///< Last index in yytable_. yylast_ = 2425, ///< Last index in yytable_.
yynnts_ = 83, ///< Number of nonterminal symbols. yynnts_ = 83, ///< Number of nonterminal symbols.
yyfinal_ = 21 ///< Termination state number. yyfinal_ = 21 ///< Termination state number.
}; };
@ -5046,6 +5066,7 @@ switch (yykind)
value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value)); value.copy< ast::stmt_while::ptr > (YY_MOVE (that.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5369,6 +5390,7 @@ switch (yykind)
value.move< ast::stmt_while::ptr > (YY_MOVE (s.value)); value.move< ast::stmt_while::ptr > (YY_MOVE (s.value));
break; break;
case symbol_kind::S_FIELD: // "field"
case symbol_kind::S_PATH: // "path" case symbol_kind::S_PATH: // "path"
case symbol_kind::S_IDENTIFIER: // "identifier" case symbol_kind::S_IDENTIFIER: // "identifier"
case symbol_kind::S_STRING: // "string literal" case symbol_kind::S_STRING: // "string literal"
@ -5442,7 +5464,7 @@ switch (yykind)
#line 13 "parser.ypp" #line 13 "parser.ypp"
} } } // xsk::gsc::s4 } } } // xsk::gsc::s4
#line 5446 "parser.hpp" #line 5468 "parser.hpp"