diff --git a/gen/h1/parser.ypp b/gen/h1/parser.ypp index 7083d3f8..a7aa26ec 100644 --- a/gen/h1/parser.ypp +++ b/gen/h1/parser.ypp @@ -41,14 +41,14 @@ using namespace xsk::gsc; xsk::gsc::h1::parser::symbol_type H1lex(xsk::gsc::h1::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" -%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" +%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/gen/h2/parser.ypp b/gen/h2/parser.ypp index 32b5ef5c..000cbabf 100644 --- a/gen/h2/parser.ypp +++ b/gen/h2/parser.ypp @@ -39,14 +39,14 @@ using namespace xsk::gsc; xsk::gsc::h2::parser::symbol_type H2lex(xsk::gsc::h2::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" -%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" +%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/gen/iw5/parser.ypp b/gen/iw5/parser.ypp index acc6f2ad..974baf69 100644 --- a/gen/iw5/parser.ypp +++ b/gen/iw5/parser.ypp @@ -39,14 +39,14 @@ using namespace xsk::gsc; xsk::gsc::iw5::parser::symbol_type IW5lex(xsk::gsc::iw5::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" -%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" 
+%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/gen/iw6/parser.ypp b/gen/iw6/parser.ypp index 9af9fcde..99a39f5b 100644 --- a/gen/iw6/parser.ypp +++ b/gen/iw6/parser.ypp @@ -39,14 +39,14 @@ using namespace xsk::gsc; xsk::gsc::iw6::parser::symbol_type IW6lex(xsk::gsc::iw6::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" -%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" +%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/gen/iw7/parser.ypp b/gen/iw7/parser.ypp index 03e7c60c..5220be26 100644 --- a/gen/iw7/parser.ypp +++ b/gen/iw7/parser.ypp @@ -39,14 +39,14 @@ using namespace xsk::gsc; xsk::gsc::iw7::parser::symbol_type IW7lex(xsk::gsc::iw7::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" -%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" +%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/gen/iw8/parser.ypp b/gen/iw8/parser.ypp index 2c516ab1..a33fc5ef 100644 --- a/gen/iw8/parser.ypp +++ b/gen/iw8/parser.ypp @@ -39,14 +39,14 @@ using namespace xsk::gsc; xsk::gsc::iw8::parser::symbol_type IW8lex(xsk::gsc::iw8::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" -%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token 
HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" +%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/gen/s1/parser.ypp b/gen/s1/parser.ypp index 0414d06f..413c0d54 100644 --- a/gen/s1/parser.ypp +++ b/gen/s1/parser.ypp @@ -39,14 +39,14 @@ using namespace xsk::gsc; xsk::gsc::s1::parser::symbol_type S1lex(xsk::gsc::s1::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" -%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" +%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/gen/s2/parser.ypp b/gen/s2/parser.ypp index 03458e59..4fbeb2ab 100644 --- a/gen/s2/parser.ypp +++ b/gen/s2/parser.ypp @@ -39,14 +39,14 @@ using namespace xsk::gsc; xsk::gsc::s2::parser::symbol_type S2lex(xsk::gsc::s2::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" -%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" +%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/gen/s4/parser.ypp b/gen/s4/parser.ypp index 9725a61b..e6a63ab7 100644 --- a/gen/s4/parser.ypp +++ b/gen/s4/parser.ypp @@ -39,14 +39,14 @@ using namespace xsk::gsc; xsk::gsc::s4::parser::symbol_type S4lex(xsk::gsc::s4::lexer& lexer); } -%token HSDEFINE "#define" -%token HSUNDEF "#undef" 
-%token HSIFDEF "#ifdef" -%token HSIFNDEF "#ifndef" -%token HSIF "#if" -%token HSELIF "#elif" -%token HSELSE "#else" -%token HSENDIF "#endif" +%token SH_DEFINE "#define" +%token SH_UNDEF "#undef" +%token SH_IFDEF "#ifdef" +%token SH_IFNDEF "#ifndef" +%token SH_IF "#if" +%token SH_ELIF "#elif" +%token SH_ELSE "#else" +%token SH_ENDIF "#endif" %token DEVBEGIN "/#" %token DEVEND "#/" %token INLINE "#inline" diff --git a/src/h1/xsk/lexer.cpp b/src/h1/xsk/lexer.cpp index 5f9018aa..ca0a4e84 100644 --- a/src/h1/xsk/lexer.cpp +++ b/src/h1/xsk/lexer.cpp @@ -16,6 +16,56 @@ xsk::gsc::h1::parser::symbol_type H1lex(xsk::gsc::h1::lexer& lexer) namespace xsk::gsc::h1 { +const std::unordered_map keyword_map +{{ + { "#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF }, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { "#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "waitframe", parser::token::WAITFRAME }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + { "case", parser::token::CASE }, + { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { 
"breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, +}}; + buffer::buffer() : length(0) { data = static_cast(std::malloc(max_buf_size)); @@ -35,10 +85,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +126,7 @@ void reader::advance() } } -lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +145,7 @@ void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +194,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_H1EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = false; + + advance(); switch (last) { @@ -157,12 +209,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw 
comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +279,30 @@ auto lexer::lex() -> parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && (curr == '\r' || curr == '\n')) /* a backslash directly before the line break continues the comment */ + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') /* a CR is only accepted as part of CRLF */ + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') break; reader_.advance(); @@ -238,15 +316,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); indev_ = false; - reader_.advance(); + advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) /* skip blanks between '#' and the directive name */ + { + if (last != ' ' && last != '\t') + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +340,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +350,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated field ('.')"); state_ = state::field; @@ -291,7 +377,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return 
parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +385,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if (curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +401,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return parser::make_DECREMENT(loc_); @@ -325,24 +411,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != '=') return parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +442,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +456,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_LESS_EQUAL(loc_); if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != 
'>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +477,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +490,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, "unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) /* input ends right after the escape backslash */ + throw comp_error(loc_, "invalid token ('\\')"); + char c = curr; switch (curr) { @@ -435,13 +524,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +559,7 @@ lex_name: else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +578,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::H1UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) { - if (itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = 
itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -518,7 +604,7 @@ lex_name: { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -557,7 +643,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -571,7 +657,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -587,7 +673,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -599,7 +685,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -609,7 +695,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 0) @@ -621,7 +707,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -633,7 +719,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -643,7 +729,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -655,7 +741,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -667,7 +753,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -677,7 +763,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -689,54 +775,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", 
parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { "#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { "waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "waitframe", parser::token::WAITFRAME }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", parser::token::WHILE }, - { "for", parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont wrap comment marks '/\/' '/\*' outside strings + if (state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == 
'\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') /* backslash + CRLF: three bytes are dropped */ + { + if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + /* backslash + LF: two bytes are dropped; never re-tested after the CRLF branch moved buffer_pos */ + else if (reader_.buffer_pos[1] == '\n') + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) /* clean_ is false once a non-blank byte was lexed on the current line */ + throw comp_error(loc_, "invalid token ('#')"); + + switch (token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw comp_error(loc_, "unknown preprocessor directive"); + } +} } // namespace xsk::gsc::h1 diff --git a/src/h1/xsk/lexer.hpp b/src/h1/xsk/lexer.hpp index 
d31fddef..ddf472da 100644 --- a/src/h1/xsk/lexer.hpp +++ b/src/h1/xsk/lexer.hpp @@ -5,8 +5,6 @@ #pragma once -#include "h1.hpp" - namespace xsk::gsc::h1 { @@ -57,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; public: lexer(const std::string& name, const char* data, size_t size); @@ -66,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // namespace xsk::gsc::h1 diff --git a/src/h1/xsk/parser.hpp b/src/h1/xsk/parser.hpp index b291f270..d6e34997 100644 --- a/src/h1/xsk/parser.hpp +++ b/src/h1/xsk/parser.hpp @@ -679,14 +679,14 @@ namespace xsk { namespace gsc { namespace h1 { H1EOF = 0, // "end of file" H1error = 1, // error H1UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // "#undef" - HSIFDEF = 5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // "#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -811,14 +811,14 @@ namespace xsk { namespace gsc { namespace h1 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, // "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" + S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // 
"#/" S_INLINE = 13, // "#inline" @@ -2800,121 +2800,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type (token::HSUNDEF, l); + return symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF (location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type 
(token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else static symbol_type - make_HSELSE (const location_type& l) + make_SH_ELSE (const location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS diff --git a/src/h2/xsk/lexer.cpp b/src/h2/xsk/lexer.cpp index e71428f7..05b98a02 100644 --- a/src/h2/xsk/lexer.cpp +++ b/src/h2/xsk/lexer.cpp @@ -16,6 +16,56 @@ xsk::gsc::h2::parser::symbol_type H2lex(xsk::gsc::h2::lexer& lexer) namespace xsk::gsc::h2 { +const std::unordered_map keyword_map +{{ + { "#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF }, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", 
parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { "#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "waitframe", parser::token::WAITFRAME }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + { "case", parser::token::CASE }, + { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { "breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, +}}; + buffer::buffer() : length(0) { data = static_cast(std::malloc(max_buf_size)); @@ -35,10 +85,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : state(reader::end), buffer_pos(0), 
bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +126,7 @@ void reader::advance() } } -lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +145,7 @@ void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +194,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_H2EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = false; + + advance(); switch (last) { @@ -157,12 +209,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +279,30 @@ auto lexer::lex() -> parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && curr == '\r' || curr == '\n') + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\')"); + + reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') break; reader_.advance(); @@ -238,15 +316,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); indev_ = false; - reader_.advance(); 
+ advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) + { + if (last != ' ' || last != '\t') + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +340,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +350,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated field ('.')"); state_ = state::field; @@ -291,7 +377,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +385,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if (curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +401,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return parser::make_DECREMENT(loc_); @@ -325,24 +411,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != '=') return 
parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +442,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +456,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_LESS_EQUAL(loc_); if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != '>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +477,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +490,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, 
"unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) + throw comp_error(loc_, "invalid token ('\\')"); /* input ended right after the escape backslash */ + char c = curr; switch (curr) { @@ -435,13 +524,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +559,7 @@ lex_name: else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +578,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::H2UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) { - if (itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -518,7 +604,7 @@ lex_name: { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -557,7 +643,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -571,7 +657,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -587,7 +673,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -599,7 +685,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); 
continue; } @@ -609,7 +695,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 0) @@ -621,7 +707,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -633,7 +719,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -643,7 +729,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -655,7 +741,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -667,7 +753,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -677,7 +763,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -689,54 +775,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { "#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { "waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "waitframe", parser::token::WAITFRAME }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", 
parser::token::WHILE }, - { "for", parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont wrap comment marks '/\/' '/\*' outside strings + if (state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == '\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') + { + if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + + else if (reader_.buffer_pos[1] == '\n') /* else: the CRLF branch above already consumed this continuation */ + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void 
lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) + throw comp_error(loc_, "invalid token ('#')"); + + switch (token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw comp_error(loc_, "unknown preprocessor directive"); + } +} } // namespace xsk::gsc::h2 diff --git a/src/h2/xsk/lexer.hpp b/src/h2/xsk/lexer.hpp index 1c386d53..c2b9d7fa 100644 --- a/src/h2/xsk/lexer.hpp +++ b/src/h2/xsk/lexer.hpp @@ -55,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; public: lexer(const std::string& name, const char* data, size_t size); @@ -64,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // namespace xsk::gsc::h2 diff --git a/src/h2/xsk/parser.hpp b/src/h2/xsk/parser.hpp index ad697b98..aa8e2f1e 100644 --- a/src/h2/xsk/parser.hpp +++ b/src/h2/xsk/parser.hpp @@ -677,14 +677,14 @@ namespace xsk { namespace gsc { namespace h2 { H2EOF = 0, // "end of file" H2error = 1, // error H2UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // "#undef" - HSIFDEF = 
5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // "#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -809,14 +809,14 @@ namespace xsk { namespace gsc { namespace h2 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, // "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" + S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // "#/" S_INLINE = 13, // "#inline" @@ -2798,121 +2798,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type (token::HSUNDEF, l); + return 
symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF (location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type (token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else static symbol_type - make_HSELSE 
(const location_type& l) + make_SH_ELSE (const location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS diff --git a/src/iw5/xsk/lexer.cpp b/src/iw5/xsk/lexer.cpp index ec43dd4f..1a9182b4 100644 --- a/src/iw5/xsk/lexer.cpp +++ b/src/iw5/xsk/lexer.cpp @@ -16,6 +16,55 @@ xsk::gsc::iw5::parser::symbol_type IW5lex(xsk::gsc::iw5::lexer& lexer) namespace xsk::gsc::iw5 { +const std::unordered_map keyword_map +{{ + { "#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF }, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { "#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + { "case", parser::token::CASE }, 
+ { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { "breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, +}}; + buffer::buffer() : length(0) { data = static_cast(std::malloc(max_buf_size)); @@ -35,10 +84,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +125,7 @@ void reader::advance() } } -lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +144,7 @@ void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +193,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_IW5EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = false; + + advance(); switch (last) { 
@@ -157,12 +208,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +278,30 @@ auto lexer::lex() -> parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && (curr == '\r' || curr == '\n')) /* backslash-newline: the line comment continues on the next line */ + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') break; reader_.advance(); @@ -238,15 +315,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); indev_ = false; - reader_.advance(); + advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) + { + if (last != ' ' && last != '\t') /* stop at the first non-blank after '#' */ + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +339,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +349,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated field ('.')"); state_ = state::field; @@ 
-291,7 +376,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +384,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if (curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +400,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return parser::make_DECREMENT(loc_); @@ -325,24 +410,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != '=') return parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +441,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +455,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_LESS_EQUAL(loc_); 
if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != '>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +476,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +489,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, "unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) + throw comp_error(loc_, "invalid token ('\\')"); /* input ended right after the escape backslash */ + char c = curr; switch (curr) { @@ -435,13 +523,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +558,7 @@ lex_name: else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +577,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::IW5UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) { - if 
(itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -518,7 +603,7 @@ lex_name: { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -557,7 +642,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -571,7 +656,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -587,7 +672,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -599,7 +684,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -609,7 +694,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 0) @@ -621,7 +706,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -633,7 +718,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -643,7 +728,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -655,7 +740,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -667,7 +752,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -677,7 +762,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ 
-689,53 +774,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { "#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { "waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", parser::token::WHILE }, - { "for", parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont wrap comment marks '/\/' '/\*' outside strings + if 
(state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == '\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') + { + if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + + else if (reader_.buffer_pos[1] == '\n') /* else: the CRLF branch above already consumed this continuation */ + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) + throw comp_error(loc_, "invalid token ('#')"); + + switch (token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw comp_error(loc_, "unknown preprocessor directive"); + } +} } // 
namespace xsk::gsc::iw5 diff --git a/src/iw5/xsk/lexer.hpp b/src/iw5/xsk/lexer.hpp index 901b358d..16a47d33 100644 --- a/src/iw5/xsk/lexer.hpp +++ b/src/iw5/xsk/lexer.hpp @@ -5,8 +5,6 @@ #pragma once -#include "iw5.hpp" - namespace xsk::gsc::iw5 { @@ -57,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; public: lexer(const std::string& name, const char* data, size_t size); @@ -66,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // namespace xsk::gsc::iw5 diff --git a/src/iw5/xsk/parser.hpp b/src/iw5/xsk/parser.hpp index b724cae5..e1e8b5a0 100644 --- a/src/iw5/xsk/parser.hpp +++ b/src/iw5/xsk/parser.hpp @@ -674,14 +674,14 @@ namespace xsk { namespace gsc { namespace iw5 { IW5EOF = 0, // "end of file" IW5error = 1, // error IW5UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // "#undef" - HSIFDEF = 5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // "#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -805,14 +805,14 @@ namespace xsk { namespace gsc { namespace iw5 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, // "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" 
+ S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // "#/" S_INLINE = 13, // "#inline" @@ -2770,121 +2770,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type (token::HSUNDEF, l); + return symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF 
(location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type (token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else static symbol_type - make_HSELSE (const location_type& l) + make_SH_ELSE (const location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS diff --git a/src/iw6/xsk/lexer.cpp b/src/iw6/xsk/lexer.cpp index 0981964f..56771da2 100644 --- a/src/iw6/xsk/lexer.cpp +++ b/src/iw6/xsk/lexer.cpp @@ -16,6 +16,55 @@ xsk::gsc::iw6::parser::symbol_type IW6lex(xsk::gsc::iw6::lexer& lexer) namespace xsk::gsc::iw6 { +const std::unordered_map keyword_map +{{ + { "#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF 
}, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { "#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + { "case", parser::token::CASE }, + { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { "breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, +}}; + buffer::buffer() : length(0) { data = static_cast(std::malloc(max_buf_size)); @@ -35,10 +84,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : 
state(reader::end), buffer_pos(0), bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +125,7 @@ void reader::advance() } } -lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +144,7 @@ void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +193,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_IW6EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = false; + + advance(); switch (last) { @@ -157,12 +208,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +278,30 @@ auto lexer::lex() -> parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && curr == '\r' || curr == '\n') + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\')"); + + reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') break; reader_.advance(); @@ -238,15 +315,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); 
indev_ = false; - reader_.advance(); + advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) + { + if (last != ' ' || last != '\t') + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +339,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +349,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated field ('.')"); state_ = state::field; @@ -291,7 +376,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +384,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if (curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +400,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return parser::make_DECREMENT(loc_); @@ -325,24 +410,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != 
'=') return parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +441,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +455,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_LESS_EQUAL(loc_); if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != '>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +476,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +489,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, 
"unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) + throw comp_error(loc_, "invalid token ('\')"); + char c = curr; switch (curr) { @@ -435,13 +523,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +558,7 @@ lex_name: else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +577,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::IW6UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) { - if (itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -518,7 +603,7 @@ lex_name: { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -557,7 +642,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -571,7 +656,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -587,7 +672,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -599,7 +684,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); 
continue; } @@ -609,7 +694,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 0) @@ -621,7 +706,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -633,7 +718,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -643,7 +728,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -655,7 +740,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -667,7 +752,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -677,7 +762,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -689,53 +774,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { "#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { "waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", parser::token::WHILE }, - { "for", 
parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont wrap comment marks '/\/' '/\*' outside strings + if (state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == '\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') + { + if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + + if ((reader_.buffer_pos[1] == '\n')) + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) + 
throw comp_error(loc_, "invalid token ('#')"); + + switch (token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw comp_error(loc_, "unknown preprocessor directive"); + } +} } // namespace xsk::gsc::iw6 diff --git a/src/iw6/xsk/lexer.hpp b/src/iw6/xsk/lexer.hpp index 65a597b3..13c0ee00 100644 --- a/src/iw6/xsk/lexer.hpp +++ b/src/iw6/xsk/lexer.hpp @@ -5,8 +5,6 @@ #pragma once -#include "iw6.hpp" - namespace xsk::gsc::iw6 { @@ -57,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; public: lexer(const std::string& name, const char* data, size_t size); @@ -66,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // namespace xsk::gsc::iw6 diff --git a/src/iw6/xsk/parser.hpp b/src/iw6/xsk/parser.hpp index 6a41447c..f5b85ea4 100644 --- a/src/iw6/xsk/parser.hpp +++ b/src/iw6/xsk/parser.hpp @@ -674,14 +674,14 @@ namespace xsk { namespace gsc { namespace iw6 { IW6EOF = 0, // "end of file" IW6error = 1, // error IW6UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // 
"#undef" - HSIFDEF = 5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // "#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -805,14 +805,14 @@ namespace xsk { namespace gsc { namespace iw6 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, // "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" + S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // "#/" S_INLINE = 13, // "#inline" @@ -2770,121 +2770,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type 
(token::HSUNDEF, l); + return symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF (location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type (token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else 
static symbol_type - make_HSELSE (const location_type& l) + make_SH_ELSE (const location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS diff --git a/src/iw7/xsk/lexer.cpp b/src/iw7/xsk/lexer.cpp index 773c8136..6a43c714 100644 --- a/src/iw7/xsk/lexer.cpp +++ b/src/iw7/xsk/lexer.cpp @@ -16,6 +16,55 @@ xsk::gsc::iw7::parser::symbol_type IW7lex(xsk::gsc::iw7::lexer& lexer) namespace xsk::gsc::iw7 { +const std::unordered_map keyword_map +{{ + { "#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF }, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { "#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + 
{ "case", parser::token::CASE }, + { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { "breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, +}}; + buffer::buffer() : length(0) { data = static_cast(std::malloc(max_buf_size)); @@ -35,10 +84,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +125,7 @@ void reader::advance() } } -lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +144,7 @@ void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +193,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_IW7EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = 
false; + + advance(); switch (last) { @@ -157,12 +208,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +278,30 @@ auto lexer::lex() -> parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && curr == '\r' || curr == '\n') + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\')"); + + reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') break; reader_.advance(); @@ -238,15 +315,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); indev_ = false; - reader_.advance(); + advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) + { + if (last != ' ' || last != '\t') + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +339,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +349,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated 
field ('.')"); state_ = state::field; @@ -291,7 +376,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +384,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if (curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +400,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return parser::make_DECREMENT(loc_); @@ -325,24 +410,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != '=') return parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +441,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +455,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == 
'=') return parser::make_LESS_EQUAL(loc_); if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != '>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +476,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +489,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, "unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) + throw comp_error(loc_, "invalid token ('\')"); + char c = curr; switch (curr) { @@ -435,13 +523,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +558,7 @@ lex_name: else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +577,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::IW7UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != 
keyword_map.end()) + if (itr != keyword_map.end()) { - if (itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -518,7 +603,7 @@ lex_name: { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -557,7 +642,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -571,7 +656,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -587,7 +672,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -599,7 +684,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -609,7 +694,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 0) @@ -621,7 +706,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -633,7 +718,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -643,7 +728,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -655,7 +740,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -667,7 +752,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -677,7 +762,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + 
advance(); } if (last == '\'' || buffer_.length < 3) @@ -689,53 +774,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { "#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { "waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", parser::token::WHILE }, - { "for", parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont 
wrap comment marks '/\/' '/\*' outside strings + if (state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == '\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') + { + if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + + if ((reader_.buffer_pos[1] == '\n')) + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) + throw comp_error(loc_, "invalid token ('#')"); + + switch (token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw 
comp_error(loc_, "unknown preprocessor directive"); + } +} } // namespace xsk::gsc::iw7 diff --git a/src/iw7/xsk/lexer.hpp b/src/iw7/xsk/lexer.hpp index 3ac25c49..6fd768a8 100644 --- a/src/iw7/xsk/lexer.hpp +++ b/src/iw7/xsk/lexer.hpp @@ -5,8 +5,6 @@ #pragma once -#include "iw7.hpp" - namespace xsk::gsc::iw7 { @@ -57,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; public: lexer(const std::string& name, const char* data, size_t size); @@ -66,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // namespace xsk::gsc::iw7 diff --git a/src/iw7/xsk/parser.hpp b/src/iw7/xsk/parser.hpp index c91192f7..b362f84f 100644 --- a/src/iw7/xsk/parser.hpp +++ b/src/iw7/xsk/parser.hpp @@ -674,14 +674,14 @@ namespace xsk { namespace gsc { namespace iw7 { IW7EOF = 0, // "end of file" IW7error = 1, // error IW7UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // "#undef" - HSIFDEF = 5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // "#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -805,14 +805,14 @@ namespace xsk { namespace gsc { namespace iw7 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, 
// "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" + S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // "#/" S_INLINE = 13, // "#inline" @@ -2770,121 +2770,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type (token::HSUNDEF, l); + return symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= 
YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF (location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type (token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else static symbol_type - make_HSELSE (const location_type& l) + make_SH_ELSE (const location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS diff --git a/src/iw8/xsk/lexer.cpp b/src/iw8/xsk/lexer.cpp index 333af505..d9db82f2 100644 --- a/src/iw8/xsk/lexer.cpp +++ b/src/iw8/xsk/lexer.cpp @@ -16,6 +16,58 @@ xsk::gsc::iw8::parser::symbol_type IW8lex(xsk::gsc::iw8::lexer& lexer) namespace xsk::gsc::iw8 { +const std::unordered_map keyword_map +{{ + { 
"#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF }, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { "#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "waitframe", parser::token::WAITFRAME }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + { "case", parser::token::CASE }, + { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { "breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, + { "isdefined", parser::token::ISDEFINED }, + { "istrue", parser::token::ISTRUE }, +}}; + buffer::buffer() : length(0) { data = 
static_cast(std::malloc(max_buf_size)); @@ -35,10 +87,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +128,7 @@ void reader::advance() } } -lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +147,7 @@ void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +196,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_IW8EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = false; + + advance(); switch (last) { @@ -157,12 +211,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +281,30 @@ auto lexer::lex() -> parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && curr == '\r' || curr == '\n') + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\')"); + + 
reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') break; reader_.advance(); @@ -238,15 +318,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); indev_ = false; - reader_.advance(); + advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) + { + if (last != ' ' || last != '\t') + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +342,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +352,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated field ('.')"); state_ = state::field; @@ -291,7 +379,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +387,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if (curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +403,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return 
parser::make_DECREMENT(loc_); @@ -325,24 +413,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != '=') return parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +444,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +458,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_LESS_EQUAL(loc_); if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != '>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +479,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +492,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - 
throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, "unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) + throw comp_error(loc_, "invalid token ('\')"); + char c = curr; switch (curr) { @@ -435,13 +526,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +561,7 @@ lex_name: else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +580,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::IW8UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) { - if (itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -529,7 +617,7 @@ lex_name: { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -568,7 +656,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -582,7 +670,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size 
exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -598,7 +686,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -610,7 +698,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -620,7 +708,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 0) @@ -632,7 +720,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -644,7 +732,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -654,7 +742,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -666,7 +754,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -678,7 +766,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -688,7 +776,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -700,56 +788,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { "#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { 
"waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "waitframe", parser::token::WAITFRAME }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", parser::token::WHILE }, - { "for", parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, - { "isdefined", parser::token::ISDEFINED }, - { "istrue", parser::token::ISTRUE }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont wrap comment marks '/\/' '/\*' outside strings + if (state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == '\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') + { + if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + + if ((reader_.buffer_pos[1] == '\n')) + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, 
"invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) + throw comp_error(loc_, "invalid token ('#')"); + + switch (token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw comp_error(loc_, "unknown preprocessor directive"); + } +} } // namespace xsk::gsc::iw8 diff --git a/src/iw8/xsk/lexer.hpp b/src/iw8/xsk/lexer.hpp index 3e2a8183..3295f083 100644 --- a/src/iw8/xsk/lexer.hpp +++ b/src/iw8/xsk/lexer.hpp @@ -5,8 +5,6 @@ #pragma once -#include "iw8.hpp" - namespace xsk::gsc::iw8 { @@ -57,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; public: lexer(const std::string& name, const char* data, size_t size); @@ -66,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // 
namespace xsk::gsc::iw8 diff --git a/src/iw8/xsk/parser.hpp b/src/iw8/xsk/parser.hpp index b8455412..f44072aa 100644 --- a/src/iw8/xsk/parser.hpp +++ b/src/iw8/xsk/parser.hpp @@ -683,14 +683,14 @@ namespace xsk { namespace gsc { namespace iw8 { IW8EOF = 0, // "end of file" IW8error = 1, // error IW8UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // "#undef" - HSIFDEF = 5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // "#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -817,14 +817,14 @@ namespace xsk { namespace gsc { namespace iw8 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, // "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" + S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // "#/" S_INLINE = 13, // "#inline" @@ -2854,121 +2854,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } 
#endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type (token::HSUNDEF, l); + return symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF (location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type (token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + 
make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else static symbol_type - make_HSELSE (const location_type& l) + make_SH_ELSE (const location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS diff --git a/src/s1/xsk/lexer.cpp b/src/s1/xsk/lexer.cpp index c9d6dcc8..9d336402 100644 --- a/src/s1/xsk/lexer.cpp +++ b/src/s1/xsk/lexer.cpp @@ -16,6 +16,56 @@ xsk::gsc::s1::parser::symbol_type S1lex(xsk::gsc::s1::lexer& lexer) namespace xsk::gsc::s1 { +const std::unordered_map keyword_map +{{ + { "#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF }, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { "#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", 
parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "waitframe", parser::token::WAITFRAME }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + { "case", parser::token::CASE }, + { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { "breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, +}}; + buffer::buffer() : length(0) { data = static_cast(std::malloc(max_buf_size)); @@ -35,10 +85,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +126,7 @@ void reader::advance() } } -lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +145,7 @@ 
void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +194,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_S1EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = false; + + advance(); switch (last) { @@ -157,12 +209,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +279,30 @@ auto lexer::lex() -> parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && (curr == '\r' || curr == '\n')) + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') + break; reader_.advance(); @@ -238,15 +316,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); indev_ = false; - reader_.advance(); + advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) + { + if (last != ' ' && last != '\t') + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +340,7 @@ auto lexer::lex() ->
parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +350,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated field ('.')"); state_ = state::field; @@ -291,7 +377,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +385,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if (curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +401,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return parser::make_DECREMENT(loc_); @@ -325,24 +411,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != '=') return parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +442,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - 
reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +456,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_LESS_EQUAL(loc_); if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != '>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +477,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +490,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, "unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) + throw comp_error(loc_, "invalid token ('\\')"); + char c = curr; switch (curr) { @@ -435,13 +524,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +559,7 @@ lex_name:
else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +578,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::S1UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) { - if (itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -518,7 +604,7 @@ lex_name: { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -557,7 +643,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -571,7 +657,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -587,7 +673,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -599,7 +685,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -609,7 +695,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 0) @@ -621,7 +707,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -633,7 +719,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -643,7 +729,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - 
reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -655,7 +741,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -667,7 +753,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -677,7 +763,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -689,54 +775,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { "#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { "waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "waitframe", parser::token::WAITFRAME }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", parser::token::WHILE }, - { "for", parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", 
parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont wrap comment marks '/\/' '/\*' outside strings + if (state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == '\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') + { + if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + else if (reader_.buffer_pos[1] == '\n') + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) + throw comp_error(loc_, "invalid token ('#')"); + + switch (token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor
directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw comp_error(loc_, "unknown preprocessor directive"); + } +} } // namespace xsk::gsc::s1 diff --git a/src/s1/xsk/lexer.hpp b/src/s1/xsk/lexer.hpp index 572607b7..34d4ddf8 100644 --- a/src/s1/xsk/lexer.hpp +++ b/src/s1/xsk/lexer.hpp @@ -5,8 +5,6 @@ #pragma once -#include "s1.hpp" - namespace xsk::gsc::s1 { @@ -57,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; public: lexer(const std::string& name, const char* data, size_t size); @@ -66,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // namespace xsk::gsc::s1 diff --git a/src/s1/xsk/parser.hpp b/src/s1/xsk/parser.hpp index ec35a13f..74cbd907 100644 --- a/src/s1/xsk/parser.hpp +++ b/src/s1/xsk/parser.hpp @@ -677,14 +677,14 @@ namespace xsk { namespace gsc { namespace s1 { S1EOF = 0, // "end of file" S1error = 1, // error S1UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // "#undef" - HSIFDEF = 5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // "#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -809,14 +809,14 @@ namespace xsk { namespace 
gsc { namespace s1 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, // "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" + S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // "#/" S_INLINE = 13, // "#inline" @@ -2798,121 +2798,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type (token::HSUNDEF, l); + return symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= 
YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF (location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type (token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else static symbol_type - make_HSELSE (const location_type& l) + make_SH_ELSE (const location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& 
l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS diff --git a/src/s2/xsk/lexer.cpp b/src/s2/xsk/lexer.cpp index c0a05704..8853ab41 100644 --- a/src/s2/xsk/lexer.cpp +++ b/src/s2/xsk/lexer.cpp @@ -16,6 +16,56 @@ xsk::gsc::s2::parser::symbol_type S2lex(xsk::gsc::s2::lexer& lexer) namespace xsk::gsc::s2 { +const std::unordered_map keyword_map +{{ + { "#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF }, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { "#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "waitframe", parser::token::WAITFRAME }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + { "case", parser::token::CASE }, + { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { "breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", 
parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, +}}; + buffer::buffer() : length(0) { data = static_cast(std::malloc(max_buf_size)); @@ -35,10 +85,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +126,7 @@ void reader::advance() } } -lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +145,7 @@ void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +194,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_S2EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = false; + + advance(); switch (last) { @@ -157,12 +209,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +279,30 @@ auto lexer::lex() -> 
parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && (curr == '\r' || curr == '\n')) + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') + break; reader_.advance(); @@ -238,15 +316,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); indev_ = false; - reader_.advance(); + advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) + { + if (last != ' ' && last != '\t') + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +340,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +350,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated field ('.')"); state_ = state::field; @@ -291,7 +377,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +385,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if
(curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +401,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return parser::make_DECREMENT(loc_); @@ -325,24 +411,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != '=') return parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != '&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +442,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +456,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_LESS_EQUAL(loc_); if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != '>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +477,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - 
reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +490,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, "unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) + throw comp_error(loc_, "invalid token ('\\')"); + char c = curr; switch (curr) { @@ -435,13 +524,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +559,7 @@ lex_name: else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +578,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::S2UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) { - if (itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -518,7 +604,7 @@ lex_name: { const auto& itr =
keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -557,7 +643,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -571,7 +657,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -587,7 +673,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -599,7 +685,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -609,7 +695,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 0) @@ -621,7 +707,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -633,7 +719,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -643,7 +729,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -655,7 +741,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -667,7 +753,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -677,7 +763,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -689,54 +775,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { 
"#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { "waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "waitframe", parser::token::WAITFRAME }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", parser::token::WHILE }, - { "for", parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont wrap comment marks '/\/' '/\*' outside strings + if (state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == '\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') + { + if 
(reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + + if ((reader_.buffer_pos[1] == '\n')) + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) + throw comp_error(loc_, "invalid token ('#')"); + + switch (token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw comp_error(loc_, "unknown preprocessor directive"); + } +} } // namespace xsk::gsc::s2 diff --git a/src/s2/xsk/lexer.hpp b/src/s2/xsk/lexer.hpp index 72d13685..ae78cb40 100644 --- a/src/s2/xsk/lexer.hpp +++ b/src/s2/xsk/lexer.hpp @@ -5,8 +5,6 @@ #pragma once -#include "s2.hpp" - namespace xsk::gsc::s2 { @@ -57,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; 
public: lexer(const std::string& name, const char* data, size_t size); @@ -66,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // namespace xsk::gsc::s2 diff --git a/src/s2/xsk/parser.hpp b/src/s2/xsk/parser.hpp index f90f21e4..16e8f624 100644 --- a/src/s2/xsk/parser.hpp +++ b/src/s2/xsk/parser.hpp @@ -677,14 +677,14 @@ namespace xsk { namespace gsc { namespace s2 { S2EOF = 0, // "end of file" S2error = 1, // error S2UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // "#undef" - HSIFDEF = 5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // "#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -809,14 +809,14 @@ namespace xsk { namespace gsc { namespace s2 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, // "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" + S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // "#/" S_INLINE = 13, // "#inline" @@ -2798,121 +2798,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, 
std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type (token::HSUNDEF, l); + return symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF (location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type (token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= 
YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else static symbol_type - make_HSELSE (const location_type& l) + make_SH_ELSE (const location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS diff --git a/src/s4/xsk/lexer.cpp b/src/s4/xsk/lexer.cpp index 1db763b4..3aa04f6a 100644 --- a/src/s4/xsk/lexer.cpp +++ b/src/s4/xsk/lexer.cpp @@ -16,6 +16,58 @@ xsk::gsc::s4::parser::symbol_type S4lex(xsk::gsc::s4::lexer& lexer) namespace xsk::gsc::s4 { +const std::unordered_map keyword_map +{{ + { "#define", parser::token::SH_DEFINE }, + { "#undef", parser::token::SH_UNDEF }, + { "#ifdef", parser::token::SH_IFDEF }, + { "#ifndef", parser::token::SH_IFNDEF }, + { "#if", parser::token::SH_IF }, + { "#elif", parser::token::SH_ELIF }, + { "#else", parser::token::SH_ELSE }, + { "#endif", parser::token::SH_ENDIF }, + { "#inline", parser::token::INLINE }, + { "#include", parser::token::INCLUDE }, + { 
"#using_animtree", parser::token::USINGTREE }, + { "#animtree", parser::token::ANIMTREE }, + { "endon", parser::token::ENDON }, + { "notify", parser::token::NOTIFY }, + { "wait", parser::token::WAIT }, + { "waittill", parser::token::WAITTILL }, + { "waittillmatch", parser::token::WAITTILLMATCH }, + { "waittillframeend", parser::token::WAITTILLFRAMEEND }, + { "waitframe", parser::token::WAITFRAME }, + { "if", parser::token::IF }, + { "else", parser::token::ELSE }, + { "do", parser::token::DO }, + { "while", parser::token::WHILE }, + { "for", parser::token::FOR }, + { "foreach", parser::token::FOREACH }, + { "in", parser::token::IN }, + { "switch", parser::token::SWITCH }, + { "case", parser::token::CASE }, + { "default", parser::token::DEFAULT }, + { "break", parser::token::BREAK }, + { "continue", parser::token::CONTINUE }, + { "return", parser::token::RETURN }, + { "breakpoint", parser::token::BREAKPOINT }, + { "prof_begin", parser::token::PROFBEGIN }, + { "prof_end", parser::token::PROFEND }, + { "thread", parser::token::THREAD }, + { "childthread", parser::token::CHILDTHREAD }, + { "thisthread", parser::token::THISTHREAD }, + { "call", parser::token::CALL }, + { "true", parser::token::TRUE }, + { "false", parser::token::FALSE }, + { "undefined", parser::token::UNDEFINED }, + { "game", parser::token::GAME }, + { "self", parser::token::SELF }, + { "anim", parser::token::ANIM }, + { "level", parser::token::LEVEL }, + { "isdefined", parser::token::ISDEFINED }, + { "istrue", parser::token::ISTRUE }, +}}; + buffer::buffer() : length(0) { data = static_cast(std::malloc(max_buf_size)); @@ -35,10 +87,8 @@ bool buffer::push(char c) return true; } -reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), last_byte(0), current_byte(0) -{ - -} +reader::reader() : state(reader::end), buffer_pos(0), bytes_remaining(0), + last_byte(0), current_byte(0) { } void reader::init(const char* data, size_t size) { @@ -78,7 +128,7 @@ void reader::advance() } } 
-lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), loc_(location(&name)), +lexer::lexer(const std::string& name, const char* data, size_t size) : indev_(false), clean_(true), loc_(location(&name)), mode_(build::dev), header_top_(0), locs_(std::stack()), readers_(std::stack()) { reader_.init(data, size); @@ -97,6 +147,7 @@ void lexer::push_header(const std::string& file) locs_.push(loc_); loc_.initialize(std::get<0>(data)); reader_.init(std::get<1>(data), std::get<2>(data)); + clean_ = true; } catch (const std::exception& e) { @@ -145,7 +196,10 @@ auto lexer::lex() -> parser::symbol_type return parser::make_S4EOF(loc_); } - reader_.advance(); + if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n') + clean_ = false; + + advance(); switch (last) { @@ -157,12 +211,15 @@ auto lexer::lex() -> parser::symbol_type case '\n': loc_.lines(); loc_.step(); + clean_ = true; continue; + case '\\': + throw comp_error(loc_, "invalid token ('\\')"); case '/': if (curr != '/' && curr != '*' && curr != '#' && curr != '=') return parser::make_DIV(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_DIV(loc_); @@ -224,7 +281,30 @@ auto lexer::lex() -> parser::symbol_type { while (true) { - if (state == reader::end || curr == '\n') + if (state == reader::end) + break; + + if (last == '\\' && curr == '\r' || curr == '\n') + { + reader_.advance(); + + if (state == reader::end) + break; + + if (last == '\r') + { + if (curr != '\n') + throw comp_error(loc_, "invalid token ('\')"); + + reader_.advance(); + } + + loc_.lines(); + loc_.step(); + continue; + } + + if (curr == '\n') break; reader_.advance(); @@ -238,15 +318,23 @@ auto lexer::lex() -> parser::symbol_type throw comp_error(loc_, "unmatched devblock end ('#/')"); indev_ = false; - reader_.advance(); + advance(); return parser::make_DEVEND(loc_); } buffer_.push(last); - reader_.advance(); + advance(); + + while (state == reader::ok) + { + if (last != 
' ' || last != '\t') + break; + + advance(); + } if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123))) - throw comp_error(loc_, "unterminated preprocessor directive ('#')"); + throw comp_error(loc_, "invalid preprocessor directive ('#')"); state_ = state::preprocessor; goto lex_name; @@ -254,7 +342,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '/' && curr != '=') return parser::make_MUL(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_ASSIGN_MUL(loc_); @@ -264,9 +352,9 @@ auto lexer::lex() -> parser::symbol_type state_ = state::string; goto lex_string; case '.': - reader_.advance(); + advance(); - if(state == reader::end) + if (state == reader::end) throw comp_error(loc_, "unterminated field ('.')"); state_ = state::field; @@ -291,7 +379,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != ':') return parser::make_COLON(loc_); - reader_.advance(); + advance(); return parser::make_DOUBLECOLON(loc_); case '?': return parser::make_QMARK(loc_); @@ -299,13 +387,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_ASSIGN(loc_); - reader_.advance(); + advance(); return parser::make_EQUALITY(loc_); case '+': if (curr != '+' && curr != '=') return parser::make_ADD(loc_); - reader_.advance(); + advance(); if (last == '+') return parser::make_INCREMENT(loc_); @@ -315,7 +403,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '-' && curr != '=') return parser::make_SUB(loc_); - reader_.advance(); + advance(); if (last == '-') return parser::make_DECREMENT(loc_); @@ -325,24 +413,24 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_MOD(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_MOD(loc_); case '|': if (curr != '|' && curr != '=') return parser::make_BITWISE_OR(loc_); - reader_.advance(); + advance(); if (last == '|') return parser::make_OR(loc_); return parser::make_ASSIGN_BW_OR(loc_); case '&': - if (curr != 
'&' && curr != '=' && curr != '"' && curr != '\'') + if (curr != '&' && curr != '=' && curr != '"') return parser::make_BITWISE_AND(loc_); - reader_.advance(); + advance(); if (last == '&') return parser::make_AND(loc_); @@ -356,13 +444,13 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_BITWISE_EXOR(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_BW_EXOR(loc_); case '!': if (curr != '=') return parser::make_NOT(loc_); - reader_.advance(); + advance(); return parser::make_INEQUALITY(loc_); case '~': return parser::make_COMPLEMENT(loc_); @@ -370,20 +458,20 @@ auto lexer::lex() -> parser::symbol_type if (curr != '<' && curr != '=') return parser::make_LESS(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_LESS_EQUAL(loc_); if (curr != '=') return parser::make_LSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_LSHIFT(loc_); case '>': if (curr != '>' && curr != '=') return parser::make_GREATER(loc_); - reader_.advance(); + advance(); if (last == '=') return parser::make_GREATER_EQUAL(loc_); @@ -391,7 +479,7 @@ auto lexer::lex() -> parser::symbol_type if (curr != '=') return parser::make_RSHIFT(loc_); - reader_.advance(); + advance(); return parser::make_ASSIGN_RSHIFT(loc_); default: lex_name_or_number: @@ -404,24 +492,27 @@ lex_name_or_number: } lex_string: - if (state == reader::end) - throw comp_error(loc_, "unmatched string start ('\"')"); - - reader_.advance(); - while (true) { - if (last == '"') - break; - - if (last == '\n') - throw comp_error(loc_, "unterminated string literal"); - if (state == reader::end) throw comp_error(loc_, "unmatched string start ('\"')"); - if (last == '\\') + if (curr == '"') { + advance(); + break; + } + + if (curr == '\n') + throw comp_error(loc_, "unterminated string literal"); + + if (curr == '\\') + { + advance(); + + if (state == reader::end) + throw comp_error(loc_, "invalid token ('\')"); + char c = curr; switch (curr) { @@ 
-435,13 +526,11 @@ lex_string: if (!buffer_.push(c)) throw comp_error(loc_, "max string size exceeded"); - - reader_.advance(); } - else if (!buffer_.push(last)) + else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if (state_ == state::localize) @@ -472,7 +561,7 @@ lex_name: else if (!buffer_.push(curr)) throw comp_error(loc_, "max string size exceeded"); - reader_.advance(); + advance(); } if(state_ == state::field) @@ -491,24 +580,23 @@ lex_name: { if (path) throw comp_error(loc_, "invalid preprocessor directive"); - + auto token = parser::token::S4UNDEF; if (buffer_.length < 16) { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) { - if (itr->second > parser::token::HSENDIF) + if (itr->second > parser::token::SH_ENDIF) return parser::symbol_type(itr->second, loc_); - + token = itr->second; } } - // TODO: call preprocessor(token); - throw comp_error(loc_, "unknown preprocessor directive"); + preprocessor(token); state_ = state::start; continue; } @@ -529,7 +617,7 @@ lex_name: { const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length)); - if(itr != keyword_map.end()) + if (itr != keyword_map.end()) return parser::symbol_type(itr->second, loc_); } @@ -568,7 +656,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -582,7 +670,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'') @@ -598,7 +686,7 @@ lex_number: } else if (curr == 'o') { - reader_.advance(); + advance(); while (true) { @@ -610,7 +698,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -620,7 +708,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length <= 
0) @@ -632,7 +720,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -644,7 +732,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -654,7 +742,7 @@ lex_number: if (!buffer_.push(curr)) throw comp_error(loc_, "number literal size exceeded"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -666,7 +754,7 @@ lex_number: { buffer_.push(last); buffer_.push(curr); - reader_.advance(); + advance(); while (true) { @@ -678,7 +766,7 @@ lex_number: if (curr == '\'') { - reader_.advance(); + advance(); continue; } @@ -688,7 +776,7 @@ lex_number: if (!buffer_.push(curr)) throw error("gsc lexer: out of memory!"); - reader_.advance(); + advance(); } if (last == '\'' || buffer_.length < 3) @@ -700,56 +788,89 @@ lex_number: } } -const std::unordered_map lexer::keyword_map -{{ - { "#define", parser::token::HSDEFINE }, - { "#undef", parser::token::HSUNDEF }, - { "#ifdef", parser::token::HSIFDEF }, - { "#ifndef", parser::token::HSIFNDEF }, - { "#if", parser::token::HSIF }, - { "#elif", parser::token::HSELIF }, - { "#else", parser::token::HSELSE }, - { "#endif", parser::token::HSENDIF }, - { "#inline", parser::token::INLINE }, - { "#include", parser::token::INCLUDE }, - { "#using_animtree", parser::token::USINGTREE }, - { "#animtree", parser::token::ANIMTREE }, - { "endon", parser::token::ENDON }, - { "notify", parser::token::NOTIFY }, - { "wait", parser::token::WAIT }, - { "waittill", parser::token::WAITTILL }, - { "waittillmatch", parser::token::WAITTILLMATCH }, - { "waittillframeend", parser::token::WAITTILLFRAMEEND }, - { "waitframe", parser::token::WAITFRAME }, - { "if", parser::token::IF }, - { "else", parser::token::ELSE }, - { "do", parser::token::DO }, - { "while", parser::token::WHILE }, - { "for", parser::token::FOR }, - { "foreach", parser::token::FOREACH }, - { "in", parser::token::IN }, - { "switch", parser::token::SWITCH }, - { "case", 
parser::token::CASE }, - { "default", parser::token::DEFAULT }, - { "break", parser::token::BREAK }, - { "continue", parser::token::CONTINUE }, - { "return", parser::token::RETURN }, - { "breakpoint", parser::token::BREAKPOINT }, - { "prof_begin", parser::token::PROFBEGIN }, - { "prof_end", parser::token::PROFEND }, - { "thread", parser::token::THREAD }, - { "childthread", parser::token::CHILDTHREAD }, - { "thisthread", parser::token::THISTHREAD }, - { "call", parser::token::CALL }, - { "true", parser::token::TRUE }, - { "false", parser::token::FALSE }, - { "undefined", parser::token::UNDEFINED }, - { "game", parser::token::GAME }, - { "self", parser::token::SELF }, - { "anim", parser::token::ANIM }, - { "level", parser::token::LEVEL }, - { "isdefined", parser::token::ISDEFINED }, - { "istrue", parser::token::ISTRUE }, -}}; +void lexer::advance() +{ + reader_.advance(); + + // dont wrap comment marks '/\/' '/\*' outside strings + if (state_ == state::start && reader_.last_byte == '/') + return; + + while (reader_.current_byte == '\\') + { + if (reader_.bytes_remaining == 1) + throw comp_error(loc_, "invalid token ('\\')"); + + if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n') + break; + + if (reader_.buffer_pos[1] == '\r') + { + if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n') + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 3; + reader_.bytes_remaining -= 3; + } + + if ((reader_.buffer_pos[1] == '\n')) + { + if (reader_.bytes_remaining == 2) + throw comp_error(loc_, "invalid token ('\\')"); + + reader_.buffer_pos += 2; + reader_.bytes_remaining -= 2; + } + + if (reader_.bytes_remaining == 0) + { + reader_.state = reader::end; + reader_.current_byte = 0; + } + else + { + reader_.current_byte = *reader_.buffer_pos; + } + + loc_.lines(); + loc_.step(); + } +} + +void lexer::preprocessor(parser::token::token_kind_type token) +{ + if (!clean_) + throw comp_error(loc_, "invalid token ('#')"); + + switch 
(token) + { + case parser::token::SH_DEFINE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_UNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IFNDEF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_IF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ELSE: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + case parser::token::SH_ENDIF: + throw comp_error(loc_, "unimplemented preprocessor directive"); + break; + default: + throw comp_error(loc_, "unknown preprocessor directive"); + } +} } // namespace xsk::gsc::s4 diff --git a/src/s4/xsk/lexer.hpp b/src/s4/xsk/lexer.hpp index 7e129269..12b33453 100644 --- a/src/s4/xsk/lexer.hpp +++ b/src/s4/xsk/lexer.hpp @@ -5,8 +5,6 @@ #pragma once -#include "s4.hpp" - namespace xsk::gsc::s4 { @@ -57,6 +55,7 @@ class lexer state state_; build mode_; bool indev_; + bool clean_; public: lexer(const std::string& name, const char* data, size_t size); @@ -66,7 +65,8 @@ public: void ban_header(const location& loc); private: - static const std::unordered_map keyword_map; + void advance(); + void preprocessor(parser::token::token_kind_type token); }; } // namespace xsk::gsc::s4 diff --git a/src/s4/xsk/parser.hpp b/src/s4/xsk/parser.hpp index 6f7717d9..e0bb444b 100644 --- a/src/s4/xsk/parser.hpp +++ b/src/s4/xsk/parser.hpp @@ -683,14 +683,14 @@ namespace xsk { namespace gsc { namespace s4 { S4EOF = 0, // "end of file" S4error = 1, // error S4UNDEF = 2, // "invalid token" - HSDEFINE = 3, // "#define" - HSUNDEF = 4, // "#undef" - HSIFDEF = 5, // "#ifdef" - HSIFNDEF = 6, // "#ifndef" - HSIF = 7, // 
"#if" - HSELIF = 8, // "#elif" - HSELSE = 9, // "#else" - HSENDIF = 10, // "#endif" + SH_DEFINE = 3, // "#define" + SH_UNDEF = 4, // "#undef" + SH_IFDEF = 5, // "#ifdef" + SH_IFNDEF = 6, // "#ifndef" + SH_IF = 7, // "#if" + SH_ELIF = 8, // "#elif" + SH_ELSE = 9, // "#else" + SH_ENDIF = 10, // "#endif" DEVBEGIN = 11, // "/#" DEVEND = 12, // "#/" INLINE = 13, // "#inline" @@ -817,14 +817,14 @@ namespace xsk { namespace gsc { namespace s4 { S_YYEOF = 0, // "end of file" S_YYerror = 1, // error S_YYUNDEF = 2, // "invalid token" - S_HSDEFINE = 3, // "#define" - S_HSUNDEF = 4, // "#undef" - S_HSIFDEF = 5, // "#ifdef" - S_HSIFNDEF = 6, // "#ifndef" - S_HSIF = 7, // "#if" - S_HSELIF = 8, // "#elif" - S_HSELSE = 9, // "#else" - S_HSENDIF = 10, // "#endif" + S_SH_DEFINE = 3, // "#define" + S_SH_UNDEF = 4, // "#undef" + S_SH_IFDEF = 5, // "#ifdef" + S_SH_IFNDEF = 6, // "#ifndef" + S_SH_IF = 7, // "#if" + S_SH_ELIF = 8, // "#elif" + S_SH_ELSE = 9, // "#else" + S_SH_ENDIF = 10, // "#endif" S_DEVBEGIN = 11, // "/#" S_DEVEND = 12, // "#/" S_INLINE = 13, // "#inline" @@ -2854,121 +2854,121 @@ switch (yykind) #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSDEFINE (location_type l) + make_SH_DEFINE (location_type l) { - return symbol_type (token::HSDEFINE, std::move (l)); + return symbol_type (token::SH_DEFINE, std::move (l)); } #else static symbol_type - make_HSDEFINE (const location_type& l) + make_SH_DEFINE (const location_type& l) { - return symbol_type (token::HSDEFINE, l); + return symbol_type (token::SH_DEFINE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSUNDEF (location_type l) + make_SH_UNDEF (location_type l) { - return symbol_type (token::HSUNDEF, std::move (l)); + return symbol_type (token::SH_UNDEF, std::move (l)); } #else static symbol_type - make_HSUNDEF (const location_type& l) + make_SH_UNDEF (const location_type& l) { - return symbol_type (token::HSUNDEF, l); + return symbol_type (token::SH_UNDEF, l); } #endif #if 201103L <= 
YY_CPLUSPLUS static symbol_type - make_HSIFDEF (location_type l) + make_SH_IFDEF (location_type l) { - return symbol_type (token::HSIFDEF, std::move (l)); + return symbol_type (token::SH_IFDEF, std::move (l)); } #else static symbol_type - make_HSIFDEF (const location_type& l) + make_SH_IFDEF (const location_type& l) { - return symbol_type (token::HSIFDEF, l); + return symbol_type (token::SH_IFDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIFNDEF (location_type l) + make_SH_IFNDEF (location_type l) { - return symbol_type (token::HSIFNDEF, std::move (l)); + return symbol_type (token::SH_IFNDEF, std::move (l)); } #else static symbol_type - make_HSIFNDEF (const location_type& l) + make_SH_IFNDEF (const location_type& l) { - return symbol_type (token::HSIFNDEF, l); + return symbol_type (token::SH_IFNDEF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSIF (location_type l) + make_SH_IF (location_type l) { - return symbol_type (token::HSIF, std::move (l)); + return symbol_type (token::SH_IF, std::move (l)); } #else static symbol_type - make_HSIF (const location_type& l) + make_SH_IF (const location_type& l) { - return symbol_type (token::HSIF, l); + return symbol_type (token::SH_IF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELIF (location_type l) + make_SH_ELIF (location_type l) { - return symbol_type (token::HSELIF, std::move (l)); + return symbol_type (token::SH_ELIF, std::move (l)); } #else static symbol_type - make_HSELIF (const location_type& l) + make_SH_ELIF (const location_type& l) { - return symbol_type (token::HSELIF, l); + return symbol_type (token::SH_ELIF, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSELSE (location_type l) + make_SH_ELSE (location_type l) { - return symbol_type (token::HSELSE, std::move (l)); + return symbol_type (token::SH_ELSE, std::move (l)); } #else static symbol_type - make_HSELSE (const location_type& l) + make_SH_ELSE (const 
location_type& l) { - return symbol_type (token::HSELSE, l); + return symbol_type (token::SH_ELSE, l); } #endif #if 201103L <= YY_CPLUSPLUS static symbol_type - make_HSENDIF (location_type l) + make_SH_ENDIF (location_type l) { - return symbol_type (token::HSENDIF, std::move (l)); + return symbol_type (token::SH_ENDIF, std::move (l)); } #else static symbol_type - make_HSENDIF (const location_type& l) + make_SH_ENDIF (const location_type& l) { - return symbol_type (token::HSENDIF, l); + return symbol_type (token::SH_ENDIF, l); } #endif #if 201103L <= YY_CPLUSPLUS