gsc-tool/src/s4/xsk/lexer.cpp

877 lines
26 KiB
C++
Raw Normal View History

2022-01-26 12:08:28 +01:00
// Copyright 2022 xensik. All rights reserved.
//
// Use of this source code is governed by a GNU GPLv3 license
// that can be found in the LICENSE file.
#include "stdafx.hpp"
#include "s4.hpp"
#include "parser.hpp"
#include "lexer.hpp"

#include <cstdlib>
#include <new>
xsk::gsc::s4::parser::symbol_type S4lex(xsk::gsc::s4::lexer& lexer)
{
return lexer.lex();
}
namespace xsk::gsc::s4
{
2022-02-04 15:39:47 +01:00
// Lookup table from reserved spelling to parser token kind.
//
// lexer::lex() consults it twice: once for '#'-prefixed words (the key
// includes the leading '#') and once for plain identifiers, which lex()
// lower-cases before the lookup unless they start with '_'.
const std::unordered_map<std::string_view, parser::token::token_kind_type> keyword_map
{
    // '#' directives routed to lexer::preprocessor()
    // (lex() forwards every directive whose token is not greater than SH_ENDIF)
    { "#define",          parser::token::SH_DEFINE },
    { "#undef",           parser::token::SH_UNDEF },
    { "#ifdef",           parser::token::SH_IFDEF },
    { "#ifndef",          parser::token::SH_IFNDEF },
    { "#if",              parser::token::SH_IF },
    { "#elif",            parser::token::SH_ELIF },
    { "#else",            parser::token::SH_ELSE },
    { "#endif",           parser::token::SH_ENDIF },

    // remaining '#' words
    { "#inline",          parser::token::INLINE },
    { "#include",         parser::token::INCLUDE },
    { "#using_animtree",  parser::token::USINGTREE },
    { "#animtree",        parser::token::ANIMTREE },

    // event / timing statements
    { "endon",            parser::token::ENDON },
    { "notify",           parser::token::NOTIFY },
    { "wait",             parser::token::WAIT },
    { "waittill",         parser::token::WAITTILL },
    { "waittillmatch",    parser::token::WAITTILLMATCH },
    { "waittillframeend", parser::token::WAITTILLFRAMEEND },
    { "waitframe",        parser::token::WAITFRAME },

    // control flow
    { "if",               parser::token::IF },
    { "else",             parser::token::ELSE },
    { "do",               parser::token::DO },
    { "while",            parser::token::WHILE },
    { "for",              parser::token::FOR },
    { "foreach",          parser::token::FOREACH },
    { "in",               parser::token::IN },
    { "switch",           parser::token::SWITCH },
    { "case",             parser::token::CASE },
    { "default",          parser::token::DEFAULT },
    { "break",            parser::token::BREAK },
    { "continue",         parser::token::CONTINUE },
    { "return",           parser::token::RETURN },

    // debugging / profiling
    { "breakpoint",       parser::token::BREAKPOINT },
    { "prof_begin",       parser::token::PROFBEGIN },
    { "prof_end",         parser::token::PROFEND },

    // call modifiers
    { "thread",           parser::token::THREAD },
    { "childthread",      parser::token::CHILDTHREAD },
    { "thisthread",       parser::token::THISTHREAD },
    { "call",             parser::token::CALL },

    // literals and built-in objects
    { "true",             parser::token::TRUE },
    { "false",            parser::token::FALSE },
    { "undefined",        parser::token::UNDEFINED },
    { "game",             parser::token::GAME },
    { "self",             parser::token::SELF },
    { "anim",             parser::token::ANIM },
    { "level",            parser::token::LEVEL },

    // built-in predicates
    { "isdefined",        parser::token::ISDEFINED },
    { "istrue",           parser::token::ISTRUE },
};
2022-01-28 15:58:41 +01:00
// Allocates the fixed-size scratch storage (max_buf_size bytes) that
// push() fills while a token is being scanned.
//
// Throws std::bad_alloc if the allocation fails, so push() can never
// write through a null pointer.
buffer::buffer() : length(0)
{
    data = static_cast<char*>(std::malloc(max_buf_size));

    if (data == nullptr)
        throw std::bad_alloc();
}
// Releases the storage obtained in the constructor.
buffer::~buffer()
{
    // std::free accepts a null pointer and does nothing in that case.
    std::free(data);
}
bool buffer::push(char c)
{
2022-01-28 15:58:41 +01:00
if(length >= max_buf_size)
return false;
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
data[length++] = c;
return true;
}
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
// Builds a reader with no input attached: it starts in the `end` state
// with a null position and zeroed byte trackers until init() is called.
reader::reader()
    : state(reader::end)
    , buffer_pos(nullptr)
    , bytes_remaining(0)
    , last_byte(0)
    , current_byte(0)
{
}
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
void reader::init(const char* data, size_t size)
2021-09-13 17:50:36 +02:00
{
2022-01-26 12:08:28 +01:00
if(data && size)
{
state = reader::ok;
buffer_pos = data;
bytes_remaining = size;
last_byte = 0;
current_byte = *data;
}
else
{
state = reader::end;
buffer_pos = 0;
bytes_remaining = 0;
last_byte = 0;
current_byte = 0;
}
2021-09-13 17:50:36 +02:00
}
2022-01-26 12:08:28 +01:00
void reader::advance()
2021-09-13 17:50:36 +02:00
{
2022-01-26 12:08:28 +01:00
++buffer_pos;
if(bytes_remaining-- == 1)
{
state = reader::end;
bytes_remaining = 0;
last_byte = current_byte;
current_byte = 0;
}
else
{
last_byte = current_byte;
current_byte = *buffer_pos;
}
2021-09-13 17:50:36 +02:00
}
2022-02-04 15:39:47 +01:00
// Creates a lexer over an in-memory source.
//
// name: file name used to build the initial location.
// data: first byte of the source text.
// size: number of bytes at data.
//
// NOTE(review): the location is constructed from the address of `name`;
// presumably it keeps that pointer, so confirm that `name` outlives the
// lexer.
lexer::lexer(const std::string& name, const char* data, size_t size)
    : indev_(false)
    , clean_(true)
    , loc_(location(&name))
    , mode_(build::dev)
    , header_top_(0)
    , locs_()
    , readers_()
{
    reader_.init(data, size);
}
2022-01-26 12:08:28 +01:00
void lexer::push_header(const std::string& file)
{
try
{
if (header_top_++ >= 10)
2022-01-30 14:45:07 +01:00
throw comp_error(loc_, "maximum gsh depth exceeded '10'");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
auto data = resolver::file_data(file + ".gsh");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
readers_.push(reader_);
locs_.push(loc_);
loc_.initialize(std::get<0>(data));
reader_.init(std::get<1>(data), std::get<2>(data));
2022-02-04 15:39:47 +01:00
clean_ = true;
2022-01-26 12:08:28 +01:00
}
catch (const std::exception& e)
{
2022-01-30 14:45:07 +01:00
throw error("parsing header file '" + file + "': " + e.what());
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
}
2022-01-26 12:08:28 +01:00
// Leaves the current header: restores the reader and location saved by
// the matching push_header() and lowers the nesting counter.
void lexer::pop_header()
{
    reader_ = readers_.top();
    readers_.pop();

    loc_ = locs_.top();
    locs_.pop();

    --header_top_;
}
2022-01-30 14:45:07 +01:00
void lexer::ban_header(const location& loc)
2021-09-13 17:50:36 +02:00
{
2022-01-26 12:08:28 +01:00
if (header_top_ > 0)
{
throw comp_error(loc, "not allowed inside a gsh file");
}
2021-09-13 17:50:36 +02:00
}
2022-01-30 14:45:07 +01:00
auto lexer::lex() -> parser::symbol_type
2021-09-13 17:50:36 +02:00
{
2022-01-26 12:08:28 +01:00
buffer_.length = 0;
2022-01-28 15:58:41 +01:00
state_ = state::start;
2022-01-26 12:08:28 +01:00
loc_.step();
while (true)
{
2022-01-28 15:58:41 +01:00
const auto& state = reader_.state;
auto& last = reader_.last_byte;
auto& curr = reader_.current_byte;
auto path = false;
if (state == reader::end)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
if (header_top_ > 0)
2022-01-26 12:08:28 +01:00
pop_header();
else
2022-01-30 14:45:07 +01:00
return parser::make_S4EOF(loc_);
2022-01-26 12:08:28 +01:00
}
2022-02-04 15:39:47 +01:00
if (clean_ && last != 0 && last != ' ' && last != '\t' && last != '\n')
clean_ = false;
advance();
2022-01-26 12:08:28 +01:00
switch (last)
{
case ' ':
case '\t':
case '\r':
loc_.step();
continue;
case '\n':
loc_.lines();
loc_.step();
2022-02-04 15:39:47 +01:00
clean_ = true;
2022-01-26 12:08:28 +01:00
continue;
2022-02-04 15:39:47 +01:00
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
2022-01-26 12:08:28 +01:00
case '/':
2022-01-28 15:58:41 +01:00
if (curr != '/' && curr != '*' && curr != '#' && curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_DIV(loc_);
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
if (last == '=')
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_DIV(loc_);
2022-01-26 12:08:28 +01:00
if (last == '#')
{
2022-01-28 15:58:41 +01:00
if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
2022-01-30 14:45:07 +01:00
if (mode_ == build::dev)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
indev_ = true;
2022-01-30 14:45:07 +01:00
return parser::make_DEVBEGIN(loc_);
2022-01-26 12:08:28 +01:00
}
else
{
while (true)
{
2022-01-28 15:58:41 +01:00
if (state == reader::end)
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (curr == '\n')
2022-01-26 12:08:28 +01:00
{
loc_.lines();
loc_.step();
}
2022-01-28 15:58:41 +01:00
else if (last == '#' && curr == '/')
2022-01-26 12:08:28 +01:00
{
reader_.advance();
break;
}
reader_.advance();
}
}
}
else if (last == '*')
{
while (true)
{
2022-01-28 15:58:41 +01:00
if (state == reader::end)
throw comp_error(loc_, "unmatched multiline comment start ('/*')");
if (curr == '\n')
2022-01-26 12:08:28 +01:00
{
loc_.lines();
loc_.step();
}
2022-01-28 15:58:41 +01:00
else if (last == '*' && curr == '/')
2022-01-26 12:08:28 +01:00
{
reader_.advance();
break;
}
reader_.advance();
}
}
else if (last == '/')
{
while (true)
{
2022-02-04 15:39:47 +01:00
if (state == reader::end)
break;
2022-02-10 13:07:05 +01:00
if (last == '\\' && (curr == '\r' || curr == '\n'))
2022-02-04 15:39:47 +01:00
{
reader_.advance();
if (state == reader::end)
break;
if (last == '\r')
{
if (curr != '\n')
throw comp_error(loc_, "invalid token ('\')");
reader_.advance();
}
loc_.lines();
loc_.step();
continue;
}
if (curr == '\n')
2022-01-26 12:08:28 +01:00
break;
reader_.advance();
}
}
continue;
case '#':
if (curr == '/')
{
2022-01-28 15:58:41 +01:00
if (!indev_)
throw comp_error(loc_, "unmatched devblock end ('#/')");
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
indev_ = false;
2022-02-04 15:39:47 +01:00
advance();
2022-01-30 14:45:07 +01:00
return parser::make_DEVEND(loc_);
2022-01-26 12:08:28 +01:00
}
buffer_.push(last);
2022-02-04 15:39:47 +01:00
advance();
while (state == reader::ok)
{
if (last != ' ' || last != '\t')
break;
advance();
}
2022-01-26 12:12:34 +01:00
2022-01-28 15:58:41 +01:00
if (state == reader::end || !((last > 64 && last < 91) || (last > 96 && last < 123)))
2022-02-04 15:39:47 +01:00
throw comp_error(loc_, "invalid preprocessor directive ('#')");
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
state_ = state::preprocessor;
goto lex_name;
2022-01-26 12:08:28 +01:00
case '*':
2022-01-28 15:58:41 +01:00
if (curr != '/' && curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_MUL(loc_);
2022-01-26 12:12:34 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
if (last == '=')
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_MUL(loc_);
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
2022-01-26 12:08:28 +01:00
case '"':
2022-01-28 15:58:41 +01:00
state_ = state::string;
goto lex_string;
2022-01-26 12:08:28 +01:00
case '.':
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
if (state == reader::end)
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "unterminated field ('.')");
state_ = state::field;
goto lex_name_or_number;
2022-01-26 12:08:28 +01:00
case '(':
2022-01-30 14:45:07 +01:00
return parser::make_LPAREN(loc_);
2022-01-26 12:08:28 +01:00
case ')':
2022-01-30 14:45:07 +01:00
return parser::make_RPAREN(loc_);
2022-01-26 12:08:28 +01:00
case '{':
2022-01-30 14:45:07 +01:00
return parser::make_LBRACE(loc_);
2022-01-26 12:08:28 +01:00
case '}':
2022-01-30 14:45:07 +01:00
return parser::make_RBRACE(loc_);
2022-01-26 12:08:28 +01:00
case '[':
2022-01-30 14:45:07 +01:00
return parser::make_LBRACKET(loc_);
2022-01-26 12:08:28 +01:00
case ']':
2022-01-30 14:45:07 +01:00
return parser::make_RBRACKET(loc_);
2022-01-26 12:08:28 +01:00
case ',':
2022-01-30 14:45:07 +01:00
return parser::make_COMMA(loc_);
2022-01-26 12:08:28 +01:00
case ';':
2022-01-30 14:45:07 +01:00
return parser::make_SEMICOLON(loc_);
2022-01-26 12:08:28 +01:00
case ':':
if (curr != ':')
2022-01-30 14:45:07 +01:00
return parser::make_COLON(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-30 14:45:07 +01:00
return parser::make_DOUBLECOLON(loc_);
2022-01-26 12:08:28 +01:00
case '?':
2022-01-30 14:45:07 +01:00
return parser::make_QMARK(loc_);
2022-01-26 12:08:28 +01:00
case '=':
if (curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-30 14:45:07 +01:00
return parser::make_EQUALITY(loc_);
2022-01-26 12:08:28 +01:00
case '+':
if (curr != '+' && curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_ADD(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '+')
2022-01-30 14:45:07 +01:00
return parser::make_INCREMENT(loc_);
2022-01-28 15:58:41 +01:00
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_ADD(loc_);
2022-01-26 12:08:28 +01:00
case '-':
if (curr != '-' && curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_SUB(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '-')
2022-01-30 14:45:07 +01:00
return parser::make_DECREMENT(loc_);
2022-01-28 15:58:41 +01:00
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_SUB(loc_);
2022-01-26 12:08:28 +01:00
case '%':
if (curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_MOD(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_MOD(loc_);
2022-01-26 12:08:28 +01:00
case '|':
if (curr != '|' && curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_BITWISE_OR(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '|')
2022-01-30 14:45:07 +01:00
return parser::make_OR(loc_);
2022-01-28 15:58:41 +01:00
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_BW_OR(loc_);
2022-01-26 12:08:28 +01:00
case '&':
2022-02-04 15:39:47 +01:00
if (curr != '&' && curr != '=' && curr != '"')
2022-01-30 14:45:07 +01:00
return parser::make_BITWISE_AND(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '&')
2022-01-30 14:45:07 +01:00
return parser::make_AND(loc_);
2022-01-28 15:58:41 +01:00
if (last == '=')
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_BW_AND(loc_);
2022-01-28 15:58:41 +01:00
state_ = state::localize;
goto lex_string;
2022-01-26 12:08:28 +01:00
case '^':
if (curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_BITWISE_EXOR(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_BW_EXOR(loc_);
2022-01-26 12:08:28 +01:00
case '!':
if (curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_NOT(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-30 14:45:07 +01:00
return parser::make_INEQUALITY(loc_);
2022-01-26 12:08:28 +01:00
case '~':
2022-01-30 14:45:07 +01:00
return parser::make_COMPLEMENT(loc_);
2022-01-26 12:08:28 +01:00
case '<':
if (curr != '<' && curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_LESS(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '=')
2022-01-30 14:45:07 +01:00
return parser::make_LESS_EQUAL(loc_);
2022-01-28 15:58:41 +01:00
if (curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_LSHIFT(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_LSHIFT(loc_);
2022-01-26 12:08:28 +01:00
case '>':
if (curr != '>' && curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_GREATER(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '=')
2022-01-30 14:45:07 +01:00
return parser::make_GREATER_EQUAL(loc_);
2022-01-28 15:58:41 +01:00
if (curr != '=')
2022-01-30 14:45:07 +01:00
return parser::make_RSHIFT(loc_);
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-30 14:45:07 +01:00
return parser::make_ASSIGN_RSHIFT(loc_);
2022-01-26 12:08:28 +01:00
default:
2022-01-28 15:58:41 +01:00
lex_name_or_number:
2022-01-26 12:08:28 +01:00
if (last >= '0' && last <= '9')
2022-01-28 15:58:41 +01:00
goto lex_number;
2022-01-26 12:08:28 +01:00
else if (last == '_' || last >= 'A' && last <= 'Z' || last >= 'a' && last <= 'z')
2022-01-28 15:58:41 +01:00
goto lex_name;
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, utils::string::va("bad token: \'%c\'", last));
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
lex_string:
while (true)
{
2022-02-04 15:39:47 +01:00
if (state == reader::end)
throw comp_error(loc_, "unmatched string start ('\"')");
if (curr == '"')
{
advance();
2022-01-28 15:58:41 +01:00
break;
2022-02-04 15:39:47 +01:00
}
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
if (curr == '\n')
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "unterminated string literal");
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
if (curr == '\\')
2022-01-28 15:58:41 +01:00
{
2022-02-04 15:39:47 +01:00
advance();
if (state == reader::end)
throw comp_error(loc_, "invalid token ('\')");
2022-01-28 15:58:41 +01:00
char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (!buffer_.push(c))
throw comp_error(loc_, "max string size exceeded");
}
2022-02-04 15:39:47 +01:00
else if (!buffer_.push(curr))
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "max string size exceeded");
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (state_ == state::localize)
2022-01-30 14:45:07 +01:00
return parser::make_ISTRING(std::string(buffer_.data, buffer_.length), loc_);
2021-09-13 17:50:36 +02:00
2022-01-30 14:45:07 +01:00
return parser::make_STRING(std::string(buffer_.data, buffer_.length), loc_);
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
lex_name:
buffer_.push(last);
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (state == reader::end)
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == '\\')
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
if (!buffer_.push('/'))
throw comp_error(loc_, "max string size exceeded");
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
else if (!buffer_.push(curr))
throw comp_error(loc_, "max string size exceeded");
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if(state_ == state::field)
{
if (path)
throw comp_error(loc_, "invalid field token '\\'");
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
if (std::string_view(buffer_.data, buffer_.length) == "size")
{
2022-01-30 14:45:07 +01:00
return parser::make_SIZE(loc_);
2022-01-28 15:58:41 +01:00
}
2022-01-30 14:45:07 +01:00
return parser::make_FIELD(std::string(buffer_.data, buffer_.length), loc_);
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
else if (state_ == state::preprocessor)
{
if (path)
throw comp_error(loc_, "invalid preprocessor directive");
2022-02-04 15:39:47 +01:00
2022-01-30 14:45:07 +01:00
auto token = parser::token::S4UNDEF;
2021-09-13 17:50:36 +02:00
2022-01-30 14:45:07 +01:00
if (buffer_.length < 16)
{
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
if (itr != keyword_map.end())
2022-01-30 14:45:07 +01:00
{
2022-02-04 15:39:47 +01:00
if (itr->second > parser::token::SH_ENDIF)
2022-01-30 14:45:07 +01:00
return parser::symbol_type(itr->second, loc_);
2022-02-04 15:39:47 +01:00
2022-01-30 14:45:07 +01:00
token = itr->second;
}
}
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
preprocessor(token);
2022-01-28 15:58:41 +01:00
state_ = state::start;
continue;
}
else
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (buffer_.data[0] != '_')
{
for (auto i = 0; i < buffer_.length; i++)
{
auto c = buffer_.data[i];
if (c > 64 && c < 91)
buffer_.data[i] = c + 32;
}
}
2022-01-30 14:45:07 +01:00
if (buffer_.length < 17)
{
const auto& itr = keyword_map.find(std::string_view(buffer_.data, buffer_.length));
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
if (itr != keyword_map.end())
2022-01-30 14:45:07 +01:00
return parser::symbol_type(itr->second, loc_);
}
2022-01-28 15:58:41 +01:00
if (path)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (buffer_.data[buffer_.length - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
2022-01-30 14:45:07 +01:00
return parser::make_PATH(resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
2022-01-30 14:45:07 +01:00
return parser::make_IDENTIFIER(resolver::make_token(std::string_view(buffer_.data, buffer_.length)), loc_);
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
lex_number:
if (state_ == state::field)
buffer_.push('.');
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (state_ == state::field || last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
buffer_.push(last);
auto dot = 0;
auto flt = 0;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (state == reader::end)
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw comp_error(loc_, "invalid number literal");
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if ((curr == '.' || curr == 'f') && last == '\'')
throw comp_error(loc_, "invalid number literal");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (curr == '\'')
{
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
continue;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
2022-01-26 12:08:28 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (!buffer_.push(curr))
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "number literal size exceeded");
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (last == '\'')
throw comp_error(loc_, "invalid number literal");
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (state_ == state::field && dot || dot > 1 || flt > 1 || flt && buffer_.data[buffer_.length - 1] != 'f')
throw comp_error(loc_, "invalid number literal");
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (state_ == state::field || dot || flt)
2022-01-30 14:45:07 +01:00
return parser::make_FLOAT(std::string(buffer_.data, buffer_.length), loc_);
2022-01-28 15:58:41 +01:00
2022-01-30 14:45:07 +01:00
return parser::make_INTEGER(std::string(buffer_.data, buffer_.length), loc_);
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
else if (curr == 'o')
2022-01-26 12:08:28 +01:00
{
2022-02-04 15:39:47 +01:00
advance();
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (state == reader::end)
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == '\'' && (last == '\'' || last == 'o') || (curr == 'o' && last == '\''))
throw comp_error(loc_, "invalid octal literal");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (curr == '\'')
{
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
continue;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (!(curr > 47 && curr < 56))
2022-01-26 12:08:28 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (last == '\'' || buffer_.length <= 0)
throw comp_error(loc_, "invalid octal literal");
2021-09-13 17:50:36 +02:00
2022-01-30 14:45:07 +01:00
return parser::make_INTEGER(xsk::utils::string::oct_to_dec(buffer_.data), loc_);
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
else if (curr == 'b')
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
buffer_.push(last);
buffer_.push(curr);
2022-02-04 15:39:47 +01:00
advance();
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (state == reader::end)
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == '\'' && (last == '\'' || last == 'b') || (curr == 'b' && last == '\''))
throw comp_error(loc_, "invalid binary literal");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (curr == '\'')
{
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
continue;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr != '0' && curr != '1')
2022-01-26 12:08:28 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (!buffer_.push(curr))
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "number literal size exceeded");
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (last == '\'' || buffer_.length < 3)
throw comp_error(loc_, "invalid binary literal");
2021-09-13 17:50:36 +02:00
2022-01-30 14:45:07 +01:00
return parser::make_INTEGER(xsk::utils::string::bin_to_dec(buffer_.data), loc_);
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
else if (curr == 'x')
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
buffer_.push(last);
buffer_.push(curr);
2022-02-04 15:39:47 +01:00
advance();
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (state == reader::end)
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == '\'' && (last == '\'' || last == 'x') || (curr == 'x' && last == '\''))
throw comp_error(loc_, "invalid hexadecimal literal");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (curr == '\'')
{
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
continue;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (!((curr > 47 && curr < 58) || (curr > 64 && curr < 71) || (curr > 96 && curr < 103)))
2022-01-26 12:08:28 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (!buffer_.push(curr))
throw error("gsc lexer: out of memory!");
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (last == '\'' || buffer_.length < 3)
throw comp_error(loc_, "invalid hexadecimal literal");
2022-01-26 12:08:28 +01:00
2022-01-30 14:45:07 +01:00
return parser::make_INTEGER(xsk::utils::string::hex_to_dec(buffer_.data), loc_);
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
// cant get here!
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
}
2022-02-04 15:39:47 +01:00
// Advances the reader by one byte and then splices line continuations:
// while the lookahead is a backslash immediately followed by a line
// break ('\n' or '\r\n'), both are removed from the stream and the
// location is moved to the next line.
//
// Throws comp_error when a backslash is the final byte of the input,
// when a continuation is the last thing in the input, or when '\r' is
// not followed by '\n'.
void lexer::advance()
{
    reader_.advance();

    // dont wrap comment marks '/\/' '/\*' outside strings
    if (state_ == state::start && reader_.last_byte == '/')
        return;

    // Only inspect the lookahead while there is input left to look at.
    while (reader_.state == reader::ok && reader_.current_byte == '\\')
    {
        if (reader_.bytes_remaining == 1)
            throw comp_error(loc_, "invalid token ('\\')");

        if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
            break;

        if (reader_.buffer_pos[1] == '\r')
        {
            if (reader_.bytes_remaining <= 3 || reader_.buffer_pos[2] != '\n')
                throw comp_error(loc_, "invalid token ('\\')");

            reader_.buffer_pos += 3;
            reader_.bytes_remaining -= 3;
        }
        else
        {
            // Exactly one kind of line break is consumed per pass; the
            // position has not moved yet, so this is the '\n' case.
            if (reader_.bytes_remaining == 2)
                throw comp_error(loc_, "invalid token ('\\')");

            reader_.buffer_pos += 2;
            reader_.bytes_remaining -= 2;
        }

        if (reader_.bytes_remaining == 0)
        {
            reader_.state = reader::end;
            reader_.current_byte = 0;
        }
        else
        {
            reader_.current_byte = *reader_.buffer_pos;
        }

        loc_.lines();
        loc_.step();
    }
}
// Handles a '#' directive recognised by lex().
//
// token: directive kind looked up by lex(), or S4UNDEF when the word is
//        not a known directive.
//
// Always throws comp_error: when the directive does not start its line,
// when the directive is one of the recognised-but-unimplemented kinds,
// or when it is unknown.
void lexer::preprocessor(parser::token::token_kind_type token)
{
    if (!clean_)
        throw comp_error(loc_, "invalid token ('#')");

    switch (token)
    {
        // Recognised directives that share one outcome.
        case parser::token::SH_DEFINE:
        case parser::token::SH_UNDEF:
        case parser::token::SH_IFDEF:
        case parser::token::SH_IFNDEF:
        case parser::token::SH_IF:
        case parser::token::SH_ELIF:
        case parser::token::SH_ELSE:
        case parser::token::SH_ENDIF:
            throw comp_error(loc_, "unimplemented preprocessor directive");
        default:
            throw comp_error(loc_, "unknown preprocessor directive");
    }
}
2022-01-26 12:08:28 +01:00
} // namespace xsk::gsc::s4