gsc-tool/src/gsc/lexer.cpp

608 lines
18 KiB
C++
Raw Normal View History

2023-01-23 23:31:08 +01:00
// Copyright 2023 xensik. All rights reserved.
2022-01-26 12:08:28 +01:00
//
// Use of this source code is governed by a GNU GPLv3 license
// that can be found in the LICENSE file.
2023-01-23 23:31:08 +01:00
#include "stdinc.hpp"
2022-01-26 12:08:28 +01:00
#include "lexer.hpp"
2023-01-23 23:31:08 +01:00
#include "context.hpp"
#include "utils/string.hpp"
2022-01-26 12:08:28 +01:00
2023-01-23 23:31:08 +01:00
namespace xsk::gsc
2022-01-26 12:08:28 +01:00
{
lexer::lexer(context const* ctx, std::string const& name, char const* data, usize size) : ctx_{ ctx }, reader_{ data, size }, loc_{ &name }, buflen_{ 0 }, spacing_{ spacing::null }, indev_{ false }
2023-01-23 23:31:08 +01:00
{
}
auto lexer::lex() -> token
2022-01-26 12:08:28 +01:00
{
buflen_ = 0;
2022-01-26 12:08:28 +01:00
while (true)
{
2022-01-28 15:58:41 +01:00
auto& last = reader_.last_byte;
auto& curr = reader_.curr_byte;
2022-01-28 15:58:41 +01:00
auto path = false;
auto localize = false;
2022-02-20 12:10:21 +01:00
loc_.step();
2022-01-28 15:58:41 +01:00
if (reader_.ended())
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (indev_)
throw comp_error(loc_, "unmatched devblock start ('/#')");
2022-01-26 12:08:28 +01:00
if (curr == 0 && last != '\n')
{
curr = -1;
return token{ token::NEWLINE, spacing_, loc_ };
}
2022-01-26 12:08:28 +01:00
else
return token{ token::EOS, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
}
if (last == 0 || last == '\n')
spacing_ = spacing::null;
else if (last == ' ' || last == '\t')
spacing_ = (spacing_ == spacing::null) ? spacing::empty : spacing::back;
else
spacing_ = spacing::none;
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
switch (last)
{
case ' ':
case '\t':
case '\r':
loc_.step();
continue;
case '\n':
loc_.lines();
loc_.step();
return token{ token::NEWLINE, spacing_, loc_ };
2022-02-04 15:39:47 +01:00
case '\\':
throw comp_error(loc_, "invalid token ('\\')");
2022-01-26 12:08:28 +01:00
case '/':
2022-02-20 12:10:21 +01:00
if (curr != '=' && curr != '#' && curr != '@' && curr != '*' && curr != '/')
return token{ token::DIV, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
if (last == '=')
return token{ token::DIVEQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
if (last == '#')
{
2022-01-28 15:58:41 +01:00
if (indev_)
throw comp_error(loc_, "cannot recurse devblock ('/#')");
2023-01-23 23:31:08 +01:00
if (ctx_->build() == build::dev)
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
indev_ = true;
return token{ token::DEVBEGIN, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
}
else
{
while (true)
{
if (reader_.ended())
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "unmatched devblock start ('/#')");
if (curr == '\n')
2022-01-26 12:08:28 +01:00
{
loc_.lines();
loc_.step();
}
2022-01-28 15:58:41 +01:00
else if (last == '#' && curr == '/')
2022-01-26 12:08:28 +01:00
{
2022-02-20 12:10:21 +01:00
advance();
2022-01-26 12:08:28 +01:00
break;
}
2022-02-20 12:10:21 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
}
}
2022-02-20 12:10:21 +01:00
else if (last == '@')
2022-01-26 12:08:28 +01:00
{
while (true)
{
if (reader_.ended())
2022-02-20 12:10:21 +01:00
throw comp_error(loc_, "unmatched script doc comment start ('/@')");
2022-01-28 15:58:41 +01:00
if (curr == '\n')
2022-01-26 12:08:28 +01:00
{
loc_.lines();
loc_.step();
}
2022-02-20 12:10:21 +01:00
else if (last == '@' && curr == '/')
2022-01-26 12:08:28 +01:00
{
2022-02-20 12:10:21 +01:00
advance();
2022-01-26 12:08:28 +01:00
break;
}
2022-02-20 12:10:21 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
}
2022-02-20 12:10:21 +01:00
else if (last == '*')
2022-01-26 12:08:28 +01:00
{
while (true)
{
if (reader_.ended())
2022-02-20 12:10:21 +01:00
throw comp_error(loc_, "unmatched multiline comment start ('/*')");
2022-02-04 15:39:47 +01:00
2022-02-20 12:10:21 +01:00
if (curr == '\n')
2022-02-04 15:39:47 +01:00
{
loc_.lines();
loc_.step();
}
2022-02-20 12:10:21 +01:00
else if (last == '*' && curr == '/')
{
advance();
break;
}
advance();
}
}
else if (last == '/')
{
while (true)
{
if (reader_.ended())
2022-02-20 12:10:21 +01:00
break;
2022-02-04 15:39:47 +01:00
if (curr == '\n')
2022-01-26 12:08:28 +01:00
break;
2022-02-20 12:10:21 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
}
continue;
case '#':
if (curr == '/')
{
2022-01-28 15:58:41 +01:00
if (!indev_)
throw comp_error(loc_, "unmatched devblock end ('#/')");
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-02-20 12:10:21 +01:00
indev_ = false;
return token{ token::DEVEND, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
}
return token{ token::SHARP, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '*':
2022-02-20 12:10:21 +01:00
if (curr != '=' && curr != '/')
return token{ token::STAR, spacing_, loc_ };
2022-01-26 12:12:34 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
if (last == '=')
return token{ token::STAREQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "unmatched multiline comment end ('*/')");
2022-01-26 12:08:28 +01:00
case '"':
2022-01-28 15:58:41 +01:00
goto lex_string;
2022-01-26 12:08:28 +01:00
case '.':
2022-02-20 12:10:21 +01:00
if (curr < '0' || curr > '9')
return token{ token::DOT, spacing_, loc_ };
2022-02-20 12:10:21 +01:00
goto lex_number;
2022-01-26 12:08:28 +01:00
case '(':
return token{ token::LPAREN, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case ')':
return token{ token::RPAREN, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '{':
return token{ token::LBRACE, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '}':
return token{ token::RBRACE, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '[':
return token{ token::LBRACKET, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case ']':
return token{ token::RBRACKET, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case ',':
return token{ token::COMMA, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case ';':
return token{ token::SEMICOLON, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case ':':
if (curr != ':')
return token{ token::COLON, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
return token{ token::DOUBLECOLON, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '?':
return token{ token::QMARK, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '=':
if (curr != '=')
return token{ token::ASSIGN, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
return token{ token::EQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '+':
if (curr != '+' && curr != '=')
return token{ token::PLUS, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '+')
return token{ token::INC, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
return token{ token::PLUSEQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '-':
if (curr != '-' && curr != '=')
return token{ token::MINUS, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '-')
return token{ token::DEC, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
return token{ token::MINUSEQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '%':
if (curr != '=')
return token{ token::MOD, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
return token{ token::MODEQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '|':
if (curr != '|' && curr != '=')
return token{ token::BITOR, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '|')
return token{ token::OR, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
return token{ token::BITOREQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '&':
2022-02-04 15:39:47 +01:00
if (curr != '&' && curr != '=' && curr != '"')
return token{ token::BITAND, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '&')
return token{ token::AND, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
if (last == '=')
return token{ token::BITANDEQ, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
localize = true;
2022-01-28 15:58:41 +01:00
goto lex_string;
2022-01-26 12:08:28 +01:00
case '^':
if (curr != '=')
return token{ token::BITEXOR, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
return token{ token::BITEXOREQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '!':
if (curr != '=')
return token{ token::BANG, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
return token{ token::NE, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '~':
return token{ token::TILDE, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '<':
if (curr != '<' && curr != '=')
return token{ token::LT, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '=')
return token{ token::LE, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
if (curr != '=')
return token{ token::SHL, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
return token{ token::SHLEQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
case '>':
if (curr != '>' && curr != '=')
return token{ token::GT, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
if (last == '=')
return token{ token::GE, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
if (curr != '=')
return token{ token::SHR, spacing_, loc_ };
2022-01-28 15:58:41 +01:00
2022-02-04 15:39:47 +01:00
advance();
return token{ token::SHREQ, spacing_, loc_ };
2022-01-26 12:08:28 +01:00
default:
if (last >= '0' && last <= '9')
2022-01-28 15:58:41 +01:00
goto lex_number;
2022-04-07 19:22:10 +02:00
else if (last == '_' || (last >= 'A' && last <= 'Z') || (last >= 'a' && last <= 'z'))
2022-01-28 15:58:41 +01:00
goto lex_name;
2022-01-26 12:08:28 +01:00
2023-01-23 23:31:08 +01:00
throw comp_error(loc_, fmt::format("bad token: '{}'", last));
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
lex_string:
while (true)
{
if (reader_.ended())
2022-02-04 15:39:47 +01:00
throw comp_error(loc_, "unmatched string start ('\"')");
if (curr == '"')
{
advance();
2022-01-28 15:58:41 +01:00
break;
2022-02-04 15:39:47 +01:00
}
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
if (curr == '\n')
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "unterminated string literal");
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
if (curr == '\\')
2022-01-28 15:58:41 +01:00
{
2022-02-04 15:39:47 +01:00
advance();
if (reader_.ended())
2022-02-04 15:39:47 +01:00
throw comp_error(loc_, "invalid token ('\')");
2022-01-28 15:58:41 +01:00
char c = curr;
switch (curr)
{
case 't': c = '\t'; break;
case 'r': c = '\r'; break;
case 'n': c = '\n'; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
default: break;
}
2021-09-13 17:50:36 +02:00
push(c);
2022-01-28 15:58:41 +01:00
}
else
push(curr);
2022-01-26 12:08:28 +01:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
}
2021-09-13 17:50:36 +02:00
if (localize)
return token{ token::ISTRING, spacing_, loc_, std::string{ &buffer_[0], buflen_ } };
2021-09-13 17:50:36 +02:00
return token{ token::STRING, spacing_, loc_, std::string{ &buffer_[0], buflen_ } };
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
lex_name:
push(last);
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
if (reader_.ended())
2022-01-28 15:58:41 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (!(curr == '\\' || curr == '_' || (curr > 64 && curr < 91) || (curr > 96 && curr < 123) || (curr > 47 && curr < 58)))
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == '\\')
2022-01-26 12:08:28 +01:00
{
2022-01-28 15:58:41 +01:00
if (last == '\\')
throw comp_error(loc_, "invalid path '\\\\'");
path = true;
push('/');
2022-01-26 12:08:28 +01:00
}
else
push(curr);
2021-09-13 17:50:36 +02:00
2022-02-04 15:39:47 +01:00
advance();
2022-01-28 15:58:41 +01:00
}
2021-09-13 17:50:36 +02:00
if (path)
2022-01-28 15:58:41 +01:00
{
if (buffer_[buflen_ - 1] == '/')
throw comp_error(loc_, "invalid path end '\\'");
2021-09-13 17:50:36 +02:00
return token{ token::PATH, spacing_, loc_, ctx_->make_token(std::string_view{ &buffer_[0], buflen_ }) };
2022-01-28 15:58:41 +01:00
}
return token{ token::NAME, spacing_, loc_, std::string{ &buffer_[0], buflen_ } };
2022-01-26 12:08:28 +01:00
2022-01-28 15:58:41 +01:00
lex_number:
2022-02-20 12:10:21 +01:00
if (last == '.' || last != '0' || (last == '0' && (curr != 'o' && curr != 'b' && curr != 'x')))
2022-01-26 12:08:28 +01:00
{
push(last);
2022-01-28 15:58:41 +01:00
2022-02-20 12:10:21 +01:00
auto dot = last == '.' ? 1 : 0;
2022-01-28 15:58:41 +01:00
auto flt = 0;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
if (reader_.ended())
2022-01-28 15:58:41 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == '\'' && (last == '\'' || last == 'f' || last == '.'))
throw comp_error(loc_, "invalid number literal");
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if ((curr == '.' || curr == 'f') && last == '\'')
throw comp_error(loc_, "invalid number literal");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (curr == '\'')
{
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
continue;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr == 'f')
flt++;
else if (curr == '.')
dot++;
else if (!(curr > 47 && curr < 58))
2022-01-26 12:08:28 +01:00
break;
2021-09-13 17:50:36 +02:00
push(curr);
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (last == '\'')
throw comp_error(loc_, "invalid number literal");
2021-09-13 17:50:36 +02:00
if (dot > 1 || flt > 1 || (flt && buffer_[buflen_ - 1] != 'f'))
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "invalid number literal");
2021-09-13 17:50:36 +02:00
2022-02-20 12:10:21 +01:00
if (dot || flt)
return token{ token::FLT, spacing_, loc_, std::string{ &buffer_[0], buflen_ } };
2022-01-28 15:58:41 +01:00
return token{ token::INT, spacing_, loc_, std::string{ &buffer_[0], buflen_ } };
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
else if (curr == 'o')
2022-01-26 12:08:28 +01:00
{
2022-02-04 15:39:47 +01:00
advance();
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
if (reader_.ended())
2022-01-28 15:58:41 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-04-07 19:22:10 +02:00
if ((curr == '\'' && (last == '\'' || last == 'o')) || (curr == 'o' && last == '\''))
2022-02-20 12:10:21 +01:00
throw comp_error(loc_, "invalid octal literal");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (curr == '\'')
{
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
continue;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (!(curr > 47 && curr < 56))
2022-01-26 12:08:28 +01:00
break;
2021-09-13 17:50:36 +02:00
push(curr);
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
if (last == '\'' || buflen_ <= 0)
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "invalid octal literal");
2021-09-13 17:50:36 +02:00
push('\0');
return token{ token::INT, spacing_, loc_, utils::string::oct_to_dec(&buffer_[0]) };
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
else if (curr == 'b')
2022-01-26 12:08:28 +01:00
{
push(last);
push(curr);
2022-02-04 15:39:47 +01:00
advance();
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
if (reader_.ended())
2022-01-28 15:58:41 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-04-07 19:22:10 +02:00
if ((curr == '\'' && (last == '\'' || last == 'b')) || (curr == 'b' && last == '\''))
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "invalid binary literal");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (curr == '\'')
{
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
continue;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (curr != '0' && curr != '1')
2022-01-26 12:08:28 +01:00
break;
2021-09-13 17:50:36 +02:00
push(curr);
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
if (last == '\'' || buflen_ < 3)
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "invalid binary literal");
2021-09-13 17:50:36 +02:00
push('\0');
return token{ token::INT, spacing_, loc_, utils::string::bin_to_dec(&buffer_[0]) };
2022-01-26 12:08:28 +01:00
}
2022-01-28 15:58:41 +01:00
else if (curr == 'x')
2022-01-26 12:08:28 +01:00
{
push(last);
push(curr);
2022-02-04 15:39:47 +01:00
advance();
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
while (true)
2022-01-26 12:08:28 +01:00
{
if (reader_.ended())
2022-01-28 15:58:41 +01:00
break;
2021-09-13 17:50:36 +02:00
2022-04-07 19:22:10 +02:00
if ((curr == '\'' && (last == '\'' || last == 'x')) || (curr == 'x' && last == '\''))
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "invalid hexadecimal literal");
2021-09-13 17:50:36 +02:00
2022-01-26 12:08:28 +01:00
if (curr == '\'')
{
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
continue;
}
2021-09-13 17:50:36 +02:00
2022-01-28 15:58:41 +01:00
if (!((curr > 47 && curr < 58) || (curr > 64 && curr < 71) || (curr > 96 && curr < 103)))
2022-01-26 12:08:28 +01:00
break;
2021-09-13 17:50:36 +02:00
push(curr);
2022-02-04 15:39:47 +01:00
advance();
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
if (last == '\'' || buflen_ < 3)
2022-01-28 15:58:41 +01:00
throw comp_error(loc_, "invalid hexadecimal literal");
2022-01-26 12:08:28 +01:00
push('\0');
return token{ token::INT, spacing_, loc_, utils::string::hex_to_dec(&buffer_[0]) };
2022-01-26 12:08:28 +01:00
}
2022-02-20 12:10:21 +01:00
2023-01-23 23:31:08 +01:00
throw error("UNEXPECTED LEXER INTERNAL ERROR");
2022-01-26 12:08:28 +01:00
}
2021-09-13 17:50:36 +02:00
}
// Appends `c` to the literal buffer and bumps the stored length.
// Throws error once 0x1000 characters are already buffered.
auto lexer::push(char c) -> void
{
    if (buflen_ < 0x1000)
    {
        buffer_[buflen_] = c;
        ++buflen_;
        return;
    }

    throw error("lexer: max literal size exceeded");
}
2023-01-23 23:31:08 +01:00
auto lexer::advance() -> void
2022-02-04 15:39:47 +01:00
{
reader_.advance();
2022-02-20 12:10:21 +01:00
loc_.end.column++;
2022-02-04 15:39:47 +01:00
if (reader_.curr_byte == '\\') [[unlikely]]
linewrap();
2022-02-20 12:10:21 +01:00
}
2022-02-04 15:39:47 +01:00
auto lexer::linewrap() -> void
2022-02-20 12:10:21 +01:00
{
while (reader_.curr_byte == '\\')
2022-02-04 15:39:47 +01:00
{
if (reader_.available == 1)
2022-02-04 15:39:47 +01:00
throw comp_error(loc_, "invalid token ('\\')");
if (reader_.buffer_pos[1] != '\r' && reader_.buffer_pos[1] != '\n')
break;
if (reader_.buffer_pos[1] == '\r')
{
if (reader_.available <= 3 || reader_.buffer_pos[2] != '\n')
2022-02-04 15:39:47 +01:00
throw comp_error(loc_, "invalid token ('\\')");
reader_.buffer_pos += 3;
reader_.available -= 3;
2022-02-04 15:39:47 +01:00
}
if ((reader_.buffer_pos[1] == '\n'))
{
if (reader_.available == 2)
2022-02-04 15:39:47 +01:00
throw comp_error(loc_, "invalid token ('\\')");
reader_.buffer_pos += 2;
reader_.available -= 2;
2022-02-04 15:39:47 +01:00
}
reader_.curr_byte = reader_.available ? *reader_.buffer_pos : 0;
2022-02-04 15:39:47 +01:00
loc_.lines();
loc_.step();
}
}
2023-01-23 23:31:08 +01:00
} // namespace xsk::gsc