You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-04 12:00:25 +00:00
New GDScript tokenizer and parser
Sometimes to fix something you have to break it first. This get GDScript mostly working with the new tokenizer and parser but a lot of things isn't working yet. It compiles and it's usable, and that should be enough for now. Don't worry: other huge commits will come after this.
This commit is contained in:
@@ -31,268 +31,209 @@
|
||||
#ifndef GDSCRIPT_TOKENIZER_H
|
||||
#define GDSCRIPT_TOKENIZER_H
|
||||
|
||||
#include "core/pair.h"
|
||||
#include "core/list.h"
|
||||
#include "core/set.h"
|
||||
#include "core/string_name.h"
|
||||
#include "core/ustring.h"
|
||||
#include "core/variant.h"
|
||||
#include "core/vmap.h"
|
||||
#include "gdscript_functions.h"
|
||||
#include "core/vector.h"
|
||||
|
||||
class GDScriptTokenizer {
|
||||
public:
|
||||
enum Token {
|
||||
|
||||
TK_EMPTY,
|
||||
TK_IDENTIFIER,
|
||||
TK_CONSTANT,
|
||||
TK_SELF,
|
||||
TK_BUILT_IN_TYPE,
|
||||
TK_BUILT_IN_FUNC,
|
||||
TK_OP_IN,
|
||||
TK_OP_EQUAL,
|
||||
TK_OP_NOT_EQUAL,
|
||||
TK_OP_LESS,
|
||||
TK_OP_LESS_EQUAL,
|
||||
TK_OP_GREATER,
|
||||
TK_OP_GREATER_EQUAL,
|
||||
TK_OP_AND,
|
||||
TK_OP_OR,
|
||||
TK_OP_NOT,
|
||||
TK_OP_ADD,
|
||||
TK_OP_SUB,
|
||||
TK_OP_MUL,
|
||||
TK_OP_DIV,
|
||||
TK_OP_MOD,
|
||||
TK_OP_SHIFT_LEFT,
|
||||
TK_OP_SHIFT_RIGHT,
|
||||
TK_OP_ASSIGN,
|
||||
TK_OP_ASSIGN_ADD,
|
||||
TK_OP_ASSIGN_SUB,
|
||||
TK_OP_ASSIGN_MUL,
|
||||
TK_OP_ASSIGN_DIV,
|
||||
TK_OP_ASSIGN_MOD,
|
||||
TK_OP_ASSIGN_SHIFT_LEFT,
|
||||
TK_OP_ASSIGN_SHIFT_RIGHT,
|
||||
TK_OP_ASSIGN_BIT_AND,
|
||||
TK_OP_ASSIGN_BIT_OR,
|
||||
TK_OP_ASSIGN_BIT_XOR,
|
||||
TK_OP_BIT_AND,
|
||||
TK_OP_BIT_OR,
|
||||
TK_OP_BIT_XOR,
|
||||
TK_OP_BIT_INVERT,
|
||||
//TK_OP_PLUS_PLUS,
|
||||
//TK_OP_MINUS_MINUS,
|
||||
TK_CF_IF,
|
||||
TK_CF_ELIF,
|
||||
TK_CF_ELSE,
|
||||
TK_CF_FOR,
|
||||
TK_CF_WHILE,
|
||||
TK_CF_BREAK,
|
||||
TK_CF_CONTINUE,
|
||||
TK_CF_PASS,
|
||||
TK_CF_RETURN,
|
||||
TK_CF_MATCH,
|
||||
TK_PR_FUNCTION,
|
||||
TK_PR_CLASS,
|
||||
TK_PR_CLASS_NAME,
|
||||
TK_PR_EXTENDS,
|
||||
TK_PR_IS,
|
||||
TK_PR_ONREADY,
|
||||
TK_PR_TOOL,
|
||||
TK_PR_STATIC,
|
||||
TK_PR_EXPORT,
|
||||
TK_PR_SETGET,
|
||||
TK_PR_CONST,
|
||||
TK_PR_VAR,
|
||||
TK_PR_AS,
|
||||
TK_PR_VOID,
|
||||
TK_PR_ENUM,
|
||||
TK_PR_PRELOAD,
|
||||
TK_PR_ASSERT,
|
||||
TK_PR_YIELD,
|
||||
TK_PR_SIGNAL,
|
||||
TK_PR_BREAKPOINT,
|
||||
TK_PR_REMOTE,
|
||||
TK_PR_MASTER,
|
||||
TK_PR_PUPPET,
|
||||
TK_PR_REMOTESYNC,
|
||||
TK_PR_MASTERSYNC,
|
||||
TK_PR_PUPPETSYNC,
|
||||
TK_BRACKET_OPEN,
|
||||
TK_BRACKET_CLOSE,
|
||||
TK_CURLY_BRACKET_OPEN,
|
||||
TK_CURLY_BRACKET_CLOSE,
|
||||
TK_PARENTHESIS_OPEN,
|
||||
TK_PARENTHESIS_CLOSE,
|
||||
TK_COMMA,
|
||||
TK_SEMICOLON,
|
||||
TK_PERIOD,
|
||||
TK_QUESTION_MARK,
|
||||
TK_COLON,
|
||||
TK_DOLLAR,
|
||||
TK_FORWARD_ARROW,
|
||||
TK_NEWLINE,
|
||||
TK_CONST_PI,
|
||||
TK_CONST_TAU,
|
||||
TK_WILDCARD,
|
||||
TK_CONST_INF,
|
||||
TK_CONST_NAN,
|
||||
TK_ERROR,
|
||||
TK_EOF,
|
||||
TK_CURSOR, //used for code completion
|
||||
TK_MAX
|
||||
};
|
||||
|
||||
protected:
|
||||
enum StringMode {
|
||||
STRING_SINGLE_QUOTE,
|
||||
STRING_DOUBLE_QUOTE,
|
||||
STRING_MULTILINE
|
||||
};
|
||||
|
||||
static const char *token_names[TK_MAX];
|
||||
|
||||
public:
|
||||
static const char *get_token_name(Token p_token);
|
||||
|
||||
bool is_token_literal(int p_offset = 0, bool variable_safe = false) const;
|
||||
StringName get_token_literal(int p_offset = 0) const;
|
||||
|
||||
virtual const Variant &get_token_constant(int p_offset = 0) const = 0;
|
||||
virtual Token get_token(int p_offset = 0) const = 0;
|
||||
virtual StringName get_token_identifier(int p_offset = 0) const = 0;
|
||||
virtual GDScriptFunctions::Function get_token_built_in_func(int p_offset = 0) const = 0;
|
||||
virtual Variant::Type get_token_type(int p_offset = 0) const = 0;
|
||||
virtual int get_token_line(int p_offset = 0) const = 0;
|
||||
virtual int get_token_column(int p_offset = 0) const = 0;
|
||||
virtual int get_token_line_indent(int p_offset = 0) const = 0;
|
||||
virtual int get_token_line_tab_indent(int p_offset = 0) const = 0;
|
||||
virtual String get_token_error(int p_offset = 0) const = 0;
|
||||
virtual void advance(int p_amount = 1) = 0;
|
||||
#ifdef DEBUG_ENABLED
|
||||
virtual const Vector<Pair<int, String>> &get_warning_skips() const = 0;
|
||||
virtual const Set<String> &get_warning_global_skips() const = 0;
|
||||
virtual bool is_ignoring_warnings() const = 0;
|
||||
#endif // DEBUG_ENABLED
|
||||
|
||||
virtual ~GDScriptTokenizer() {}
|
||||
};
|
||||
|
||||
class GDScriptTokenizerText : public GDScriptTokenizer {
|
||||
enum {
|
||||
MAX_LOOKAHEAD = 4,
|
||||
TK_RB_SIZE = MAX_LOOKAHEAD * 2 + 1
|
||||
|
||||
};
|
||||
|
||||
struct TokenData {
|
||||
Token type;
|
||||
StringName identifier; //for identifier types
|
||||
Variant constant; //for constant types
|
||||
union {
|
||||
Variant::Type vtype; //for type types
|
||||
GDScriptFunctions::Function func; //function for built in functions
|
||||
int warning_code; //for warning skip
|
||||
struct Token {
|
||||
enum Type {
|
||||
EMPTY,
|
||||
// Basic
|
||||
ANNOTATION,
|
||||
IDENTIFIER,
|
||||
LITERAL,
|
||||
// Comparison
|
||||
LESS,
|
||||
LESS_EQUAL,
|
||||
GREATER,
|
||||
GREATER_EQUAL,
|
||||
EQUAL_EQUAL,
|
||||
BANG_EQUAL,
|
||||
// Logical
|
||||
AND,
|
||||
OR,
|
||||
NOT,
|
||||
AMPERSAND_AMPERSAND,
|
||||
PIPE_PIPE,
|
||||
BANG,
|
||||
// Bitwise
|
||||
AMPERSAND,
|
||||
PIPE,
|
||||
TILDE,
|
||||
CARET,
|
||||
LESS_LESS,
|
||||
GREATER_GREATER,
|
||||
// Math
|
||||
PLUS,
|
||||
MINUS,
|
||||
STAR,
|
||||
SLASH,
|
||||
PERCENT,
|
||||
// Assignment
|
||||
EQUAL,
|
||||
PLUS_EQUAL,
|
||||
MINUS_EQUAL,
|
||||
STAR_EQUAL,
|
||||
SLASH_EQUAL,
|
||||
PERCENT_EQUAL,
|
||||
LESS_LESS_EQUAL,
|
||||
GREATER_GREATER_EQUAL,
|
||||
AMPERSAND_EQUAL,
|
||||
PIPE_EQUAL,
|
||||
CARET_EQUAL,
|
||||
// Control flow
|
||||
IF,
|
||||
ELIF,
|
||||
ELSE,
|
||||
FOR,
|
||||
WHILE,
|
||||
BREAK,
|
||||
CONTINUE,
|
||||
PASS,
|
||||
RETURN,
|
||||
MATCH,
|
||||
// Keywords
|
||||
AS,
|
||||
ASSERT,
|
||||
AWAIT,
|
||||
BREAKPOINT,
|
||||
CLASS,
|
||||
CLASS_NAME,
|
||||
CONST,
|
||||
ENUM,
|
||||
EXTENDS,
|
||||
FUNC,
|
||||
IN,
|
||||
IS,
|
||||
NAMESPACE,
|
||||
PRELOAD,
|
||||
SELF,
|
||||
SIGNAL,
|
||||
STATIC,
|
||||
SUPER,
|
||||
VAR,
|
||||
VOID,
|
||||
YIELD,
|
||||
// Punctuation
|
||||
BRACKET_OPEN,
|
||||
BRACKET_CLOSE,
|
||||
BRACE_OPEN,
|
||||
BRACE_CLOSE,
|
||||
PARENTHESIS_OPEN,
|
||||
PARENTHESIS_CLOSE,
|
||||
COMMA,
|
||||
SEMICOLON,
|
||||
PERIOD,
|
||||
PERIOD_PERIOD,
|
||||
COLON,
|
||||
DOLLAR,
|
||||
FORWARD_ARROW,
|
||||
UNDERSCORE,
|
||||
// Whitespace
|
||||
NEWLINE,
|
||||
INDENT,
|
||||
DEDENT,
|
||||
// Constants
|
||||
CONST_PI,
|
||||
CONST_TAU,
|
||||
CONST_INF,
|
||||
CONST_NAN,
|
||||
// Error message improvement
|
||||
VCS_CONFLICT_MARKER,
|
||||
BACKTICK,
|
||||
QUESTION_MARK,
|
||||
// Special
|
||||
ERROR,
|
||||
TK_EOF, // "EOF" is reserved
|
||||
TK_MAX
|
||||
};
|
||||
int line, col;
|
||||
TokenData() {
|
||||
type = TK_EMPTY;
|
||||
line = col = 0;
|
||||
vtype = Variant::NIL;
|
||||
|
||||
Type type = EMPTY;
|
||||
Variant literal;
|
||||
int start_line = 0, end_line = 0, start_column = 0, end_column = 0;
|
||||
int leftmost_column = 0, rightmost_column = 0; // Column span for multiline tokens.
|
||||
|
||||
const char *get_name() const;
|
||||
// TODO: Allow some keywords as identifiers?
|
||||
bool is_identifier() const { return type == IDENTIFIER; }
|
||||
StringName get_identifier() const { return literal; }
|
||||
|
||||
Token(Type p_type) {
|
||||
type = p_type;
|
||||
}
|
||||
|
||||
Token() {
|
||||
type = EMPTY;
|
||||
}
|
||||
};
|
||||
|
||||
void _make_token(Token p_type);
|
||||
void _make_newline(int p_indentation = 0, int p_tabs = 0);
|
||||
void _make_identifier(const StringName &p_identifier);
|
||||
void _make_built_in_func(GDScriptFunctions::Function p_func);
|
||||
void _make_constant(const Variant &p_constant);
|
||||
void _make_type(const Variant::Type &p_type);
|
||||
void _make_error(const String &p_error);
|
||||
private:
|
||||
String source;
|
||||
const CharType *_source = nullptr;
|
||||
const CharType *_current = nullptr;
|
||||
int line = 0, column = 0;
|
||||
int cursor_line = 0, cursor_column = 0;
|
||||
int tab_size = 4;
|
||||
|
||||
String code;
|
||||
int len;
|
||||
int code_pos;
|
||||
const CharType *_code;
|
||||
int line;
|
||||
int column;
|
||||
TokenData tk_rb[TK_RB_SIZE * 2 + 1];
|
||||
int tk_rb_pos;
|
||||
String last_error;
|
||||
bool error_flag;
|
||||
// Keep track of multichar tokens.
|
||||
const CharType *_start = nullptr;
|
||||
int start_line = 0, start_column = 0;
|
||||
int leftmost_column = 0, rightmost_column = 0;
|
||||
|
||||
#ifdef DEBUG_ENABLED
|
||||
Vector<Pair<int, String>> warning_skips;
|
||||
Set<String> warning_global_skips;
|
||||
bool ignore_warnings;
|
||||
#endif // DEBUG_ENABLED
|
||||
// Info cache.
|
||||
bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
|
||||
bool multiline_mode = false;
|
||||
List<Token> error_stack;
|
||||
bool pending_newline = false;
|
||||
Token last_newline;
|
||||
int pending_indents = 0;
|
||||
List<int> indent_stack;
|
||||
List<CharType> paren_stack;
|
||||
CharType indent_char = '\0';
|
||||
int position = 0;
|
||||
int length = 0;
|
||||
|
||||
void _advance();
|
||||
_FORCE_INLINE_ bool _is_at_end() { return position >= length; }
|
||||
_FORCE_INLINE_ CharType _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }
|
||||
int indent_level() const { return indent_stack.size(); }
|
||||
bool has_error() const { return !error_stack.empty(); }
|
||||
Token pop_error();
|
||||
CharType _advance();
|
||||
void _skip_whitespace();
|
||||
void check_indent();
|
||||
|
||||
Token make_error(const String &p_message);
|
||||
void push_error(const String &p_message);
|
||||
void push_error(const Token &p_error);
|
||||
Token make_paren_error(CharType p_paren);
|
||||
Token make_token(Token::Type p_type) const;
|
||||
Token make_literal(const Variant &p_literal) const;
|
||||
Token make_identifier(const StringName &p_identifier) const;
|
||||
Token check_vcs_marker(CharType p_test, Token::Type p_double_type);
|
||||
void push_paren(CharType p_char);
|
||||
bool pop_paren(CharType p_expected);
|
||||
|
||||
void newline(bool p_make_token);
|
||||
Token number();
|
||||
Token potential_identifier();
|
||||
Token string();
|
||||
Token annotation();
|
||||
|
||||
public:
|
||||
void set_code(const String &p_code);
|
||||
virtual Token get_token(int p_offset = 0) const;
|
||||
virtual StringName get_token_identifier(int p_offset = 0) const;
|
||||
virtual GDScriptFunctions::Function get_token_built_in_func(int p_offset = 0) const;
|
||||
virtual Variant::Type get_token_type(int p_offset = 0) const;
|
||||
virtual int get_token_line(int p_offset = 0) const;
|
||||
virtual int get_token_column(int p_offset = 0) const;
|
||||
virtual int get_token_line_indent(int p_offset = 0) const;
|
||||
virtual int get_token_line_tab_indent(int p_offset = 0) const;
|
||||
virtual const Variant &get_token_constant(int p_offset = 0) const;
|
||||
virtual String get_token_error(int p_offset = 0) const;
|
||||
virtual void advance(int p_amount = 1);
|
||||
#ifdef DEBUG_ENABLED
|
||||
virtual const Vector<Pair<int, String>> &get_warning_skips() const { return warning_skips; }
|
||||
virtual const Set<String> &get_warning_global_skips() const { return warning_global_skips; }
|
||||
virtual bool is_ignoring_warnings() const { return ignore_warnings; }
|
||||
#endif // DEBUG_ENABLED
|
||||
Token scan();
|
||||
|
||||
void set_source_code(const String &p_source_code);
|
||||
|
||||
int get_cursor_line() const;
|
||||
int get_cursor_column() const;
|
||||
void set_cursor_position(int p_line, int p_column);
|
||||
void set_multiline_mode(bool p_state);
|
||||
static String get_token_name(Token::Type p_token_type);
|
||||
|
||||
GDScriptTokenizer();
|
||||
};
|
||||
|
||||
class GDScriptTokenizerBuffer : public GDScriptTokenizer {
|
||||
enum {
|
||||
|
||||
TOKEN_BYTE_MASK = 0x80,
|
||||
TOKEN_BITS = 8,
|
||||
TOKEN_MASK = (1 << TOKEN_BITS) - 1,
|
||||
TOKEN_LINE_BITS = 24,
|
||||
TOKEN_LINE_MASK = (1 << TOKEN_LINE_BITS) - 1,
|
||||
};
|
||||
|
||||
Vector<StringName> identifiers;
|
||||
Vector<Variant> constants;
|
||||
VMap<uint32_t, uint32_t> lines;
|
||||
Vector<uint32_t> tokens;
|
||||
Variant nil;
|
||||
int token;
|
||||
|
||||
public:
|
||||
Error set_code_buffer(const Vector<uint8_t> &p_buffer);
|
||||
static Vector<uint8_t> parse_code_string(const String &p_code);
|
||||
virtual Token get_token(int p_offset = 0) const;
|
||||
virtual StringName get_token_identifier(int p_offset = 0) const;
|
||||
virtual GDScriptFunctions::Function get_token_built_in_func(int p_offset = 0) const;
|
||||
virtual Variant::Type get_token_type(int p_offset = 0) const;
|
||||
virtual int get_token_line(int p_offset = 0) const;
|
||||
virtual int get_token_column(int p_offset = 0) const;
|
||||
virtual int get_token_line_indent(int p_offset = 0) const;
|
||||
virtual int get_token_line_tab_indent(int p_offset = 0) const { return 0; }
|
||||
virtual const Variant &get_token_constant(int p_offset = 0) const;
|
||||
virtual String get_token_error(int p_offset = 0) const;
|
||||
virtual void advance(int p_amount = 1);
|
||||
#ifdef DEBUG_ENABLED
|
||||
virtual const Vector<Pair<int, String>> &get_warning_skips() const {
|
||||
static Vector<Pair<int, String>> v;
|
||||
return v;
|
||||
}
|
||||
virtual const Set<String> &get_warning_global_skips() const {
|
||||
static Set<String> s;
|
||||
return s;
|
||||
}
|
||||
virtual bool is_ignoring_warnings() const { return true; }
|
||||
#endif // DEBUG_ENABLED
|
||||
GDScriptTokenizerBuffer();
|
||||
};
|
||||
|
||||
#endif // GDSCRIPT_TOKENIZER_H
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user