1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-04 12:00:25 +00:00

New GDScript tokenizer and parser

Sometimes to fix something you have to break it first.

This gets GDScript mostly working with the new tokenizer and parser, but
a lot of things aren't working yet. It compiles and it's usable, and that
should be enough for now.

Don't worry: other huge commits will come after this.
This commit is contained in:
George Marques
2020-05-01 19:14:56 -03:00
parent 818bfbc5b5
commit 5d6e853806
29 changed files with 7906 additions and 16496 deletions

View File

@@ -31,268 +31,209 @@
#ifndef GDSCRIPT_TOKENIZER_H
#define GDSCRIPT_TOKENIZER_H
#include "core/pair.h"
#include "core/list.h"
#include "core/set.h"
#include "core/string_name.h"
#include "core/ustring.h"
#include "core/variant.h"
#include "core/vmap.h"
#include "gdscript_functions.h"
#include "core/vector.h"
// Tokenizer interface: declares the token kinds plus a set of pure virtual
// accessors. Within this header, GDScriptTokenizerText and
// GDScriptTokenizerBuffer derive from it.
class GDScriptTokenizer {
public:
// Kinds of token. Values are assigned implicitly from 0 in declaration order,
// so TK_MAX (the last entry) equals the number of kinds and is used as the
// length of the token_names table below. Do not reorder without checking users.
enum Token {
TK_EMPTY,
TK_IDENTIFIER,
TK_CONSTANT,
TK_SELF,
TK_BUILT_IN_TYPE,
TK_BUILT_IN_FUNC,
// TK_OP_* entries: operators (comparison, logical, arithmetic, shift,
// assignment and bitwise).
TK_OP_IN,
TK_OP_EQUAL,
TK_OP_NOT_EQUAL,
TK_OP_LESS,
TK_OP_LESS_EQUAL,
TK_OP_GREATER,
TK_OP_GREATER_EQUAL,
TK_OP_AND,
TK_OP_OR,
TK_OP_NOT,
TK_OP_ADD,
TK_OP_SUB,
TK_OP_MUL,
TK_OP_DIV,
TK_OP_MOD,
TK_OP_SHIFT_LEFT,
TK_OP_SHIFT_RIGHT,
TK_OP_ASSIGN,
TK_OP_ASSIGN_ADD,
TK_OP_ASSIGN_SUB,
TK_OP_ASSIGN_MUL,
TK_OP_ASSIGN_DIV,
TK_OP_ASSIGN_MOD,
TK_OP_ASSIGN_SHIFT_LEFT,
TK_OP_ASSIGN_SHIFT_RIGHT,
TK_OP_ASSIGN_BIT_AND,
TK_OP_ASSIGN_BIT_OR,
TK_OP_ASSIGN_BIT_XOR,
TK_OP_BIT_AND,
TK_OP_BIT_OR,
TK_OP_BIT_XOR,
TK_OP_BIT_INVERT,
//TK_OP_PLUS_PLUS,
//TK_OP_MINUS_MINUS,
// TK_CF_* entries: control flow.
TK_CF_IF,
TK_CF_ELIF,
TK_CF_ELSE,
TK_CF_FOR,
TK_CF_WHILE,
TK_CF_BREAK,
TK_CF_CONTINUE,
TK_CF_PASS,
TK_CF_RETURN,
TK_CF_MATCH,
// TK_PR_* entries: keyword-like names (function, class, var, signal, ...).
TK_PR_FUNCTION,
TK_PR_CLASS,
TK_PR_CLASS_NAME,
TK_PR_EXTENDS,
TK_PR_IS,
TK_PR_ONREADY,
TK_PR_TOOL,
TK_PR_STATIC,
TK_PR_EXPORT,
TK_PR_SETGET,
TK_PR_CONST,
TK_PR_VAR,
TK_PR_AS,
TK_PR_VOID,
TK_PR_ENUM,
TK_PR_PRELOAD,
TK_PR_ASSERT,
TK_PR_YIELD,
TK_PR_SIGNAL,
TK_PR_BREAKPOINT,
TK_PR_REMOTE,
TK_PR_MASTER,
TK_PR_PUPPET,
TK_PR_REMOTESYNC,
TK_PR_MASTERSYNC,
TK_PR_PUPPETSYNC,
// Brackets and punctuation.
TK_BRACKET_OPEN,
TK_BRACKET_CLOSE,
TK_CURLY_BRACKET_OPEN,
TK_CURLY_BRACKET_CLOSE,
TK_PARENTHESIS_OPEN,
TK_PARENTHESIS_CLOSE,
TK_COMMA,
TK_SEMICOLON,
TK_PERIOD,
TK_QUESTION_MARK,
TK_COLON,
TK_DOLLAR,
TK_FORWARD_ARROW,
// Newline, named constants and wildcard.
TK_NEWLINE,
TK_CONST_PI,
TK_CONST_TAU,
TK_WILDCARD,
TK_CONST_INF,
TK_CONST_NAN,
// Special entries.
TK_ERROR,
TK_EOF,
TK_CURSOR, //used for code completion
TK_MAX
};
protected:
// String literal flavours. NOTE(review): nothing in this header uses this
// enum; its consumers are presumably in the implementation file.
enum StringMode {
STRING_SINGLE_QUOTE,
STRING_DOUBLE_QUOTE,
STRING_MULTILINE
};
// One C string per Token value (the array has TK_MAX entries); defined elsewhere.
static const char *token_names[TK_MAX];
public:
static const char *get_token_name(Token p_token);
bool is_token_literal(int p_offset = 0, bool variable_safe = false) const;
StringName get_token_literal(int p_offset = 0) const;
// Pure virtual accessors that subclasses must implement. Every one takes a
// p_offset defaulting to 0; presumably an offset relative to the current
// token -- TODO confirm against the implementations.
virtual const Variant &get_token_constant(int p_offset = 0) const = 0;
virtual Token get_token(int p_offset = 0) const = 0;
virtual StringName get_token_identifier(int p_offset = 0) const = 0;
virtual GDScriptFunctions::Function get_token_built_in_func(int p_offset = 0) const = 0;
virtual Variant::Type get_token_type(int p_offset = 0) const = 0;
virtual int get_token_line(int p_offset = 0) const = 0;
virtual int get_token_column(int p_offset = 0) const = 0;
virtual int get_token_line_indent(int p_offset = 0) const = 0;
virtual int get_token_line_tab_indent(int p_offset = 0) const = 0;
virtual String get_token_error(int p_offset = 0) const = 0;
// The only non-const member of the interface; p_amount defaults to 1.
virtual void advance(int p_amount = 1) = 0;
// Warning-related accessors exist only when DEBUG_ENABLED is defined.
#ifdef DEBUG_ENABLED
virtual const Vector<Pair<int, String>> &get_warning_skips() const = 0;
virtual const Set<String> &get_warning_global_skips() const = 0;
virtual bool is_ignoring_warnings() const = 0;
#endif // DEBUG_ENABLED
// Virtual destructor so subclasses can be destroyed through a base pointer.
virtual ~GDScriptTokenizer() {}
};
class GDScriptTokenizerText : public GDScriptTokenizer {
// Compile-time sizing constants. TK_RB_SIZE is used further down to size the
// tk_rb array of TokenData.
enum {
MAX_LOOKAHEAD = 4,
// Evaluates to 9 (4 * 2 + 1).
TK_RB_SIZE = MAX_LOOKAHEAD * 2 + 1
};
struct TokenData {
Token type;
StringName identifier; //for identifier types
Variant constant; //for constant types
union {
Variant::Type vtype; //for type types
GDScriptFunctions::Function func; //function for built in functions
int warning_code; //for warning skip
struct Token {
// Kinds of token. Values are assigned implicitly from 0 in declaration order,
// so TK_MAX (kept last) equals the number of kinds. Keep any table indexed by
// this enum in sync when adding, removing or reordering entries.
enum Type {
EMPTY,
// Basic
ANNOTATION,
IDENTIFIER,
LITERAL,
// Comparison
LESS,
LESS_EQUAL,
GREATER,
GREATER_EQUAL,
EQUAL_EQUAL,
BANG_EQUAL,
// Logical
AND,
OR,
NOT,
AMPERSAND_AMPERSAND,
PIPE_PIPE,
BANG,
// Bitwise
AMPERSAND,
PIPE,
TILDE,
CARET,
LESS_LESS,
GREATER_GREATER,
// Math
PLUS,
MINUS,
STAR,
SLASH,
PERCENT,
// Assignment
EQUAL,
PLUS_EQUAL,
MINUS_EQUAL,
STAR_EQUAL,
SLASH_EQUAL,
PERCENT_EQUAL,
LESS_LESS_EQUAL,
GREATER_GREATER_EQUAL,
AMPERSAND_EQUAL,
PIPE_EQUAL,
CARET_EQUAL,
// Control flow
IF,
ELIF,
ELSE,
FOR,
WHILE,
BREAK,
CONTINUE,
PASS,
RETURN,
MATCH,
// Keywords
AS,
ASSERT,
AWAIT,
BREAKPOINT,
CLASS,
CLASS_NAME,
CONST,
ENUM,
EXTENDS,
FUNC,
IN,
IS,
NAMESPACE,
PRELOAD,
SELF,
SIGNAL,
STATIC,
SUPER,
VAR,
VOID,
YIELD,
// Punctuation
BRACKET_OPEN,
BRACKET_CLOSE,
BRACE_OPEN,
BRACE_CLOSE,
PARENTHESIS_OPEN,
PARENTHESIS_CLOSE,
COMMA,
SEMICOLON,
PERIOD,
PERIOD_PERIOD,
COLON,
DOLLAR,
FORWARD_ARROW,
UNDERSCORE,
// Whitespace
NEWLINE,
INDENT,
DEDENT,
// Constants
CONST_PI,
CONST_TAU,
CONST_INF,
CONST_NAN,
// Error message improvement
VCS_CONFLICT_MARKER,
BACKTICK,
QUESTION_MARK,
// Special
ERROR,
TK_EOF, // Prefixed because plain "EOF" is reserved (it is a macro in the C standard library).
TK_MAX
};
int line, col;
TokenData() {
type = TK_EMPTY;
line = col = 0;
vtype = Variant::NIL;
Type type = EMPTY;
Variant literal;
int start_line = 0, end_line = 0, start_column = 0, end_column = 0;
int leftmost_column = 0, rightmost_column = 0; // Column span for multiline tokens.
const char *get_name() const;
// TODO: Allow some keywords as identifiers?
bool is_identifier() const { return type == IDENTIFIER; }
StringName get_identifier() const { return literal; }
Token(Type p_type) {
type = p_type;
}
Token() {
type = EMPTY;
}
};
void _make_token(Token p_type);
void _make_newline(int p_indentation = 0, int p_tabs = 0);
void _make_identifier(const StringName &p_identifier);
void _make_built_in_func(GDScriptFunctions::Function p_func);
void _make_constant(const Variant &p_constant);
void _make_type(const Variant::Type &p_type);
void _make_error(const String &p_error);
private:
String source;
const CharType *_source = nullptr;
const CharType *_current = nullptr;
int line = 0, column = 0;
int cursor_line = 0, cursor_column = 0;
int tab_size = 4;
String code;
int len;
int code_pos;
const CharType *_code;
int line;
int column;
TokenData tk_rb[TK_RB_SIZE * 2 + 1];
int tk_rb_pos;
String last_error;
bool error_flag;
// Keep track of multichar tokens.
const CharType *_start = nullptr;
int start_line = 0, start_column = 0;
int leftmost_column = 0, rightmost_column = 0;
#ifdef DEBUG_ENABLED
Vector<Pair<int, String>> warning_skips;
Set<String> warning_global_skips;
bool ignore_warnings;
#endif // DEBUG_ENABLED
// Info cache.
bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
bool multiline_mode = false;
List<Token> error_stack;
bool pending_newline = false;
Token last_newline;
int pending_indents = 0;
List<int> indent_stack;
List<CharType> paren_stack;
CharType indent_char = '\0';
int position = 0;
int length = 0;
void _advance();
// True once `position` has reached or passed `length`.
_FORCE_INLINE_ bool _is_at_end() { return position >= length; }
// Looks at the character p_offset positions away from _current without
// changing any state. Yields '\0' whenever position + p_offset falls outside
// the range [0, length).
_FORCE_INLINE_ CharType _peek(int p_offset = 0) {
	const int target = position + p_offset;
	if (target < 0 || target >= length) {
		return '\0';
	}
	return _current[p_offset];
}
// Number of entries currently held in indent_stack.
int indent_level() const { return indent_stack.size(); }
// True while error_stack holds at least one entry.
bool has_error() const { return !error_stack.empty(); }
Token pop_error();
CharType _advance();
void _skip_whitespace();
void check_indent();
Token make_error(const String &p_message);
void push_error(const String &p_message);
void push_error(const Token &p_error);
Token make_paren_error(CharType p_paren);
Token make_token(Token::Type p_type) const;
Token make_literal(const Variant &p_literal) const;
Token make_identifier(const StringName &p_identifier) const;
Token check_vcs_marker(CharType p_test, Token::Type p_double_type);
void push_paren(CharType p_char);
bool pop_paren(CharType p_expected);
void newline(bool p_make_token);
Token number();
Token potential_identifier();
Token string();
Token annotation();
public:
void set_code(const String &p_code);
virtual Token get_token(int p_offset = 0) const;
virtual StringName get_token_identifier(int p_offset = 0) const;
virtual GDScriptFunctions::Function get_token_built_in_func(int p_offset = 0) const;
virtual Variant::Type get_token_type(int p_offset = 0) const;
virtual int get_token_line(int p_offset = 0) const;
virtual int get_token_column(int p_offset = 0) const;
virtual int get_token_line_indent(int p_offset = 0) const;
virtual int get_token_line_tab_indent(int p_offset = 0) const;
virtual const Variant &get_token_constant(int p_offset = 0) const;
virtual String get_token_error(int p_offset = 0) const;
virtual void advance(int p_amount = 1);
#ifdef DEBUG_ENABLED
// Read-only views of the warning-related members declared under DEBUG_ENABLED
// in this class; each returns the stored member as-is.
virtual const Vector<Pair<int, String>> &get_warning_skips() const { return warning_skips; }
virtual const Set<String> &get_warning_global_skips() const { return warning_global_skips; }
virtual bool is_ignoring_warnings() const { return ignore_warnings; }
#endif // DEBUG_ENABLED
Token scan();
void set_source_code(const String &p_source_code);
int get_cursor_line() const;
int get_cursor_column() const;
void set_cursor_position(int p_line, int p_column);
void set_multiline_mode(bool p_state);
static String get_token_name(Token::Type p_token_type);
GDScriptTokenizer();
};
// GDScriptTokenizer subclass whose input is a byte vector passed to
// set_code_buffer(). parse_code_string() turns a source string into a byte
// vector, presumably in the format set_code_buffer() accepts -- TODO confirm,
// the implementations are not part of this header.
class GDScriptTokenizerBuffer : public GDScriptTokenizer {
// Bit-layout constants. NOTE(review): how they are applied to the entries of
// `tokens` is not visible in this header; only the values are documented here.
enum {
TOKEN_BYTE_MASK = 0x80,
TOKEN_BITS = 8,
TOKEN_MASK = (1 << TOKEN_BITS) - 1, // 0xFF
TOKEN_LINE_BITS = 24,
TOKEN_LINE_MASK = (1 << TOKEN_LINE_BITS) - 1, // 0xFFFFFF
};
Vector<StringName> identifiers;
Vector<Variant> constants;
VMap<uint32_t, uint32_t> lines;
Vector<uint32_t> tokens;
Variant nil;
int token;
public:
Error set_code_buffer(const Vector<uint8_t> &p_buffer);
static Vector<uint8_t> parse_code_string(const String &p_code);
// Overrides of the GDScriptTokenizer accessors.
virtual Token get_token(int p_offset = 0) const;
virtual StringName get_token_identifier(int p_offset = 0) const;
virtual GDScriptFunctions::Function get_token_built_in_func(int p_offset = 0) const;
virtual Variant::Type get_token_type(int p_offset = 0) const;
virtual int get_token_line(int p_offset = 0) const;
virtual int get_token_column(int p_offset = 0) const;
virtual int get_token_line_indent(int p_offset = 0) const;
// This tokenizer always reports a tab indent of 0.
virtual int get_token_line_tab_indent(int p_offset = 0) const { return 0; }
virtual const Variant &get_token_constant(int p_offset = 0) const;
virtual String get_token_error(int p_offset = 0) const;
virtual void advance(int p_amount = 1);
#ifdef DEBUG_ENABLED
// The two container accessors return a reference to a function-local static,
// default-constructed container that this header never populates, which keeps
// the returned reference valid after the call.
virtual const Vector<Pair<int, String>> &get_warning_skips() const {
static Vector<Pair<int, String>> v;
return v;
}
virtual const Set<String> &get_warning_global_skips() const {
static Set<String> s;
return s;
}
// Always reports that warnings are being ignored.
virtual bool is_ignoring_warnings() const { return true; }
#endif // DEBUG_ENABLED
GDScriptTokenizerBuffer();
};
#endif // GDSCRIPT_TOKENIZER_H
#endif