/* valageniescanner.vala
 *
 * Copyright (C) 2008-2012  Jamie McCracken, Jürg Billeter
 * Based on code by Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.

 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.

 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
 *
 * Author:
 * 	Jamie McCracken jamiemcc gnome org
 */

using GLib;

/**
 * Lexical scanner for Genie source files.
 */
public class Vala.Genie.Scanner {
	public SourceFile source_file { get; private set; }

	public int indent_spaces { get; set;}

	char* begin;
	char* current;
	char* end;

	int line;
	int column;

	int current_indent_level;
	int indent_level;
	int pending_dedents;

	/* track open parens and braces for automatic line continuations */
	int open_parens_count;
	int open_brace_count;

	TokenType last_token;
	bool parse_started;

	Comment _comment;

	Conditional[] conditional_stack;

	struct Conditional {
		public bool matched;
		public bool else_found;
		public bool skip_section;
	}

	State[] state_stack;

	enum State {
		PARENS,
		BRACE,
		BRACKET,
		REGEX_LITERAL,
		TEMPLATE,
		TEMPLATE_PART,
		VERBATIM_TEMPLATE
	}

	public Scanner (SourceFile source_file) {
		this.source_file = source_file;

		begin = source_file.get_mapped_contents ();
		end = begin + source_file.get_mapped_length ();

		current = begin;

		_indent_spaces = 0;
		line = 1;
		column = 1;
		current_indent_level = 0;
		indent_level = 0;
		pending_dedents = 0;

		open_parens_count = 0;
		open_brace_count = 0;

		parse_started = false;
		last_token = TokenType.NONE;

	}

	inline bool in_template () {
		return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE);
	}

	inline bool in_verbatim_template () {
		return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.VERBATIM_TEMPLATE);
	}

	inline bool in_template_part () {
		return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE_PART);
	}

	inline bool is_ident_char (char c) {
		return (c.isalnum () || c == '_');
	}

	inline bool in_regex_literal () {
		return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.REGEX_LITERAL);
	}

	SourceReference get_source_reference (int offset, int length = 0) {
		return new SourceReference (source_file, SourceLocation (current, line, column + offset), SourceLocation (current + length, line, column + offset + length));
	}

	public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
		TokenType type;
		char* begin = current;
		token_begin = SourceLocation (begin, line, column);

		int token_length_in_chars = -1;

		if (current >= end) {
			type = TokenType.EOF;
		} else {
			switch (current[0]) {
			case '/':
				type = TokenType.CLOSE_REGEX_LITERAL;
				current++;
				state_stack.length--;
				var fl_i = false;
				var fl_s = false;
				var fl_m = false;
				var fl_x = false;
				while (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x') {
					switch (current[0]) {
					case 'i':
						if (fl_i) {
							Report.error (get_source_reference (token_length_in_chars), "modifier 'i' used more than once");
						}
						fl_i = true;
						break;
					case 's':
						if (fl_s) {
							Report.error (get_source_reference (token_length_in_chars), "modifier 's' used more than once");
						}
						fl_s = true;
						break;
					case 'm':
						if (fl_m) {
							Report.error (get_source_reference (token_length_in_chars), "modifier 'm' used more than once");
						}
						fl_m = true;
						break;
					case 'x':
						if (fl_x) {
							Report.error (get_source_reference (token_length_in_chars), "modifier 'x' used more than once");
						}
						fl_x = true;
						break;
					}
					current++;
					token_length_in_chars++;
				}
				break;
			default:
				type = TokenType.REGEX_LITERAL;
				token_length_in_chars = 0;
				while (current < end && current[0] != '/') {
					if (current[0] == '\\') {
						current++;
						token_length_in_chars++;
						if (current >= end) {
							break;
						}

						switch (current[0]) {
						case '\'':
						case '"':
						case '\\':
						case '/':
						case '^':
						case '$':
						case '.':
						case '[':
						case ']':
						case '{':
						case '}':
						case '(':
						case ')':
						case '?':
						case '*':
						case '+':
						case '-':
						case '#':
						case '&':
						case '~':
						case ':':
						case ';':
						case '<':
						case '>':
						case '|':
						case '%':
						case '=':
						case '@':
						case '0':
						case 'b':
						case 'B':
						case 'f':
						case 'n':
						case 'N':
						case 'r':
						case 'R':
						case 't':
						case 'v':
						case 'a':
						case 'A':
						case 'p':
						case 'P':
						case 'e':
						case 'd':
						case 'D':
						case 's':
						case 'S':
						case 'w':
						case 'W':
						case 'G':
						case 'z':
						case 'Z':
							current++;
							token_length_in_chars++;
							break;
						case 'u':
							// u escape character has four hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
							} else if (digit_length < 4) {
								Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
							}
							break;
						case 'x':
							// hexadecimal escape character requires two hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit ();) {
								if (current[0] != '0') {
									digit_length++;
								}
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
							} else if (digit_length > 2) {
								Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
							}
							break;
						default:
							// back references \1 through \99
							if (current[0].isdigit ()) {
								current++;
								token_length_in_chars++;
								if (current[0].isdigit ()) {
									current++;
									token_length_in_chars++;
								}
							} else {
								Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
							}
							break;
						}
					} else if (current[0] == '\n') {
						break;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current >= end || current[0] == '\n') {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
					state_stack.length--;
					return read_token (out token_begin, out token_end);
				}
				break;
			}
		}

		if (token_length_in_chars < 0) {
			column += (int) (current - begin);
		} else {
			column += token_length_in_chars;
		}

		token_end = SourceLocation (current, line, column - 1);

		return type;
	}


	public void seek (SourceLocation location) {
		current = location.pos;
		line = location.line;
		column = location.column;

		conditional_stack = null;
		state_stack = null;
	}

	public static TokenType get_identifier_or_keyword (char* begin, int len) {
		switch (len) {
		case 2:
			switch (begin[0]) {
			case 'a':
				if (matches (begin, "as")) return TokenType.AS;
				break;
			case 'd':
				if (matches (begin, "do")) return TokenType.DO;
				break;
			case 'i':
				switch (begin[1]) {
				case 'f':
					return TokenType.IF;
				case 'n':
					return TokenType.IN;
				case 's':
					return TokenType.IS;
				}
				break;
			case 'o':
				if (matches (begin, "of")) return TokenType.OF;

				if (matches (begin, "or")) return TokenType.OP_OR;
				break;
			case 't':
				if (matches (begin, "to")) return TokenType.TO;
				break;
			}
			break;
		case 3:
			switch (begin[0]) {
			case 'a':
				if (matches (begin, "and")) return TokenType.OP_AND;
				break;
			case 'd':
				if (matches (begin, "def")) return TokenType.DEF;
				break;
			case 'f':
				if (matches (begin, "for")) return TokenType.FOR;
				break;
			case 'g':
				if (matches (begin, "get")) return TokenType.GET;
				break;
			case 'i':
				if (matches (begin, "isa")) return TokenType.ISA;
				break;
			case 'n':
				switch (begin[1]) {
				case 'e':
					if (matches (begin, "new")) return TokenType.NEW;
					break;
				case 'o':
					if (matches (begin, "not")) return TokenType.OP_NEG;
					break;
				}
				break;
			case 'o':
				if (matches (begin, "out")) return TokenType.OUT;
				break;
			case 'r':
				if (matches (begin, "ref")) return TokenType.REF;
				break;
			case 's':
				if (matches (begin, "set")) return TokenType.SET;
				break;
			case 't':
				if (matches (begin, "try")) return TokenType.TRY;
				break;
			case 'v':
				if (matches (begin, "var")) return TokenType.VAR;
				break;
			}
			break;
		case 4:
			switch (begin[0]) {
			case 'c':
				if (matches (begin, "case")) return TokenType.CASE;
				break;
			case 'd':
				if (matches (begin, "dict")) return TokenType.DICT;
				break;
			case 'e':
				switch (begin[1]) {
				case 'l':
					if (matches (begin, "else")) return TokenType.ELSE;
					break;
				case 'n':
					if (matches (begin, "enum")) return TokenType.ENUM;
					break;
				}
				break;
			case 'i':
				if (matches (begin, "init")) return TokenType.INIT;
				break;
			case 'l':
				switch (begin[1]) {
				case 'i':
					if (matches (begin, "list")) return TokenType.LIST;
					break;
				case 'o':
					if (matches (begin, "lock")) return TokenType.LOCK;
					break;
				}
				break;

			case 'n':
				if (matches (begin, "null")) return TokenType.NULL;
				break;
			case 'p':
				switch (begin[1]) {
				case 'a':
					if (matches (begin, "pass")) return TokenType.PASS;
					break;
				case 'r':
					if (matches (begin, "prop")) return TokenType.PROP;
					break;
				}
				break;
			case 's':
				if (matches (begin, "self")) return TokenType.SELF;
				break;
			case 't':
				if (matches (begin, "true")) return TokenType.TRUE;
				break;
			case 'u':
				if (matches (begin, "uses")) return TokenType.USES;
				break;
			case 'v':
				if (matches (begin, "void")) return TokenType.VOID;
				break;
			case 'w':
				switch (begin[1]) {
				case 'e':
					if (matches (begin, "weak")) return TokenType.WEAK;
					break;
				case 'h':
					if (matches (begin, "when")) return TokenType.WHEN;
					break;
				}
				break;
			}
			break;
		case 5:
			switch (begin[0]) {
			case 'a':
				switch (begin[1]) {
				case 'r':
					if (matches (begin, "array")) return TokenType.ARRAY;
					break;
				case 's':
					if (matches (begin, "async")) return TokenType.ASYNC;
					break;
				}
				break;
			case 'b':
				if (matches (begin, "break")) return TokenType.BREAK;
				break;
			case 'c':
				switch (begin[1]) {
				case 'l':
					if (matches (begin, "class")) return TokenType.CLASS;
					break;
				case 'o':
					if (matches (begin, "const")) return TokenType.CONST;
					break;
				}
				break;
			case 'e':
				if (matches (begin, "event")) return TokenType.EVENT;
				break;
			case 'f':
				switch (begin[1]) {
				case 'a':
					if (matches (begin, "false")) return TokenType.FALSE;
					break;
				case 'i':
					if (matches (begin, "final")) return TokenType.FINAL;
					break;
				}
				break;
			case 'o':
				if (matches (begin, "owned")) return TokenType.OWNED;
				break;
			case 'p':
				if (matches (begin, "print")) return TokenType.PRINT;
				break;
			case 's':
				if (matches (begin, "super")) return TokenType.SUPER;
				break;
			case 'r':
				if (matches (begin, "raise")) return TokenType.RAISE;
				break;
			case 'w':
				if (matches (begin, "while")) return TokenType.WHILE;
				break;
			case 'y':
				if (matches (begin, "yield")) return TokenType.YIELD;
				break;
			}
			break;
		case 6:
			switch (begin[0]) {
			case 'a':
				if (matches (begin, "assert")) return TokenType.ASSERT;
				break;
			case 'd':
				switch (begin[1]) {
				case 'e':
					if (matches (begin, "delete")) return TokenType.DELETE;
					break;
				case 'o':
					if (matches (begin, "downto")) return TokenType.DOWNTO;
					break;
				}
				break;
			case 'e':
				switch (begin[1]) {
				case 'x':
					switch (begin[2]) {
					case 'c':
						if (matches (begin, "except")) return TokenType.EXCEPT;
						break;
					case 't':
						if (matches (begin, "extern")) return TokenType.EXTERN;
						break;
					}
					break;
				}
				break;
			case 'i':
				if (matches (begin, "inline")) return TokenType.INLINE;
				break;
			case 'p':
				switch (begin[1]) {
				case 'a':
					if (matches (begin, "params")) return TokenType.PARAMS;
					break;
				case 'u':
					if (matches (begin, "public")) return TokenType.PUBLIC;
					break;
				}
				break;
			case 'r':
				switch (begin[1]) {
				case 'a':
					if (matches (begin, "raises")) return TokenType.RAISES;
					break;
				case 'e':
					if (matches (begin, "return")) return TokenType.RETURN;
					break;
				}
				break;
			case 's':
				switch (begin[1]) {
				case 'e':
					if (matches (begin, "sealed")) return TokenType.SEALED;
					break;
				case 'i':
					if (matches (begin, "sizeof")) return TokenType.SIZEOF;
					break;
				case 't':
					switch (begin[2]) {
					case 'a':
						if (matches (begin, "static")) return TokenType.STATIC;
						break;
					case 'r':
						if (matches (begin, "struct")) return TokenType.STRUCT;
						break;
					}
					break;
				}
				break;
			case 't':
				if (matches (begin, "typeof")) return TokenType.TYPEOF;
				break;
			}
			break;
		case 7:
			switch (begin[0]) {
			case 'd':
				switch (begin[1]) {
				case 'e':
					if (matches (begin, "default")) return TokenType.DEFAULT;
					break;
				case 'y':
					if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
					break;
				}
				break;
			case 'e':
				if (matches (begin, "ensures")) return TokenType.ENSURES;
				break;
			case 'f':
				switch (begin[1]) {
				case 'i':
					if (matches (begin, "finally")) return TokenType.FINALLY;
					break;
				}
				break;
			case 'p':
				if (matches (begin, "private")) return TokenType.PRIVATE;
				break;
			case 'u':
				if (matches (begin, "unowned")) return TokenType.UNOWNED;
				break;
			case 'v':
				if (matches (begin, "virtual")) return TokenType.VIRTUAL;
				break;
			}
			break;
		case 8:
			switch (begin[0]) {
			case 'a':
				if (matches (begin, "abstract")) return TokenType.ABSTRACT;
				break;
			case 'c':
				if (matches (begin, "continue")) return TokenType.CONTINUE;
				break;
			case 'd':
				if (matches (begin, "delegate")) return TokenType.DELEGATE;
				break;
			case 'i':
				if (matches (begin, "internal")) return TokenType.INTERNAL;
				break;
			case 'o':
				if (matches (begin, "override")) return TokenType.OVERRIDE;
				break;
			case 'r':
				switch (begin[2]) {
				case 'a':
					if (matches (begin, "readonly")) return TokenType.READONLY;
					break;
				case 'q':
					if (matches (begin, "requires")) return TokenType.REQUIRES;
					break;
				}
				break;
			case 'v':
				if (matches (begin, "volatile")) return TokenType.VOLATILE;
				break;
			}
			break;
		case 9:
			switch (begin[0]) {
			case 'c':
				if (matches (begin, "construct")) return TokenType.CONSTRUCT;
				break;
			case 'e':
				if (matches (begin, "exception")) return TokenType.EXCEPTION;
				break;
			case 'i':
				if (matches (begin, "interface")) return TokenType.INTERFACE;
				break;
			case 'n':
				if (matches (begin, "namespace")) return TokenType.NAMESPACE;
				break;
			case 'p':
				if (matches (begin, "protected")) return TokenType.PROTECTED;
				break;
			}
			break;
		case 10:
			switch (begin[0]) {
			case 'i':
				if (matches (begin, "implements")) return TokenType.IMPLEMENTS;
				break;
			}
			break;
		}
		return TokenType.IDENTIFIER;
	}


	public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
		bool is_verbatim = in_verbatim_template ();
		TokenType type;
		char* begin = current;
		token_begin = SourceLocation (begin, line, column);

		int token_length_in_chars = -1;

		if (current >= end) {
			type = TokenType.EOF;
		} else {
			switch (current[0]) {
			case '"':
				if (is_verbatim) {
					if (current < end -2 && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						type = TokenType.CLOSE_TEMPLATE;
						current += 3;
						state_stack.length--;
					} else {
						type = TokenType.VERBATIM_TEMPLATE_STRING_LITERAL;
						current++;
						token_length_in_chars++;
						state_stack += State.TEMPLATE_PART;
					}
				} else {
					type = TokenType.CLOSE_TEMPLATE;
					current++;
					state_stack.length--;
				}
				break;
			case '$':
				token_begin.pos++; // $ is not part of following token
				current++;
				if (current[0].isalpha () || current[0] == '_') {
					int len = 0;
					while (current < end && is_ident_char (current[0])) {
						current++;
						len++;
					}
					type = TokenType.IDENTIFIER;
					state_stack += State.TEMPLATE_PART;
				} else if (current[0] == '(') {
					current++;
					column += 2;
					state_stack += State.PARENS;
					return read_token (out token_begin, out token_end);
				} else if (current[0] == '$') {
					type = is_verbatim ? TokenType.VERBATIM_TEMPLATE_STRING_LITERAL : TokenType.TEMPLATE_STRING_LITERAL;
					current++;
					state_stack += State.TEMPLATE_PART;
				} else {
					Report.error (get_source_reference (1), "unexpected character");
					return read_template_token (out token_begin, out token_end);
				}
				break;
			default:
				type = is_verbatim ? TokenType.VERBATIM_TEMPLATE_STRING_LITERAL : TokenType.TEMPLATE_STRING_LITERAL;
				token_length_in_chars = 0;
				while (current < end && current[0] != '"' && current[0] != '$') {
					if (current[0] == '\\' && !is_verbatim) {
						current++;
						token_length_in_chars++;
						if (current >= end) {
							break;
						}

						switch (current[0]) {
						case '\'':
						case '"':
						case '\\':
						case '0':
						case 'b':
						case 'f':
						case 'n':
						case 'r':
						case 't':
						case 'v':
							current++;
							token_length_in_chars++;
							break;
						case 'u':
							// u escape character has four hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
							} else if (digit_length < 4) {
								Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
							}
							break;
						case 'x':
							// hexadecimal escape character requires two hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit ();) {
								if (current[0] != '0') {
									digit_length++;
								}
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
							} else if (digit_length > 2) {
								Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
							}
							break;
						default:
							Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
							break;
						}
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						token_length_in_chars = 1;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current >= end) {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
					state_stack.length--;
					return read_token (out token_begin, out token_end);
				}
				state_stack += State.TEMPLATE_PART;
				break;
			}
		}

		if (token_length_in_chars < 0) {
			column += (int) (current - begin);
		} else {
			column += token_length_in_chars;
		}

		token_end = SourceLocation (current, line, column - 1);

		return type;
	}


	public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
		if (current == null) {
			token_begin = SourceLocation (current, line, column);
			token_end = SourceLocation (current, line, column);
			return TokenType.EOF;
		}

		if (in_template () || in_verbatim_template ()) {
			return read_template_token (out token_begin, out token_end);
		} else if (in_template_part ()) {
			state_stack.length--;

			token_begin = SourceLocation (current, line, column);
			token_end = SourceLocation (current, line, column - 1);

			return TokenType.COMMA;
		} else if (in_regex_literal ()) {
			return read_regex_token (out token_begin, out token_end);
		}


		/* emit dedents if outstanding before checking any other chars */

		if (pending_dedents > 0) {
			pending_dedents--;
			indent_level--;

			token_begin = SourceLocation (current, line, column);
			token_end = SourceLocation (current, line, column);

			last_token = TokenType.DEDENT;

			return TokenType.DEDENT;
		}

		if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) {
			/* scrub whitespace (excluding newlines) and comments */
			space ();
		}


		/* handle explicit line continuation (lines ending with "\") */
		while (current < end && current[0] == '\\' && current[1] == '\n') {
			current += 2;
			line++;
			skip_space_tabs ();
		}

		/* handle automatic line continuations (when inside parens or braces) */
		while (current < end && current[0] == '\n' && (open_parens_count > 0 || open_brace_count > 0)) {
			current++;
			line++;
			skip_space_tabs ();
		}


		/* handle non-consecutive new line once parsing is underway - EOL */
		if (newline () && parse_started && last_token != TokenType.EOL && last_token != TokenType.SEMICOLON) {
			token_begin = SourceLocation (current, line, column);
			token_end = SourceLocation (current, line, column);

			last_token = TokenType.EOL;

			return TokenType.EOL;
		}


		while (skip_newlines ()) {
			token_begin = SourceLocation (current, line, column);

			current_indent_level = count_tabs ();

			/* if its an empty new line then ignore */
			if (current_indent_level == -1)  {
				continue;
			}

			if (current_indent_level > indent_level) {
				indent_level = current_indent_level;

				token_end = SourceLocation (current, line, column);

				last_token = TokenType.INDENT;

				return TokenType.INDENT;
			} else if (current_indent_level < indent_level) {
				indent_level--;

				pending_dedents = (indent_level - current_indent_level);
				token_end = SourceLocation (current, line, column);

				last_token = TokenType.DEDENT;

				return TokenType.DEDENT;
			}
		}

		TokenType type;
		char* begin = current;
		token_begin = SourceLocation (begin, line, column);

		int token_length_in_chars = -1;

		parse_started = true;

		if (current >= end) {
			if (indent_level > 0) {
				indent_level--;

				pending_dedents = indent_level;

				type = TokenType.DEDENT;
			} else {
				type = TokenType.EOF;
			}
		} else if (current[0].isalpha () || current[0] == '_') {
			int len = 0;
			while (current < end && is_ident_char (current[0])) {
				current++;
				len++;
			}
			type = get_identifier_or_keyword (begin, len);
		} else if (current[0] == '@') {
			if (current < end - 1 && current[1] == '"') {
				current += 1;
				if (current < end - 5 && current[1] == '"' && current[2] == '"') {
					current += 3;
					state_stack += State.VERBATIM_TEMPLATE;
				} else {
					current += 1;
					state_stack += State.TEMPLATE;
				}
				type = TokenType.OPEN_TEMPLATE;
			} else {
				token_begin.pos++; // @ is not part of the identifier
				current++;
				int len = 0;
				while (current < end && is_ident_char (current[0])) {
					current++;
					len++;
				}
				type = TokenType.IDENTIFIER;
			}
		} else if (current[0].isdigit ()) {
			while (current < end && current[0].isdigit ()) {
				current++;
			}
			type = TokenType.INTEGER_LITERAL;
			if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
				current++;
				while (current < end && current[0].isdigit ()) {
					current++;
				}
				if (current < end && current[0].tolower () == 'e') {
					current++;
					if (current < end && (current[0] == '+' || current[0] == '-')) {
						current++;
					}
					while (current < end && current[0].isdigit ()) {
						current++;
					}
				}
				type = TokenType.REAL_LITERAL;
			} else if (current < end && current[0].tolower () == 'e') {
				current++;
				if (current < end && (current[0] == '+' || current[0] == '-')) {
					current++;
				}
				while (current < end && current[0].isdigit ()) {
					current++;
				}
				type = TokenType.REAL_LITERAL;
			} else if (current < end && current == begin + 1
					   && begin[0] == '0'
					   && (begin[1] == 'x' || begin[1] == 'X')
					   && begin[2].isxdigit ()) {
				// hexadecimal integer literal
				current++;
				while (current < end && current[0].isxdigit ()) {
					current++;
				}
			} else if (current < end && current == begin + 1
					   && begin[0] == '0'
					   && (begin[1] == 'b' || begin[1] == 'B' || begin[1] == 'o' || begin[1] == 'O')
					   && begin[2].isdigit ()) {
				// binary or octal integer literal
				current++;
				while (current < end && current[0].isdigit ()) {
					current++;
				}
			}
			if (current < end) {
				bool real_literal = (type == TokenType.REAL_LITERAL);

				switch (current[0]) {
				case 'l':
				case 'L':
					if (type == TokenType.INTEGER_LITERAL) {
						current++;
						if (current < end && current[0].tolower () == 'l') {
							current++;
						}
					}
					break;
				case 'u':
				case 'U':
					if (type == TokenType.INTEGER_LITERAL) {
						current++;
						if (current < end && current[0].tolower () == 'l') {
							current++;
							if (current < end && current[0].tolower () == 'l') {
								current++;
							}
						}
					}
					break;
				case 'f':
				case 'F':
				case 'd':
				case 'D':
					type = TokenType.REAL_LITERAL;
					current++;
					break;
				}

				if (!real_literal && is_ident_char (current[0])) {
					// allow identifiers to start with a digit
					// as long as they contain at least one char
					while (current < end && is_ident_char (current[0])) {
						current++;
					}
					type = TokenType.IDENTIFIER;
				}
			}
		} else {
			switch (current[0]) {
			case '{':
				type = TokenType.OPEN_BRACE;
				open_brace_count++;
				state_stack += State.BRACE;
				current++;
				break;
			case '}':
				type = TokenType.CLOSE_BRACE;
				open_brace_count--;
				if (state_stack.length > 0) {
					state_stack.length--;
				}
				current++;
				break;
			case '(':
				type = TokenType.OPEN_PARENS;
				open_parens_count++;
				state_stack += State.PARENS;
				current++;
				break;
			case ')':
				type = TokenType.CLOSE_PARENS;
				open_parens_count--;
				current++;
				if (state_stack.length > 0) {
					state_stack.length--;
				}
				if (in_template () || in_verbatim_template ()) {
					type = TokenType.COMMA;
				}
				break;
			case '[':
				type = TokenType.OPEN_BRACKET;
				state_stack += State.BRACKET;
				current++;
				break;
			case ']':
				type = TokenType.CLOSE_BRACKET;
				if (state_stack.length > 0) {
					state_stack.length--;
				}
				current++;
				break;
			case '.':
				type = TokenType.DOT;
				current++;
				if (current < end - 1) {
					if (current[0] == '.' && current[1] == '.') {
						type = TokenType.ELLIPSIS;
						current += 2;
					}
				}
				break;
			case ':':
				type = TokenType.COLON;
				current++;
				break;
			case ',':
				type = TokenType.COMMA;
				current++;
				break;
			case ';':
				type = TokenType.SEMICOLON;
				current++;
				break;
			case '#':
				type = TokenType.HASH;
				current++;
				break;
			case '?':
				type = TokenType.INTERR;
				current++;
				break;
			case '|':
				type = TokenType.BITWISE_OR;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.ASSIGN_BITWISE_OR;
						current++;
						break;
					case '|':
						type = TokenType.OP_OR;
						current++;
						break;
					}
				}
				break;
			case '&':
				type = TokenType.BITWISE_AND;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.ASSIGN_BITWISE_AND;
						current++;
						break;
					case '&':
						type = TokenType.OP_AND;
						current++;
						break;
					}
				}
				break;
			case '^':
				type = TokenType.CARRET;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_BITWISE_XOR;
					current++;
				}
				break;
			case '~':
				type = TokenType.TILDE;
				current++;
				break;
			case '=':
				type = TokenType.ASSIGN;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.OP_EQ;
						current++;
						break;
					case '>':
						type = TokenType.LAMBDA;
						current++;
						break;
					}
				}
				break;
			case '<':
				type = TokenType.OP_LT;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.OP_LE;
						current++;
						break;
					case '<':
						type = TokenType.OP_SHIFT_LEFT;
						current++;
						if (current < end && current[0] == '=') {
							type = TokenType.ASSIGN_SHIFT_LEFT;
							current++;
						}
						break;
					}
				}
				break;
			case '>':
				type = TokenType.OP_GT;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.OP_GE;
					current++;
				}
				break;
			case '!':
				type = TokenType.OP_NEG;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.OP_NE;
					current++;
				}
				break;
			case '+':
				type = TokenType.PLUS;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.ASSIGN_ADD;
						current++;
						break;
					case '+':
						type = TokenType.OP_INC;
						current++;
						break;
					}
				}
				break;
			case '-':
				type = TokenType.MINUS;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.ASSIGN_SUB;
						current++;
						break;
					case '-':
						type = TokenType.OP_DEC;
						current++;
						break;
					case '>':
						type = TokenType.OP_PTR;
						current++;
						break;
					}
				}
				break;
			case '*':
				type = TokenType.STAR;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_MUL;
					current++;
				}
				break;
			case '/':
				switch (last_token) {
				case TokenType.ASSIGN:
				case TokenType.COMMA:
				case TokenType.MINUS:
				case TokenType.OP_AND:
				case TokenType.OP_EQ:
				case TokenType.OP_GE:
				case TokenType.OP_GT:
				case TokenType.OP_INC:
				case TokenType.OP_LE:
				case TokenType.OP_LT:
				case TokenType.OP_NE:
				case TokenType.OP_NEG:
				case TokenType.OP_OR:
				case TokenType.OPEN_BRACE:
				case TokenType.OPEN_PARENS:
				case TokenType.PLUS:
				case TokenType.RETURN:
					type = TokenType.OPEN_REGEX_LITERAL;
					state_stack += State.REGEX_LITERAL;
					current++;
					break;
				default:
					type = TokenType.DIV;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_DIV;
						current++;
					}
					break;
				}
				break;

			case '%':
				type = TokenType.PERCENT;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_PERCENT;
					current++;
				}
				break;
			case '\'':
			case '"':
				if (begin[0] == '\'') {
					type = TokenType.CHARACTER_LITERAL;
				} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
					type = TokenType.VERBATIM_STRING_LITERAL;
					token_length_in_chars = 6;
					current += 3;
					while (current < end - 4) {
						if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
							break;
						} else if (current[0] == '\n') {
							current++;
							line++;
							column = 1;
							token_length_in_chars = 3;
						} else {
							unichar u = ((string) current).get_char_validated ((long) (end - current));
							if (u != (unichar) (-1)) {
								current += u.to_utf8 (null);
								token_length_in_chars++;
							} else {
								Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
							}
						}
					}
					if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
						current += 3;
					} else {
						Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
					}
					break;
				} else {
					type = TokenType.STRING_LITERAL;
				}
				token_length_in_chars = 2;
				current++;
				while (current < end && current[0] != begin[0]) {
					if (current[0] == '\\') {
						current++;
						token_length_in_chars++;
						if (current >= end) {
							break;
						}

						switch (current[0]) {
						case '\'':
						case '"':
						case '\\':
						case '0':
						case 'b':
						case 'f':
						case 'n':
						case 'r':
						case 't':
						case 'v':
							current++;
							token_length_in_chars++;
							break;
						case 'u':
							// u escape character has four hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
							} else if (digit_length < 4) {
								Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
							}
							break;
						case 'x':
							// hexadecimal escape character requires two hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit ();) {
								if (current[0] != '0') {
									digit_length++;
								}
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
							} else if (digit_length > 2) {
								Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
							}
							break;
						default:
							Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
							break;
						}
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						token_length_in_chars = 1;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
					if (current < end && begin[0] == '\'' && current[0] != '\'') {
						// multiple characters in single character literal
						Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
					}
				}
				if (current < end) {
					current++;
				} else {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c", begin[0]);
				}
				break;
			default:
				unichar u = ((string) current).get_char_validated ((long) (end - current));
				if (u != (unichar) (-1)) {
					current += u.to_utf8 (null);
					Report.error (get_source_reference (0), "syntax error, unexpected character");
				} else {
					current++;
					Report.error (get_source_reference (0), "invalid UTF-8 character");
				}
				column++;
				return read_token (out token_begin, out token_end);
			}
		}

		if (token_length_in_chars < 0) {
			column += (int) (current - begin);
		} else {
			column += token_length_in_chars;
		}

		token_end = SourceLocation (current, line, column - 1);
		last_token = type;

		return type;
	}

	int count_tabs ()
	{

		int tab_count = 0;


		if (_indent_spaces == 0) {
			while (current < end && current[0] == '\t') {
				current++;
				column++;
				tab_count++;
			}
		} else {
			int space_count = 0;
			while (current < end && current[0] == ' ') {
				current++;
				column++;
				space_count++;
			}

			tab_count = space_count / _indent_spaces;

		}

		/* ignore comments and whitespace and other lines that contain no code */

		space ();

		if ((current < end) && (current[0] == '\n')) return -1;

		return tab_count;
	}

	static bool matches (char* begin, string keyword) {
		char* keyword_array = (char *) keyword;
		long len = keyword.length;
		for (int i = 0; i < len; i++) {
			if (begin[i] != keyword_array[i]) {
				return false;
			}
		}
		return true;
	}

	bool whitespace () {
		bool found = false;
		while (current < end && current[0].isspace () && current[0] != '\n' ) {

			found = true;
			current++;
			column++;
		}

		if ((column == 1) && (current < end) && (current[0] == '#')) {
			pp_directive ();
			return true;
		}

		return found;
	}

	inline bool newline () {
		if (current[0] == '\n') {
			return true;
		}

		return false;
	}

	bool skip_newlines () {
		bool new_lines = false;

		while (newline ()) {
			current++;

			line++;
			column = 1;
			current_indent_level = 0;

			new_lines = true;
		}

		return new_lines;
	}

	bool comment (bool file_comment = false) {
		if (current == null
			|| current > end - 2
			|| current[0] != '/'
			|| (current[1] != '/' && current[1] != '*')) {
			return false;
		}


		if (current[1] == '/') {
			// single-line comment

			SourceReference source_reference = null;
			if (file_comment) {
				source_reference = get_source_reference (0);
			}

			current += 2;

			// skip until end of line or end of file
			while (current < end && current[0] != '\n') {
				current++;
			}

			/* do not ignore EOL if comment does not exclusively occupy the line */
			if (current[0] == '\n' && last_token == TokenType.EOL) {
				current++;
				line++;
				column = 1;
				current_indent_level = 0;
			}

			if (source_reference != null) {
				push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
			}

		} else {
			// delimited comment
			SourceReference source_reference = null;
			if (file_comment && current[2] == '*') {
				return false;
			}

			if (current[2] == '*' || file_comment) {
				source_reference = get_source_reference (0);
			}

			current += 2;
			char* begin = current;

			while (current < end - 1
				   && (current[0] != '*' || current[1] != '/')) {
				if (current[0] == '\n') {
					line++;
					column = 0;
				}
				current++;
				column++;
			}
			if (current == end - 1) {
				Report.error (get_source_reference (0), "syntax error, expected */");
				return true;
			}

			if (source_reference != null) {
				string comment = ((string) begin).substring (0, (long) (current - begin));
				push_comment (comment, source_reference, file_comment);
			}

			current += 2;
			column += 2;
		}

		return true;
	}

	bool skip_tabs () {
		bool found = false;
		while (current < end && current[0] == '\t' ) {
			current++;
			column++;
			found = true;
		}

		return found;
	}

	void skip_space_tabs () {
		while (whitespace () || skip_tabs () || comment () ) {
		}

	}

	void space () {
		while (whitespace () || comment ()) {
		}
	}

	public void parse_file_comments () {
		while (whitespace () || comment (true)) {
		}

	}

	void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
		if (comment_item[0] == '*') {
			if (_comment != null) {
				// extra doc comment, add it to source file comments
				source_file.add_comment (_comment);
			}
			_comment = new Comment (comment_item, source_reference);
		}

		if (file_comment) {
			source_file.add_comment (new Comment (comment_item, source_reference));
			_comment = null;
		}
	}

	/**
	 * Clears and returns the content of the comment stack.
	 *
	 * @return saved comment
	 */
	public Comment? pop_comment () {
		if (_comment == null) {
			return null;
		}

		var comment = _comment;
		_comment = null;
		return comment;
	}

	bool pp_whitespace () {
		bool found = false;
		while (current < end && current[0].isspace () && current[0] != '\n') {
			found = true;
			current++;
			column++;
		}
		return found;
	}

	void pp_space () {
		while (pp_whitespace () || comment ()) {
		}
	}

	void pp_directive () {
		// hash sign
		current++;
		column++;

		pp_space ();

		char* begin = current;
		int len = 0;
		while (current < end && current[0].isalnum ()) {
			current++;
			column++;
			len++;
		}

		if (len == 2 && matches (begin, "if")) {
			parse_pp_if ();
		} else if (len == 4 && matches (begin, "elif")) {
			parse_pp_elif ();
		} else if (len == 4 && matches (begin, "else")) {
			parse_pp_else ();
		} else if (len == 5 && matches (begin, "endif")) {
			parse_pp_endif ();
		} else {
			Report.error (get_source_reference (-len, len), "syntax error, invalid preprocessing directive");
		}

		if (conditional_stack.length > 0
			&& conditional_stack[conditional_stack.length - 1].skip_section) {
			// skip lines until next preprocessing directive
			bool bol = false;
			while (current < end) {
				if (bol && current < end && current[0] == '#') {
					// go back to begin of line
					current -= (column - 1);
					column = 1;
					return;
				}
				if (current[0] == '\n') {
					line++;
					column = 0;
					bol = true;
				} else if (!current[0].isspace ()) {
					bol = false;
				}
				current++;
				column++;
			}
		}
	}

	void pp_eol () {
		pp_space ();
		if (current >= end || current[0] != '\n') {
			Report.error (get_source_reference (0), "syntax error, expected newline");
		}
	}

	void parse_pp_if () {
		pp_space ();

		bool condition = parse_pp_expression ();

		pp_eol ();

		conditional_stack += Conditional ();

		if (condition && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
			// condition true => process code within if
			conditional_stack[conditional_stack.length - 1].matched = true;
		} else {
			// skip lines until next preprocessing directive
			conditional_stack[conditional_stack.length - 1].skip_section = true;
		}
	}

	void parse_pp_elif () {
		pp_space ();

		bool condition = parse_pp_expression ();

		pp_eol ();

		if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
			Report.error (get_source_reference (0), "syntax error, unexpected #elif");
			return;
		}

		if (condition && !conditional_stack[conditional_stack.length - 1].matched
			&& (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
			// condition true => process code within if
			conditional_stack[conditional_stack.length - 1].matched = true;
			conditional_stack[conditional_stack.length - 1].skip_section = false;
		} else {
			// skip lines until next preprocessing directive
			conditional_stack[conditional_stack.length - 1].skip_section = true;
		}
	}

	void parse_pp_else () {
		pp_eol ();

		if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
			Report.error (get_source_reference (0), "syntax error, unexpected #else");
			return;
		}

		if (!conditional_stack[conditional_stack.length - 1].matched
			&& (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
			// condition true => process code within if
			conditional_stack[conditional_stack.length - 1].matched = true;
			conditional_stack[conditional_stack.length - 1].skip_section = false;
		} else {
			// skip lines until next preprocessing directive
			conditional_stack[conditional_stack.length - 1].skip_section = true;
		}
	}

	void parse_pp_endif () {
		pp_eol ();

		if (conditional_stack.length == 0) {
			Report.error (get_source_reference (0), "syntax error, unexpected #endif");
			return;
		}

		conditional_stack.length--;
	}

	bool parse_pp_symbol () {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			column++;
			len++;
		}

		if (len == 0) {
			Report.error (get_source_reference (0), "syntax error, expected identifier");
			return false;
		}

		string identifier = ((string) (current - len)).substring (0, len);
		bool defined;
		if (identifier == "true") {
			defined = true;
		} else if (identifier == "false") {
			defined = false;
		} else {
			defined = source_file.context.is_defined (identifier);
		}

		return defined;
	}

	bool parse_pp_primary_expression () {
		if (current >= end) {
			Report.error (get_source_reference (0), "syntax error, expected identifier");
		} else if (is_ident_char (current[0])) {
			return parse_pp_symbol ();
		} else if (current[0] == '(') {
			current++;
			column++;
			pp_space ();
			bool result = parse_pp_expression ();
			pp_space ();
			if (current < end && current[0] ==  ')') {
				current++;
				column++;
			} else {
				Report.error (get_source_reference (0), "syntax error, expected `)'");
			}
			return result;
		} else {
			Report.error (get_source_reference (0), "syntax error, expected identifier");
		}
		return false;
	}

	bool parse_pp_unary_expression () {
		if (current < end && current[0] == '!') {
			current++;
			column++;
			pp_space ();
			return !parse_pp_unary_expression ();
		}

		return parse_pp_primary_expression ();
	}

	bool parse_pp_equality_expression () {
		bool left = parse_pp_unary_expression ();
		pp_space ();
		while (true) {
			if (current < end - 1 && current[0] == '=' && current[1] == '=') {
				current += 2;
				column += 2;
				pp_space ();
				bool right = parse_pp_unary_expression ();
				left = (left == right);
			} else if (current < end - 1 && current[0] == '!' && current[1] == '=') {
				current += 2;
				column += 2;
				pp_space ();
				bool right = parse_pp_unary_expression ();
				left = (left != right);
			} else {
				break;
			}
		}
		return left;
	}

	bool parse_pp_and_expression () {
		bool left = parse_pp_equality_expression ();
		pp_space ();
		while (current < end - 1 && current[0] == '&' && current[1] == '&') {
			current += 2;
			column += 2;
			pp_space ();
			bool right = parse_pp_equality_expression ();
			left = left && right;
		}
		return left;
	}

	bool parse_pp_or_expression () {
		bool left = parse_pp_and_expression ();
		pp_space ();
		while (current < end - 1 && current[0] == '|' && current[1] == '|') {
			current += 2;
			column += 2;
			pp_space ();
			bool right = parse_pp_and_expression ();
			left = left || right;
		}
		return left;
	}

	bool parse_pp_expression () {
		return parse_pp_or_expression ();
	}
}