/* valascanner.vala
 *
 * Copyright (C) 2008-2012  Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.

 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.

 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 * 	Jukka-Pekka Iivonen <jp0409@jippii.fi>
 */

using GLib;

/**
 * Lexical scanner for Vala source files.
 */
public class Vala.Scanner {
	public SourceFile source_file { get; private set; }

	TokenType previous;
	char* current;
	char* end;

	int line;
	int column;

	Comment _comment;

	Conditional[] conditional_stack;

	struct Conditional {
		public bool matched;
		public bool else_found;
		public bool skip_section;
	}

	State[] state_stack;

	enum State {
		PARENS,
		BRACE,
		BRACKET,
		TEMPLATE,
		TEMPLATE_PART,
		REGEX_LITERAL,
		VERBATIM_TEMPLATE
	}

	public Scanner (SourceFile source_file) {
		this.source_file = source_file;

		char* begin = source_file.get_mapped_contents ();
		end = begin + source_file.get_mapped_length ();

		current = begin;

		line = 1;
		column = 1;
	}

	public void seek (SourceLocation location) {
		current = location.pos;
		line = location.line;
		column = location.column;

		conditional_stack = null;
		state_stack = null;
	}

	inline bool in_template () {
		return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE);
	}

	inline bool in_verbatim_template () {
		return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.VERBATIM_TEMPLATE);
	}

	inline bool in_template_part () {
		return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE_PART);
	}

	inline bool in_regex_literal () {
		return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.REGEX_LITERAL);
	}

	inline bool is_ident_char (char c) {
		return (c.isalnum () || c == '_');
	}

	SourceReference get_source_reference (int offset, int length = 0) {
		return new SourceReference (source_file, SourceLocation (current, line, column + offset), SourceLocation (current + length, line, column + offset + length));
	}

	public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
		TokenType type;
		char* begin = current;
		token_begin = SourceLocation (begin, line, column);

		int token_length_in_chars = -1;

		if (current >= end) {
			type = TokenType.EOF;
		} else {
			switch (current[0]) {
			case '/':
				type = TokenType.CLOSE_REGEX_LITERAL;
				current++;
				state_stack.length--;
				var fl_i = false;
				var fl_s = false;
				var fl_m = false;
				var fl_x = false;
				while (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x') {
					switch (current[0]) {
					case 'i':
						if (fl_i) {
							Report.error (get_source_reference (token_length_in_chars), "modifier 'i' used more than once");
						}
						fl_i = true;
						break;
					case 's':
						if (fl_s) {
							Report.error (get_source_reference (token_length_in_chars), "modifier 's' used more than once");
						}
						fl_s = true;
						break;
					case 'm':
						if (fl_m) {
							Report.error (get_source_reference (token_length_in_chars), "modifier 'm' used more than once");
						}
						fl_m = true;
						break;
					case 'x':
						if (fl_x) {
							Report.error (get_source_reference (token_length_in_chars), "modifier 'x' used more than once");
						}
						fl_x = true;
						break;
					}
					current++;
					token_length_in_chars++;
				}
				break;
			default:
				type = TokenType.REGEX_LITERAL;
				token_length_in_chars = 0;
				while (current < end && current[0] != '/') {
					if (current[0] == '\\') {
						current++;
						token_length_in_chars++;
						if (current >= end) {
							break;
						}

						switch (current[0]) {
						case '\'':
						case '"':
						case '\\':
						case '/':
						case '^':
						case '$':
						case '.':
						case '[':
						case ']':
						case '{':
						case '}':
						case '(':
						case ')':
						case '?':
						case '*':
						case '+':
						case '-':
						case '#':
						case '&':
						case '~':
						case ':':
						case ';':
						case '<':
						case '>':
						case '|':
						case '%':
						case '=':
						case '@':
						case '0':
						case 'b':
						case 'B':
						case 'f':
						case 'n':
						case 'N':
						case 'r':
						case 'R':
						case 't':
						case 'v':
						case 'a':
						case 'A':
						case 'p':
						case 'P':
						case 'e':
						case 'd':
						case 'D':
						case 's':
						case 'S':
						case 'w':
						case 'W':
						case 'G':
						case 'z':
						case 'Z':
							current++;
							token_length_in_chars++;
							break;
						case 'u':
							// u escape character has four hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
							} else if (digit_length < 4) {
								Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
							}
							break;
						case 'x':
							// hexadecimal escape character requires two hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							bool empty = true;
							for (digit_length = 0; current < end && current[0].isxdigit ();) {
								if (current[0] != '0') {
									digit_length++;
								} else {
									empty = false;
								}
								current++;
								token_length_in_chars++;
							}
							if (empty && digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
							} else if (digit_length > 2) {
								Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
							}
							break;
						default:
							// back references \1 through \99
							if (current[0].isdigit ()) {
								current++;
								token_length_in_chars++;
								if (current[0].isdigit ()) {
									current++;
									token_length_in_chars++;
								}
							} else {
								Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
							}
							break;
						}
					} else if (current[0] == '\n') {
						break;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current >= end || current[0] == '\n') {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
					state_stack.length--;
					return read_token (out token_begin, out token_end);
				}
				break;
			}
		}

		if (token_length_in_chars < 0) {
			column += (int) (current - begin);
		} else {
			column += token_length_in_chars;
		}

		token_end = SourceLocation (current, line, column - 1);

		return type;
	}

	public static TokenType get_identifier_or_keyword (char* begin, int len) {
		switch (len) {
		case 2:
			switch (begin[0]) {
			case 'a':
				if (matches (begin, "as")) return TokenType.AS;
				break;
			case 'd':
				if (matches (begin, "do")) return TokenType.DO;
				break;
			case 'i':
				switch (begin[1]) {
				case 'f':
					return TokenType.IF;
				case 'n':
					return TokenType.IN;
				case 's':
					return TokenType.IS;
				}
				break;
			}
			break;
		case 3:
			switch (begin[0]) {
			case 'f':
				if (matches (begin, "for")) return TokenType.FOR;
				break;
			case 'g':
				if (matches (begin, "get")) return TokenType.GET;
				break;
			case 'n':
				if (matches (begin, "new")) return TokenType.NEW;
				break;
			case 'o':
				if (matches (begin, "out")) return TokenType.OUT;
				break;
			case 'r':
				if (matches (begin, "ref")) return TokenType.REF;
				break;
			case 's':
				if (matches (begin, "set")) return TokenType.SET;
				break;
			case 't':
				if (matches (begin, "try")) return TokenType.TRY;
				break;
			case 'v':
				if (matches (begin, "var")) return TokenType.VAR;
				break;
			}
			break;
		case 4:
			switch (begin[0]) {
			case 'b':
				if (matches (begin, "base")) return TokenType.BASE;
				break;
			case 'c':
				if (matches (begin, "case")) return TokenType.CASE;
				break;
			case 'e':
				switch (begin[1]) {
				case 'l':
					if (matches (begin, "else")) return TokenType.ELSE;
					break;
				case 'n':
					if (matches (begin, "enum")) return TokenType.ENUM;
					break;
				}
				break;
			case 'l':
				if (matches (begin, "lock")) return TokenType.LOCK;
				break;
			case 'n':
				if (matches (begin, "null")) return TokenType.NULL;
				break;
			case 't':
				switch (begin[1]) {
				case 'h':
					if (matches (begin, "this")) return TokenType.THIS;
					break;
				case 'r':
					if (matches (begin, "true")) return TokenType.TRUE;
					break;
				}
				break;
			case 'v':
				if (matches (begin, "void")) return TokenType.VOID;
				break;
			case 'w':
				switch (begin[1]) {
				case 'e':
					if (matches (begin, "weak")) return TokenType.WEAK;
					break;
				case 'i':
					if (matches (begin, "with")) return TokenType.WITH;
					break;
				}
				break;
			}
			break;
		case 5:
			switch (begin[0]) {
			case 'a':
				if (matches (begin, "async")) return TokenType.ASYNC;
				break;
			case 'b':
				if (matches (begin, "break")) return TokenType.BREAK;
				break;
			case 'c':
				switch (begin[1]) {
				case 'a':
					if (matches (begin, "catch")) return TokenType.CATCH;
					break;
				case 'l':
					if (matches (begin, "class")) return TokenType.CLASS;
					break;
				case 'o':
					if (matches (begin, "const")) return TokenType.CONST;
					break;
				}
				break;
			case 'f':
				if (matches (begin, "false")) return TokenType.FALSE;
				break;
			case 'o':
				if (matches (begin, "owned")) return TokenType.OWNED;
				break;
			case 't':
				if (matches (begin, "throw")) return TokenType.THROW;
				break;
			case 'u':
				if (matches (begin, "using")) return TokenType.USING;
				break;
			case 'w':
				if (matches (begin, "while")) return TokenType.WHILE;
				break;
			case 'y':
				if (matches (begin, "yield")) return TokenType.YIELD;
				break;
			}
			break;
		case 6:
			switch (begin[0]) {
			case 'd':
				if (matches (begin, "delete")) return TokenType.DELETE;
				break;
			case 'e':
				if (matches (begin, "extern")) return TokenType.EXTERN;
				break;
			case 'i':
				if (matches (begin, "inline")) return TokenType.INLINE;
				break;
			case 'p':
				switch (begin[1]) {
				case 'a':
					if (matches (begin, "params")) return TokenType.PARAMS;
					break;
				case 'u':
					if (matches (begin, "public")) return TokenType.PUBLIC;
					break;
				}
				break;
			case 'r':
				if (matches (begin, "return")) return TokenType.RETURN;
				break;
			case 's':
				switch (begin[1]) {
				case 'e':
					if (matches (begin, "sealed")) return TokenType.SEALED;
					break;
				case 'i':
					switch (begin[2]) {
					case 'g':
						if (matches (begin, "signal")) return TokenType.SIGNAL;
						break;
					case 'z':
						if (matches (begin, "sizeof")) return TokenType.SIZEOF;
						break;
					}
					break;
				case 't':
					switch (begin[2]) {
					case 'a':
						if (matches (begin, "static")) return TokenType.STATIC;
						break;
					case 'r':
						if (matches (begin, "struct")) return TokenType.STRUCT;
						break;
					}
					break;
				case 'w':
					if (matches (begin, "switch")) return TokenType.SWITCH;
					break;
				}
				break;
			case 't':
				switch (begin[1]) {
				case 'h':
					if (matches (begin, "throws")) return TokenType.THROWS;
					break;
				case 'y':
					if (matches (begin, "typeof")) return TokenType.TYPEOF;
					break;
				}
				break;
			case 'u':
				if (matches (begin, "unlock")) return TokenType.UNLOCK;
				break;
			}
			break;
		case 7:
			switch (begin[0]) {
			case 'd':
				switch (begin[1]) {
				case 'e':
					if (matches (begin, "default")) return TokenType.DEFAULT;
					break;
				case 'y':
					if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
					break;
				}
				break;
			case 'e':
				if (matches (begin, "ensures")) return TokenType.ENSURES;
				break;
			case 'f':
				switch (begin[1]) {
				case 'i':
					if (matches (begin, "finally")) return TokenType.FINALLY;
					break;
				case 'o':
					if (matches (begin, "foreach")) return TokenType.FOREACH;
					break;
				}
				break;
			case 'p':
				switch (begin[1]) {
				case 'r':
					if (matches (begin, "private")) return TokenType.PRIVATE;
					break;
				case 'a':
					if (matches (begin, "partial")) return TokenType.PARTIAL;
					break;
				}
				break;
			case 'u':
				if (matches (begin, "unowned")) return TokenType.UNOWNED;
				break;
			case 'v':
				if (matches (begin, "virtual")) return TokenType.VIRTUAL;
				break;
			}
			break;
		case 8:
			switch (begin[0]) {
			case 'a':
				if (matches (begin, "abstract")) return TokenType.ABSTRACT;
				break;
			case 'c':
				if (matches (begin, "continue")) return TokenType.CONTINUE;
				break;
			case 'd':
				if (matches (begin, "delegate")) return TokenType.DELEGATE;
				break;
			case 'i':
				if (matches (begin, "internal")) return TokenType.INTERNAL;
				break;
			case 'o':
				if (matches (begin, "override")) return TokenType.OVERRIDE;
				break;
			case 'r':
				if (matches (begin, "requires")) return TokenType.REQUIRES;
				break;
			case 'v':
				if (matches (begin, "volatile")) return TokenType.VOLATILE;
				break;
			}
			break;
		case 9:
			switch (begin[0]) {
			case 'c':
				if (matches (begin, "construct")) return TokenType.CONSTRUCT;
				break;
			case 'i':
				if (matches (begin, "interface")) return TokenType.INTERFACE;
				break;
			case 'n':
				if (matches (begin, "namespace")) return TokenType.NAMESPACE;
				break;
			case 'p':
				if (matches (begin, "protected")) return TokenType.PROTECTED;
				break;
			}
			break;
		case 11:
			if (matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
			break;
		}
		return TokenType.IDENTIFIER;
	}

	TokenType read_number () {
		var type = TokenType.INTEGER_LITERAL;

		// integer part
		if (current < end - 2 && current[0] == '0') {
			switch (current[1]) {
			case 'x':
			case 'X':
				// hexadecimal literal
				current += 2;
				while (current < end && current[0].isxdigit ()) {
					current++;
				}
				// fractional part
				// hexadecimal fractional part
				if (current < end - 1 && current[0] == '.' && current[1].isxdigit ()) {
					type = TokenType.REAL_LITERAL;
					current++;
					while (current < end && current[0].isxdigit ()) {
						current++;
					}
				}
				// hexadecimal exponent part
				if (current < end && current[0].tolower () == 'p') {
					type = TokenType.REAL_LITERAL;
					current++;
					if (current < end && (current[0] == '+' || current[0] == '-')) {
						current++;
					}
					while (current < end && current[0].isdigit ()) {
						current++;
					}
				} else if (type == TokenType.REAL_LITERAL) {
					Report.error (get_source_reference (1), "hexadecimal floating constants require an exponent");
				}
				break;
			case 'b':
			case 'B':
			case 'o':
			case 'O':
				// binary integer literal or octal integer literal
				current += 2;
				while (current < end && current[0].isdigit ()) {
					current++;
				}
				break;
			default:
				// decimal number (also may be octal integer)
				while (current < end && current[0].isdigit ()) {
					current++;
				}
				break;
			}
		} else {
			// decimal number
			while (current < end && current[0].isdigit ()) {
				current++;
			}
		}

		// fractional part
		if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
			type = TokenType.REAL_LITERAL;
			current++;
			while (current < end && current[0].isdigit ()) {
				current++;
			}
		}

		// exponent part
		if (current < end && current[0].tolower () == 'e') {
			type = TokenType.REAL_LITERAL;
			current++;
			if (current < end && (current[0] == '+' || current[0] == '-')) {
				current++;
			}
			while (current < end && current[0].isdigit ()) {
				current++;
			}
		}

		// type suffix
		if (current < end) {
			bool real_literal = (type == TokenType.REAL_LITERAL);

			switch (current[0]) {
			case 'l':
			case 'L':
				if (type == TokenType.INTEGER_LITERAL) {
					current++;
					if (current < end && current[0].tolower () == 'l') {
						current++;
					}
				}
				break;
			case 'u':
			case 'U':
				if (type == TokenType.INTEGER_LITERAL) {
					current++;
					if (current < end && current[0].tolower () == 'l') {
						current++;
						if (current < end && current[0].tolower () == 'l') {
							current++;
						}
					}
				}
				break;
			case 'f':
			case 'F':
			case 'd':
			case 'D':
				type = TokenType.REAL_LITERAL;
				current++;
				break;
			}

			if (!real_literal && is_ident_char (current[0])) {
				// allow identifiers to start with a digit
				// as long as they contain at least one char
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
			}
		}

		return type;
	}

	public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
		bool is_verbatim = in_verbatim_template ();
		TokenType type;
		char* begin = current;
		token_begin = SourceLocation (begin, line, column);

		int token_length_in_chars = -1;

		if (current >= end) {
			type = TokenType.EOF;
		} else {
			switch (current[0]) {
			case '"':
				if (is_verbatim) {
					if (current < end -2 && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						type = TokenType.CLOSE_TEMPLATE;
						current += 3;
						state_stack.length--;
					} else {
						type = TokenType.VERBATIM_TEMPLATE_STRING_LITERAL;
						current++;
						token_length_in_chars++;
						state_stack += State.TEMPLATE_PART;
					}
				} else {
					type = TokenType.CLOSE_TEMPLATE;
					current++;
					state_stack.length--;
				}
				break;
			case '$':
				token_begin.pos++; // $ is not part of following token
				current++;
				if (current[0].isalpha () || current[0] == '_') {
					int len = 0;
					while (current < end && is_ident_char (current[0])) {
						current++;
						len++;
					}
					type = TokenType.IDENTIFIER;
					state_stack += State.TEMPLATE_PART;
				} else if (current[0] == '(') {
					current++;
					column += 2;
					state_stack += State.PARENS;
					return read_token (out token_begin, out token_end);
				} else if (current[0] == '$') {
					type = is_verbatim ? TokenType.VERBATIM_TEMPLATE_STRING_LITERAL : TokenType.TEMPLATE_STRING_LITERAL;
					current++;
					state_stack += State.TEMPLATE_PART;
				} else {
					Report.error (get_source_reference (1), "unexpected character");
					return read_template_token (out token_begin, out token_end);
				}
				break;
			default:
				type = is_verbatim ? TokenType.VERBATIM_TEMPLATE_STRING_LITERAL : TokenType.TEMPLATE_STRING_LITERAL;
				token_length_in_chars = 0;
				while (current < end && current[0] != '"' && current[0] != '$') {
					if (current[0] == '\\' && !is_verbatim) {
						current++;
						token_length_in_chars++;
						if (current >= end) {
							break;
						}

						switch (current[0]) {
						case '\'':
						case '"':
						case '\\':
						case '0':
						case 'b':
						case 'f':
						case 'n':
						case 'r':
						case 't':
						case 'v':
							current++;
							token_length_in_chars++;
							break;
						case 'u':
							// u escape character has four hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
							} else if (digit_length < 4) {
								Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
							}
							break;
						case 'x':
							// hexadecimal escape character requires two hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							bool empty = true;
							for (digit_length = 0; current < end && current[0].isxdigit ();) {
								if (current[0] != '0') {
									digit_length++;
								} else {
									empty = false;
								}
								current++;
								token_length_in_chars++;
							}
							if (empty && digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
							} else if (digit_length > 2) {
								Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
							}
							break;
						default:
							Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
							break;
						}
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						token_length_in_chars = 1;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current >= end) {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
					state_stack.length--;
					return read_token (out token_begin, out token_end);
				}
				state_stack += State.TEMPLATE_PART;
				break;
			}
		}

		if (token_length_in_chars < 0) {
			column += (int) (current - begin);
		} else {
			column += token_length_in_chars;
		}

		token_end = SourceLocation (current, line, column - 1);

		return type;
	}

	public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
		if (in_template () || in_verbatim_template ()) {
			return read_template_token (out token_begin, out token_end);
		} else if (in_template_part ()) {
			state_stack.length--;

			token_begin = SourceLocation (current, line, column);
			token_end = SourceLocation (current, line, column - 1);

			return TokenType.COMMA;
		} else if (in_regex_literal ()) {
			return read_regex_token (out token_begin, out token_end);
		}

		space ();

		TokenType type;
		char* begin = current;
		token_begin = SourceLocation (begin, line, column);

		int token_length_in_chars = -1;

		if (current >= end) {
			type = TokenType.EOF;
		} else if (current[0].isalpha () || current[0] == '_') {
			int len = 0;
			while (current < end && is_ident_char (current[0])) {
				current++;
				len++;
			}
			type = get_identifier_or_keyword (begin, len);
		} else if (current[0] == '@') {
			if (current < end - 1 && current[1] == '"') {
				current += 1;
				if (current < end - 5 && current[1] == '"' && current[2] == '"') {
					current += 3;
					state_stack += State.VERBATIM_TEMPLATE;
				} else {
					current += 1;
					state_stack += State.TEMPLATE;
				}
				type = TokenType.OPEN_TEMPLATE;
			} else {
				token_begin.pos++; // @ is not part of the identifier
				current++;
				int len = 0;
				while (current < end && is_ident_char (current[0])) {
					current++;
					len++;
				}
				type = TokenType.IDENTIFIER;
			}
		} else if (current[0].isdigit ()) {
			type = read_number ();
		} else {
			switch (current[0]) {
			case '{':
				type = TokenType.OPEN_BRACE;
				current++;
				state_stack += State.BRACE;
				break;
			case '}':
				type = TokenType.CLOSE_BRACE;
				current++;
				if (state_stack.length > 0) {
					state_stack.length--;
				}
				break;
			case '(':
				type = TokenType.OPEN_PARENS;
				current++;
				state_stack += State.PARENS;
				break;
			case ')':
				type = TokenType.CLOSE_PARENS;
				current++;
				if (state_stack.length > 0) {
					state_stack.length--;
				}
				if (in_template () || in_verbatim_template ()) {
					type = TokenType.COMMA;
				}
				break;
			case '[':
				type = TokenType.OPEN_BRACKET;
				current++;
				state_stack += State.BRACKET;
				break;
			case ']':
				type = TokenType.CLOSE_BRACKET;
				current++;
				if (state_stack.length > 0) {
					state_stack.length--;
				}
				break;
			case '.':
				type = TokenType.DOT;
				current++;
				if (current < end - 1) {
					if (current[0] == '.' && current[1] == '.') {
						type = TokenType.ELLIPSIS;
						current += 2;
					}
				}
				break;
			case ':':
				type = TokenType.COLON;
				current++;
				if (current < end && current[0] == ':') {
					type = TokenType.DOUBLE_COLON;
					current++;
				}
				break;
			case ',':
				type = TokenType.COMMA;
				current++;
				break;
			case ';':
				type = TokenType.SEMICOLON;
				current++;
				break;
			case '#':
				type = TokenType.HASH;
				current++;
				break;
			case '?':
				type = TokenType.INTERR;
				current++;
				if (current < end && current[0] == '?') {
					type = TokenType.OP_COALESCING;
					current++;
				}
				break;
			case '|':
				type = TokenType.BITWISE_OR;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.ASSIGN_BITWISE_OR;
						current++;
						break;
					case '|':
						type = TokenType.OP_OR;
						current++;
						break;
					}
				}
				break;
			case '&':
				type = TokenType.BITWISE_AND;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.ASSIGN_BITWISE_AND;
						current++;
						break;
					case '&':
						type = TokenType.OP_AND;
						current++;
						break;
					}
				}
				break;
			case '^':
				type = TokenType.CARRET;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_BITWISE_XOR;
					current++;
				}
				break;
			case '~':
				type = TokenType.TILDE;
				current++;
				break;
			case '=':
				type = TokenType.ASSIGN;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.OP_EQ;
						current++;
						break;
					case '>':
						type = TokenType.LAMBDA;
						current++;
						break;
					}
				}
				break;
			case '<':
				type = TokenType.OP_LT;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.OP_LE;
						current++;
						break;
					case '<':
						type = TokenType.OP_SHIFT_LEFT;
						current++;
						if (current < end && current[0] == '=') {
							type = TokenType.ASSIGN_SHIFT_LEFT;
							current++;
						}
						break;
					}
				}
				break;
			case '>':
				type = TokenType.OP_GT;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.OP_GE;
					current++;
				}
				break;
			case '!':
				type = TokenType.OP_NEG;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.OP_NE;
					current++;
				}
				break;
			case '+':
				type = TokenType.PLUS;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.ASSIGN_ADD;
						current++;
						break;
					case '+':
						type = TokenType.OP_INC;
						current++;
						break;
					}
				}
				break;
			case '-':
				type = TokenType.MINUS;
				current++;
				if (current < end) {
					switch (current[0]) {
					case '=':
						type = TokenType.ASSIGN_SUB;
						current++;
						break;
					case '-':
						type = TokenType.OP_DEC;
						current++;
						break;
					case '>':
						type = TokenType.OP_PTR;
						current++;
						break;
					}
				}
				break;
			case '*':
				type = TokenType.STAR;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_MUL;
					current++;
				}
				break;
			case '/':
				switch (previous) {
				case TokenType.ASSIGN:
				case TokenType.COMMA:
				case TokenType.MINUS:
				case TokenType.OP_AND:
				case TokenType.OP_COALESCING:
				case TokenType.OP_EQ:
				case TokenType.OP_GE:
				case TokenType.OP_GT:
				case TokenType.OP_LE:
				case TokenType.OP_LT:
				case TokenType.OP_NE:
				case TokenType.OP_NEG:
				case TokenType.OP_OR:
				case TokenType.OPEN_BRACE:
				case TokenType.OPEN_PARENS:
				case TokenType.PLUS:
				case TokenType.RETURN:
					type = TokenType.OPEN_REGEX_LITERAL;
					state_stack += State.REGEX_LITERAL;
					current++;
					break;
				default:
					type = TokenType.DIV;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_DIV;
						current++;
					}
					break;
				}
				break;
			case '%':
				type = TokenType.PERCENT;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_PERCENT;
					current++;
				}
				break;
			case '\'':
			case '"':
				if (begin[0] == '\'') {
					type = TokenType.CHARACTER_LITERAL;
				} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
					type = TokenType.VERBATIM_STRING_LITERAL;
					token_length_in_chars = 6;
					current += 3;
					while (current < end - 4) {
						if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
							break;
						} else if (current[0] == '\n') {
							current++;
							line++;
							column = 1;
							token_length_in_chars = 3;
						} else {
							unichar u = ((string) current).get_char_validated ((long) (end - current));
							if (u != (unichar) (-1)) {
								current += u.to_utf8 (null);
								token_length_in_chars++;
							} else {
								Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
							}
						}
					}
					if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
						current += 3;
					} else {
						Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
					}
					break;
				} else {
					type = TokenType.STRING_LITERAL;
				}
				token_length_in_chars = 2;
				current++;
				while (current < end && current[0] != begin[0]) {
					if (current[0] == '\\') {
						current++;
						token_length_in_chars++;
						if (current >= end) {
							break;
						}

						switch (current[0]) {
						case '\'':
						case '"':
						case '\\':
						case '0':
						case 'b':
						case 'f':
						case 'n':
						case 'r':
						case 't':
						case 'v':
						case '$':
							current++;
							token_length_in_chars++;
							break;
						case 'u':
							// u escape character has four hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
								current++;
								token_length_in_chars++;
							}
							if (digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\u used with no following hex digits");
							} else if (digit_length < 4) {
								Report.error (get_source_reference (token_length_in_chars), "incomplete universal character name");
							}
							break;
						case 'x':
							// hexadecimal escape character requires two hex digits
							current++;
							token_length_in_chars++;
							int digit_length;
							bool empty = true;
							for (digit_length = 0; current < end && current[0].isxdigit ();) {
								if (current[0] != '0') {
									digit_length++;
								} else {
									empty = false;
								}
								current++;
								token_length_in_chars++;
							}
							if (empty && digit_length < 1) {
								Report.error (get_source_reference (token_length_in_chars), "\\x used with no following hex digits");
							} else if (digit_length > 2) {
								Report.error (get_source_reference (token_length_in_chars), "hex escape sequence out of range");
							}
							break;
						default:
							Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
							break;
						}
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						token_length_in_chars = 1;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
					if (current < end && begin[0] == '\'' && current[0] != '\'') {
						// multiple characters in single character literal
						Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
					}
				}
				if (current < end) {
					current++;
				} else {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c", begin[0]);
				}
				break;
			default:
				unichar u = ((string) current).get_char_validated ((long) (end - current));
				if (u != (unichar) (-1)) {
					current += u.to_utf8 (null);
					Report.error (get_source_reference (0), "syntax error, unexpected character");
				} else {
					current++;
					Report.error (get_source_reference (0), "invalid UTF-8 character");
				}
				column++;
				return read_token (out token_begin, out token_end);
			}
		}

		if (token_length_in_chars < 0) {
			column += (int) (current - begin);
		} else {
			column += token_length_in_chars;
		}

		token_end = SourceLocation (current, line, column - 1);
		previous = type;

		return type;
	}

	static bool matches (char* begin, string keyword) {
		char* keyword_array = (char*) keyword;
		long len = keyword.length;
		for (int i = 0; i < len; i++) {
			if (begin[i] != keyword_array[i]) {
				return false;
			}
		}
		return true;
	}

	bool pp_whitespace () {
		bool found = false;
		while (current < end && current[0].isspace () && current[0] != '\n') {
			found = true;
			current++;
			column++;
		}
		return found;
	}

	void pp_space () {
		while (pp_whitespace () || comment ()) {
		}
	}

	void pp_directive () {
		// hash sign
		current++;
		column++;

		if (line == 1 && column == 2 && current < end && current[0] == '!') {
			// hash bang: #!
			// skip until end of line or end of file
			while (current < end && current[0] != '\n') {
				current++;
			}
			return;
		}

		pp_space ();

		char* begin = current;
		int len = 0;
		while (current < end && current[0].isalnum ()) {
			current++;
			column++;
			len++;
		}

		if (len == 2 && matches (begin, "if")) {
			parse_pp_if ();
		} else if (len == 4 && matches (begin, "elif")) {
			parse_pp_elif ();
		} else if (len == 4 && matches (begin, "else")) {
			parse_pp_else ();
		} else if (len == 5 && matches (begin, "endif")) {
			parse_pp_endif ();
		} else {
			Report.error (get_source_reference (-len, len), "syntax error, invalid preprocessing directive");
		}

		if (conditional_stack.length > 0
		    && conditional_stack[conditional_stack.length - 1].skip_section) {
			// skip lines until next preprocessing directive
			bool bol = false;
			while (current < end) {
				if (bol && current[0] == '#') {
					// go back to begin of line
					current -= (column - 1);
					column = 1;
					return;
				}
				if (current[0] == '\n') {
					line++;
					column = 0;
					bol = true;
				} else if (!current[0].isspace ()) {
					bol = false;
				}
				current++;
				column++;
			}
		}
	}

	void pp_eol () {
		pp_space ();
		if (current >= end || current[0] != '\n') {
			Report.error (get_source_reference (0), "syntax error, expected newline");
		}
	}

	void parse_pp_if () {
		pp_space ();

		bool condition = parse_pp_expression ();

		pp_eol ();

		conditional_stack += Conditional ();

		if (condition && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
			// condition true => process code within if
			conditional_stack[conditional_stack.length - 1].matched = true;
		} else {
			// skip lines until next preprocessing directive
			conditional_stack[conditional_stack.length - 1].skip_section = true;
		}
	}

	void parse_pp_elif () {
		pp_space ();

		bool condition = parse_pp_expression ();

		pp_eol ();

		if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
			Report.error (get_source_reference (0), "syntax error, unexpected #elif");
			return;
		}

		if (condition && !conditional_stack[conditional_stack.length - 1].matched
		    && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
			// condition true => process code within if
			conditional_stack[conditional_stack.length - 1].matched = true;
			conditional_stack[conditional_stack.length - 1].skip_section = false;
		} else {
			// skip lines until next preprocessing directive
			conditional_stack[conditional_stack.length - 1].skip_section = true;
		}
	}

	void parse_pp_else () {
		pp_eol ();

		if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
			Report.error (get_source_reference (0), "syntax error, unexpected #else");
			return;
		}

		if (!conditional_stack[conditional_stack.length - 1].matched
		    && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
			// condition true => process code within if
			conditional_stack[conditional_stack.length - 1].matched = true;
			conditional_stack[conditional_stack.length - 1].skip_section = false;
		} else {
			// skip lines until next preprocessing directive
			conditional_stack[conditional_stack.length - 1].skip_section = true;
		}
	}

	void parse_pp_endif () {
		pp_eol ();

		if (conditional_stack.length == 0) {
			Report.error (get_source_reference (0), "syntax error, unexpected #endif");
			return;
		}

		conditional_stack.length--;
	}

	bool parse_pp_symbol () {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			column++;
			len++;
		}

		if (len == 0) {
			Report.error (get_source_reference (0), "syntax error, expected identifier");
			return false;
		}

		string identifier = ((string) (current - len)).substring (0, len);
		bool defined;
		if (identifier == "true") {
			defined = true;
		} else if (identifier == "false") {
			defined = false;
		} else {
			defined = source_file.context.is_defined (identifier);
		}

		return defined;
	}

	bool parse_pp_primary_expression () {
		if (current >= end) {
			Report.error (get_source_reference (0), "syntax error, expected identifier");
		} else if (is_ident_char (current[0])) {
			return parse_pp_symbol ();
		} else if (current[0] == '(') {
			current++;
			column++;
			pp_space ();
			bool result = parse_pp_expression ();
			pp_space ();
			if (current < end && current[0] ==  ')') {
				current++;
				column++;
			} else {
				Report.error (get_source_reference (0), "syntax error, expected `)'");
			}
			return result;
		} else {
			Report.error (get_source_reference (0), "syntax error, expected identifier");
		}
		return false;
	}

	bool parse_pp_unary_expression () {
		if (current < end && current[0] == '!') {
			current++;
			column++;
			pp_space ();
			return !parse_pp_unary_expression ();
		}

		return parse_pp_primary_expression ();
	}

	bool parse_pp_equality_expression () {
		bool left = parse_pp_unary_expression ();
		pp_space ();
		while (true) {
			if (current < end - 1 && current[0] == '=' && current[1] == '=') {
				current += 2;
				column += 2;
				pp_space ();
				bool right = parse_pp_unary_expression ();
				left = (left == right);
			} else if (current < end - 1 && current[0] == '!' && current[1] == '=') {
				current += 2;
				column += 2;
				pp_space ();
				bool right = parse_pp_unary_expression ();
				left = (left != right);
			} else {
				break;
			}
		}
		return left;
	}

	bool parse_pp_and_expression () {
		bool left = parse_pp_equality_expression ();
		pp_space ();
		while (current < end - 1 && current[0] == '&' && current[1] == '&') {
			current += 2;
			column += 2;
			pp_space ();
			bool right = parse_pp_equality_expression ();
			left = left && right;
		}
		return left;
	}

	bool parse_pp_or_expression () {
		bool left = parse_pp_and_expression ();
		pp_space ();
		while (current < end - 1 && current[0] == '|' && current[1] == '|') {
			current += 2;
			column += 2;
			pp_space ();
			bool right = parse_pp_and_expression ();
			left = left || right;
		}
		return left;
	}

	bool parse_pp_expression () {
		return parse_pp_or_expression ();
	}

	bool whitespace () {
		bool found = false;
		bool bol = (column == 1);
		while (current < end && current[0].isspace ()) {
			if (current[0] == '\n') {
				line++;
				column = 0;
				bol = true;
			}
			found = true;
			current++;
			column++;
		}
		if (bol && current < end && current[0] == '#') {
			pp_directive ();
			return true;
		}
		return found;
	}

	bool comment (bool file_comment = false) {
		if (current == null
		    || current > end - 2
		    || current[0] != '/'
		    || (current[1] != '/' && current[1] != '*')) {
			return false;
		}

		if (current[1] == '/') {
			SourceReference source_reference = null;
			if (file_comment) {
				source_reference = get_source_reference (0);
			}

			// single-line comment
			current += 2;
			char* begin = current;

			// skip until end of line or end of file
			while (current < end && current[0] != '\n') {
				current++;
			}

			if (source_reference != null) {
				push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
			}
		} else {
			SourceReference source_reference = null;

			if (file_comment && current[2] == '*') {
				return false;
			}

			if (current[2] == '*' || file_comment) {
				source_reference = get_source_reference (0);
			}

			current += 2;
			column += 2;

			char* begin = current;
			while (current < end - 1
			       && (current[0] != '*' || current[1] != '/')) {
				if (current[0] == '\n') {
					line++;
					column = 0;
				}
				current++;
				column++;
			}

			if (current == end - 1) {
				Report.error (get_source_reference (0), "syntax error, expected */");
				return true;
			}

			if (source_reference != null) {
				push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
			}

			current += 2;
			column += 2;
		}

		return true;
	}

	void space () {
		while (whitespace () || comment ()) {
		}
	}

	public void parse_file_comments () {
		while (whitespace () || comment (true)) {
		}
	}

	void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
		if (comment_item[0] == '*') {
			if (_comment != null) {
				// extra doc comment, add it to source file comments
				source_file.add_comment (_comment);
			}
			_comment = new Comment (comment_item, source_reference);
		}

		if (file_comment) {
			source_file.add_comment (new Comment (comment_item, source_reference));
			_comment = null;
		}
	}

	/**
	 * Clears and returns the content of the comment stack.
	 *
	 * @return saved comment
	 */
	public Comment? pop_comment () {
		if (_comment == null) {
			return null;
		}

		var comment = _comment;
		_comment = null;
		return comment;
	}
}