summaryrefslogtreecommitdiff
path: root/libvaladoc/highlighter
diff options
context:
space:
mode:
authorRico Tzschichholz <ricotz@ubuntu.com>2017-06-27 13:09:30 +0200
committerRico Tzschichholz <ricotz@ubuntu.com>2017-06-27 13:19:08 +0200
commit2b742fce82eb1326faaee3b2cc4ff993e701ef53 (patch)
tree44e4d274b22274029d43cd18126810551125a394 /libvaladoc/highlighter
parent7609126be290e01dd452a3fc1bdf8d57af363569 (diff)
parent93d9fe647be1f2effc0bfeeec903b5e030182f6c (diff)
downloadvala-2b742fce82eb1326faaee3b2cc4ff993e701ef53.tar.gz
Merge valadoc 0.36.0
Consider valadoc a part of vala's toolchain and therefore let it live in the main repository. With this merge there is no need to maintain multiple driver sources since only one is required from now on. There is no dependency on gee-0.8 and vala's internal gee copy has made to be sufficient. The libvaladoc library will be suffixed with vala's version suffix too. Besides this renaming the rest of the valadoc file layout is kept the same. https://bugzilla.gnome.org/show_bug.cgi?id=782782
Diffstat (limited to 'libvaladoc/highlighter')
-rw-r--r--libvaladoc/highlighter/codescanner.vala572
-rw-r--r--libvaladoc/highlighter/codetoken.vala58
-rw-r--r--libvaladoc/highlighter/highlighter.vala366
-rw-r--r--libvaladoc/highlighter/scanner.vala32
-rw-r--r--libvaladoc/highlighter/xmlscanner.vala374
5 files changed, 1402 insertions, 0 deletions
diff --git a/libvaladoc/highlighter/codescanner.vala b/libvaladoc/highlighter/codescanner.vala
new file mode 100644
index 000000000..6d65c2eb7
--- /dev/null
+++ b/libvaladoc/highlighter/codescanner.vala
@@ -0,0 +1,572 @@
+/* codescanner.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+using GLib;
+
+
+/**
+ * A cheap scanner used to highlight C and Vala source code.
+ */
+public class Valadoc.Highlighter.CodeScanner : Object, Scanner {
+ private Vala.HashMap<string, CodeTokenType?> keywords;
+ private bool enable_string_templates;
+ private bool enabel_verbatim_string;
+ private bool enable_preprocessor_define;
+ private bool enable_preprocessor_include;
+ private bool enable_keyword_escape;
+
+
+ private Queue<CodeToken> token_queue = new Queue<CodeToken> ();
+ private unowned string content;
+ private unowned string pos;
+
+
+	/**
+	 * Creates a scanner for a piece of C or Vala source code.
+	 *
+	 * Only unowned references to //content// are kept, so the caller has to keep
+	 * the string alive for as long as the scanner is in use.
+	 *
+	 * @param content the source code to split into tokens
+	 * @param enable_string_templates recognize string literals prefixed with an at-sign
+	 * @param enabel_verbatim_string recognize triple-quoted string literals
+	 * @param enable_preprocessor_define additionally recognize define-style preprocessor lines
+	 * @param enable_preprocessor_include recognize include directives and their file name
+	 * @param enable_keyword_escape treat an at-sign as a valid first character of an identifier
+	 * @param keywords maps every word to highlight to the token type it is reported as
+	 */
+	public CodeScanner (string content, bool enable_string_templates, bool enabel_verbatim_string,
+		bool enable_preprocessor_define, bool enable_preprocessor_include, bool enable_keyword_escape,
+		Vala.HashMap<string, CodeTokenType?> keywords)
+	{
+		// language features
+		this.keywords = keywords;
+		this.enable_keyword_escape = enable_keyword_escape;
+		this.enable_preprocessor_include = enable_preprocessor_include;
+		this.enable_preprocessor_define = enable_preprocessor_define;
+		this.enabel_verbatim_string = enabel_verbatim_string;
+		this.enable_string_templates = enable_string_templates;
+
+		// scanning starts at the very beginning of the input
+		this.content = content;
+		this.pos = content;
+	}
+
+	/**
+	 * Returns the next token of the scanned source.
+	 *
+	 * Text that is not recognized as anything special is collected and returned
+	 * as one PLAIN token. The special token(s) found right behind it are queued
+	 * and handed out by the following calls. Once the input is exhausted an EOF
+	 * token is returned.
+	 *
+	 * @return the next token
+	 */
+	public CodeToken next () {
+		// Hand out tokens queued by a previous call first.
+		if (!token_queue.is_empty ()) {
+			return token_queue.pop_head ();
+		}
+
+
+		unowned string start;
+
+		for (start = pos; pos[0] != '\0'; pos = pos.next_char ()) {
+			// Preprocessor directives are only looked for at the start of the
+			// input and behind a line break.
+			if (((char*) pos) == ((char*) content) || pos[0] == '\n') {
+				unowned string line_start = pos;
+
+				while (pos[0] == ' ' || pos[0] == '\t' || pos[0] == '\n') {
+					pos = pos.offset (1);
+				}
+
+				if (pos[0] == '\0') {
+					break;
+				} else if (enable_preprocessor_include && pos.has_prefix ("#include")) {
+					unowned string end = pos;
+					if (queue_c_include ()) {
+						return dispatch (start, end);
+					} else {
+						// Not a well-formed include line: treat it as plain text.
+						pos = line_start;
+						continue;
+					}
+				} else if (pos.has_prefix ("#if") || pos.has_prefix ("#else") || pos.has_prefix ("#elif") || pos.has_prefix ("#endif")
+					|| (enable_preprocessor_define && (pos.has_prefix ("#define") || pos.has_prefix ("#ifdef")))) {
+
+					// The whole rest of the line belongs to the directive.
+					unowned string end = pos;
+					queue_until ('\n', CodeTokenType.PREPROCESSOR);
+					return dispatch (start, end);
+				}
+			}
+
+			// character literal
+			if (pos[0] == '\'') {
+				unowned string end = pos;
+				queue_string_literal ("\'");
+				return dispatch (start, end);
+			}
+
+			// string literal, optionally a template and/or a verbatim string
+			if (pos[0] == '"' || (enable_string_templates && pos[0] == '@' && pos[1] == '"')) {
+				unowned string end = pos;
+				if (enabel_verbatim_string && (pos.has_prefix ("\"\"\"") || (enable_string_templates && pos.has_prefix ("@\"\"\"")))) {
+					queue_string_literal ("\"\"\"");
+				} else {
+					queue_string_literal ("\"");
+				}
+				return dispatch (start, end);
+			}
+
+			// numeric literal
+			if (pos[0] >= '0' && pos[0] <= '9') {
+				unowned string end = pos;
+				queue_numeric_literal ();
+				return dispatch (start, end);
+			}
+
+			// multi-line comment
+			if (pos.has_prefix ("/*")) {
+				unowned string end = pos;
+				queue_multiline_comment ();
+				return dispatch (start, end);
+			}
+
+			// single-line comment
+			if (pos.has_prefix ("//")) {
+				unowned string end = pos;
+				queue_until ('\n', CodeTokenType.COMMENT);
+				return dispatch (start, end);
+			}
+
+			// A word is only looked up when pos is at its first character.
+			if ((((char*) pos) == ((char*) content) || !isidstartchar (pos[-1])) && isidstartchar (pos[0])) {
+				unowned string end = pos;
+				if (queue_keyword ()) {
+					return dispatch (start, end);
+				} else {
+					continue;
+				}
+			}
+		}
+
+		// End of input: flush the remaining plain text, then report EOF.
+		token_queue.push_tail (new CodeToken (CodeTokenType.EOF, ""));
+		return dispatch (start, pos);
+	}
+
+	/**
+	 * Tries to queue the tokens of an include line starting at pos.
+	 *
+	 * On success the directive is queued as PREPROCESSOR token(s) with the quoted
+	 * or angle-bracketed file name, if there is one, as a LITERAL token in between,
+	 * and pos is left at the end of the line. On failure pos is restored, the token
+	 * queue is cleared and nothing is queued.
+	 *
+	 * @return true if the line was recognized as an include line
+	 */
+	private bool queue_c_include () {
+		unowned string directive_start = pos;
+		unowned string segment_start = pos;
+		bool well_formed = true;
+
+		// step over "#include" and the blanks behind it
+		pos = pos.offset (8);
+		while (pos[0] == ' ' || pos[0] == '\t') {
+			pos = pos.offset (1);
+		}
+
+		// '\0' means: no file name delimiter found
+		char closing = '\0';
+		if (pos[0] == '"') {
+			closing = '"';
+		} else if (pos[0] == '<') {
+			closing = '>';
+		}
+
+		if (closing != '\0') {
+			queue_token (segment_start, pos, CodeTokenType.PREPROCESSOR);
+
+			unowned string literal_start = pos;
+			pos = pos.offset (1);
+
+			// the file name has to be closed on the same line
+			while (pos[0] != closing && pos[0] != '\n' && pos[0] != '\0') {
+				pos = pos.offset (1);
+			}
+
+			if (pos[0] == closing) {
+				pos = pos.offset (1);
+				queue_token (literal_start, pos, CodeTokenType.LITERAL);
+				segment_start = pos;
+			} else {
+				well_formed = false;
+			}
+		}
+
+		if (well_formed) {
+			// only blanks may follow up to the end of the line
+			while (pos[0] == ' ' || pos[0] == '\t') {
+				pos = pos.offset (1);
+			}
+
+			well_formed = (pos[0] == '\n' || pos[0] == '\0');
+		}
+
+		if (!well_formed) {
+			pos = directive_start;
+			token_queue.clear ();
+			return false;
+		}
+
+		queue_token (segment_start, pos, CodeTokenType.PREPROCESSOR);
+		return true;
+	}
+
+	/**
+	 * Queues the word starting at pos if it is registered in the keyword table.
+	 *
+	 * pos is only moved behind the word when a token was queued.
+	 *
+	 * @return true if a token was queued
+	 */
+	private bool queue_keyword () {
+		unowned string word_start = pos;
+		unowned string cursor = pos;
+
+		// an optional leading '@' is part of the looked-up word
+		if (cursor[0] == '@') {
+			cursor = cursor.offset (1);
+		}
+
+		while (isidchar (cursor[0])) {
+			cursor = cursor.offset (1);
+		}
+
+		string word = word_start.substring (0, word_start.pointer_to_offset (cursor));
+		CodeTokenType? token_type = keywords.get (word);
+		if (token_type == null) {
+			return false;
+		}
+
+		pos = cursor;
+		token_queue.push_tail (new CodeToken (token_type, word));
+		return true;
+	}
+
+	/**
+	 * Queues the multi-line comment starting at pos as a COMMENT token.
+	 *
+	 * An unterminated comment extends to the end of the input.
+	 */
+	private void queue_multiline_comment () {
+		unowned string comment_start = pos;
+
+		// step over the opening "/*"
+		pos = pos.offset (2);
+
+		while (pos[0] != '\0' && !pos.has_prefix ("*/")) {
+			pos = pos.offset (1);
+		}
+
+		// step over the closing "*/" if there is one
+		if (pos[0] != '\0') {
+			pos = pos.offset (2);
+		}
+
+		queue_token (comment_start, pos, CodeTokenType.COMMENT);
+	}
+
+	/**
+	 * Queues everything from pos up to the next occurrence of end_char, or up to
+	 * the end of the input, as a single token.
+	 *
+	 * The terminating character becomes part of the token unless it is a line
+	 * break.
+	 *
+	 * @param end_char the character that ends the token
+	 * @param token_type the type of the queued token
+	 */
+	private void queue_until (char end_char, CodeTokenType token_type) {
+		unowned string token_start = pos;
+
+		// the character at pos always belongs to the token
+		do {
+			pos = pos.offset (1);
+		} while (pos[0] != end_char && pos[0] != '\0');
+
+		bool include_terminator = (pos[0] != '\0' && pos[0] != '\n');
+		if (include_terminator) {
+			pos = pos.offset (1);
+		}
+
+		queue_token (token_start, pos, token_type);
+	}
+
+	/**
+	 * Queues the string or character literal starting at pos.
+	 *
+	 * The literal is split into LITERAL tokens, interrupted by ESCAPE tokens for
+	 * printf format specifiers, backslash escapes and, when the literal starts
+	 * with '@', template expressions. An unterminated literal extends to the end
+	 * of the input.
+	 *
+	 * @param end_chars the delimiter that opens and closes the literal
+	 */
+	private void queue_string_literal (string end_chars) {
+		unowned string literal_start = pos;
+		bool is_template = (pos[0] == '@');
+
+		// step over the opening delimiter and the '@' of a template
+		pos = pos.offset (is_template ? end_chars.length + 1 : end_chars.length);
+
+		while (pos[0] != '\0' && !pos.has_prefix (end_chars)) {
+			long skip = 0;
+			bool is_escape = false;
+
+			switch (pos[0]) {
+			case '%':
+				is_escape = has_printf_format_prefix (out skip);
+				break;
+
+			case '\\':
+				is_escape = has_escape_prefix (out skip);
+				break;
+
+			case '$':
+				is_escape = is_template && has_template_literal_prefix (out skip);
+				break;
+			}
+
+			if (!is_escape) {
+				pos = pos.offset (1);
+				continue;
+			}
+
+			// close the literal part collected so far, then queue the escape
+			queue_token (literal_start, pos, CodeTokenType.LITERAL);
+
+			unowned string escape_start = pos;
+			pos = pos.offset (skip);
+			queue_token (escape_start, pos, CodeTokenType.ESCAPE);
+			literal_start = pos;
+		}
+
+		// step over the closing delimiter if there is one
+		if (pos[0] != '\0') {
+			pos = pos.offset (end_chars.length);
+		}
+
+		queue_token (literal_start, pos, CodeTokenType.LITERAL);
+	}
+
+	/**
+	 * Checks whether the '$' at pos starts a template expression, which is either
+	 * '$' followed by identifier characters or '$' followed by a balanced pair of
+	 * parentheses.
+	 *
+	 * @param skip length of the expression including the '$', 0 if there is none
+	 * @return true if a template expression starts at pos
+	 */
+	private bool has_template_literal_prefix (out long skip) {
+		char first = pos[1];
+
+		if (isidchar (first)) {
+			// pos[1] is known to belong to the name, continue behind it
+			skip = 2;
+			while (isidchar (pos[skip])) {
+				skip++;
+			}
+			return true;
+		}
+
+		if (first != '(') {
+			skip = 0;
+			return false;
+		}
+
+		// look for the parenthesis that closes the one at pos[1]
+		int depth = 1;
+		for (skip = 2; depth > 0; skip++) {
+			char c = pos[skip];
+
+			if (c == '\0') {
+				// end of input before the expression was closed
+				skip = 0;
+				return false;
+			}
+
+			if (c == '(') {
+				depth++;
+			} else if (c == ')') {
+				depth--;
+			}
+		}
+
+		return true;
+	}
+
+	/**
+	 * Checks whether the backslash at pos starts a known escape sequence:
+	 * a single-character escape, a hexadecimal escape (backslash, 'x' and at
+	 * least one hex digit) or a numeric escape of up to three digits.
+	 *
+	 * @param skip length of the escape sequence including the backslash,
+	 *             0 if there is none
+	 * @return true if an escape sequence starts at pos
+	 */
+	private bool has_escape_prefix (out long skip) {
+		switch (pos[1]) {
+		case 'a':
+		case 'b':
+		case 'f':
+		case 'n':
+		case 'r':
+		case 't':
+		case 'v':
+		case '\\':
+		case '\'':
+		case '\"':
+		case '?':
+			skip = 2;
+			return true;
+
+		case 'x':
+			if (pos[2].isxdigit ()) {
+				// Consume every hex digit exactly once. The scan stops at the
+				// first non-digit, so it can never run past the terminating NUL.
+				skip = 2;
+				while (pos[skip].isxdigit ()) {
+					skip++;
+				}
+
+				return true;
+			}
+
+			skip = 0;
+			return false;
+
+		default:
+			if (pos[1].isdigit ()) {
+				// one to three digits
+				skip = 2;
+
+				if (pos[2].isdigit ()) {
+					skip++;
+
+					if (pos[3].isdigit ()) {
+						skip++;
+					}
+				}
+
+				return true;
+			}
+
+			skip = 0;
+			return false;
+		}
+	}
+
+	/**
+	 * Checks whether the '%' at pos starts a printf-style format specifier or
+	 * an escaped percent sign ("%%").
+	 *
+	 * @param skip length of the specifier including the '%', 0 if there is none
+	 * @return true if a format specifier starts at pos
+	 */
+	private bool has_printf_format_prefix (out long skip) {
+		// %[flag][min width][precision][length modifier][conversion specifier]
+		unowned string cursor = this.pos;
+		unowned string start = cursor;
+
+		// '%'
+		cursor = cursor.offset (1);
+
+		if (cursor[0] == '%') {
+			skip = 2;
+			return true;
+		}
+
+
+		// flags:
+		// Compared one by one on purpose: an index_of_char () lookup needs a
+		// ">= 0" test to accept '#', and that test would also accept the
+		// terminating NUL.
+		while (cursor[0] == '#' || cursor[0] == '0' || cursor[0] == '+' || cursor[0] == '-' || cursor[0] == ' ') {
+			cursor = cursor.offset (1);
+		}
+
+		// min width:
+		while (cursor[0].isdigit ()) {
+			cursor = cursor.offset (1);
+		}
+
+		// precision
+		if (cursor[0] == '.' && cursor[1].isdigit ()) {
+			cursor = cursor.offset (2);
+			while (cursor[0].isdigit ()) {
+				cursor = cursor.offset (1);
+			}
+		}
+
+		// length:
+		switch (cursor[0]) {
+		case 'h':
+			cursor = cursor.offset (1);
+			if (cursor[0] == 'h') {
+				cursor = cursor.offset (1);
+			}
+			break;
+
+		case 'l':
+			cursor = cursor.offset (1);
+			if (cursor[0] == 'l') {
+				cursor = cursor.offset (1);
+			}
+			break;
+
+		case 'j':
+		case 'z':
+		case 't':
+		case 'L':
+			cursor = cursor.offset (1);
+			break;
+		}
+
+		// conversion specifier:
+		switch (cursor[0]) {
+		case 'd':
+		case 'i':
+		case 'u':
+		case 'o':
+		case 'x':
+		case 'X':
+		case 'f':
+		case 'F':
+		case 'e':
+		case 'E':
+		case 'g':
+		case 'G':
+		case 'a':
+		case 'A':
+		case 'c':
+		case 's':
+		case 'p':
+		case 'n':
+			cursor = cursor.offset (1);
+			break;
+
+		default:
+			skip = 0;
+			return false;
+		}
+
+		// Everything accepted above is a single-byte character, so the
+		// character offset equals the byte offset here.
+		skip = start.pointer_to_offset (cursor);
+		return true;
+	}
+
+	/**
+	 * Classification of a number while it is scanned by queue_numeric_literal ().
+	 */
+	private enum NumericType {
+		// no fractional part, exponent or real suffix seen
+		INTEGER,
+		// a fractional part, an exponent or a real suffix was seen
+		REAL,
+		// the number is directly followed by a letter or digit; queued as plain text
+		NONE
+	}
+
+	/**
+	 * Queues the number starting at pos.
+	 *
+	 * The number is queued as a LITERAL token, or as a PLAIN token when it is
+	 * directly followed by another letter or digit.
+	 */
+	// based on libvala
+	private void queue_numeric_literal () {
+		unowned string number_start = pos;
+		NumericType kind = NumericType.INTEGER;
+
+		// integer part: "0x" followed by a hex digit starts a hexadecimal
+		// literal, everything else is read as a run of decimal digits
+		bool is_hex = (pos[0] == '0' && pos[1] == 'x' && pos[2].isxdigit ());
+		if (is_hex) {
+			pos = pos.offset (2);
+			while (pos[0].isxdigit ()) {
+				pos = pos.offset (1);
+			}
+		} else {
+			while (pos[0].isdigit ()) {
+				pos = pos.offset (1);
+			}
+		}
+
+		// fractional part: a dot only counts when a digit follows it
+		if (pos[0] == '.' && pos[1].isdigit ()) {
+			kind = NumericType.REAL;
+			pos = pos.offset (1);
+			while (pos[0].isdigit ()) {
+				pos = pos.offset (1);
+			}
+		}
+
+		// exponent part with an optional sign
+		if (pos[0] == 'e' || pos[0] == 'E') {
+			kind = NumericType.REAL;
+			pos = pos.offset (1);
+			if (pos[0] == '+' || pos[0] == '-') {
+				pos = pos.offset (1);
+			}
+			while (pos[0].isdigit ()) {
+				pos = pos.offset (1);
+			}
+		}
+
+		// type suffix
+		char suffix = pos[0];
+		if (suffix == 'l' || suffix == 'L') {
+			// l, ll: integers only
+			if (kind == NumericType.INTEGER) {
+				pos = pos.offset (1);
+				if (pos[0] == 'l' || pos[0] == 'L') {
+					pos = pos.offset (1);
+				}
+			}
+		} else if (suffix == 'u' || suffix == 'U') {
+			// u, ul, ull: integers only
+			if (kind == NumericType.INTEGER) {
+				pos = pos.offset (1);
+				if (pos[0] == 'l' || pos[0] == 'L') {
+					pos = pos.offset (1);
+					if (pos[0] == 'l' || pos[0] == 'L') {
+						pos = pos.offset (1);
+					}
+				}
+			}
+		} else if (suffix == 'f' || suffix == 'F' || suffix == 'd' || suffix == 'D') {
+			kind = NumericType.REAL;
+			pos = pos.offset (1);
+		}
+
+		// a letter or digit right behind the number makes it an invalid literal
+		if (pos[0].isalnum ()) {
+			kind = NumericType.NONE;
+		}
+
+		if (kind == NumericType.NONE) {
+			queue_token (number_start, pos, CodeTokenType.PLAIN);
+		} else {
+			queue_token (number_start, pos, CodeTokenType.LITERAL);
+		}
+	}
+
+	/**
+	 * Returns the token to hand out after special tokens have been queued.
+	 *
+	 * If plain text was collected between start and end, that text is returned as
+	 * a PLAIN token and the queued tokens stay in the queue. Otherwise the first
+	 * queued token is returned. At least one token has to be queued.
+	 *
+	 * @param start start of the collected plain text
+	 * @param end end of the collected plain text (exclusive)
+	 * @return the token to return from next ()
+	 */
+	private CodeToken dispatch (string start, string end) {
+		assert (token_queue.is_empty () == false);
+
+		if (((char*) start) == ((char*) end)) {
+			return token_queue.pop_head ();
+		}
+
+		// substring () expects a length in bytes. pointer_to_offset () would
+		// return the number of UTF-8 characters and thereby truncate text
+		// containing multi-byte characters, so use the byte distance.
+		long length = (long) (((char*) end) - ((char*) start));
+		string text = start.substring (0, length);
+		return new CodeToken (CodeTokenType.PLAIN, text);
+	}
+
+	/**
+	 * Appends the text between start and end to the token queue.
+	 *
+	 * @param start start of the token text
+	 * @param end end of the token text (exclusive)
+	 * @param token_type the type of the queued token
+	 */
+	private void queue_token (string start, string end, CodeTokenType token_type) {
+		// substring () expects a length in bytes. pointer_to_offset () would
+		// return the number of UTF-8 characters and thereby truncate text
+		// containing multi-byte characters, so use the byte distance.
+		long length = (long) (((char*) end) - ((char*) start));
+		string text = start.substring (0, length);
+		token_queue.push_tail (new CodeToken (token_type, text));
+	}
+
+	/**
+	 * Tells whether c may appear inside an identifier: a letter, a digit or
+	 * an underscore.
+	 */
+	private inline bool isidchar (char c) {
+		if (c == '_') {
+			return true;
+		}
+
+		return c.isalnum ();
+	}
+
+	/**
+	 * Tells whether c is accepted as the first character of a word: a letter,
+	 * a digit, an underscore or, if keyword escaping is enabled, an at-sign.
+	 */
+	private inline bool isidstartchar (char c) {
+		if (c == '@') {
+			return enable_keyword_escape;
+		}
+
+		return c == '_' || c.isalnum ();
+	}
+}
+
diff --git a/libvaladoc/highlighter/codetoken.vala b/libvaladoc/highlighter/codetoken.vala
new file mode 100644
index 000000000..1a02195ea
--- /dev/null
+++ b/libvaladoc/highlighter/codetoken.vala
@@ -0,0 +1,58 @@
+/* codetoken.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+
+/**
+ * A piece of scanned text together with its classification.
+ */
+public class Valadoc.Highlighter.CodeToken {
+	/**
+	 * The classification of the text.
+	 */
+	public CodeTokenType token_type { get; private set; }
+
+	/**
+	 * The text covered by this token.
+	 */
+	public string content { get; private set; }
+
+	/**
+	 * Creates a new token.
+	 *
+	 * @param type the classification of the text
+	 * @param content the text covered by the token
+	 */
+	public CodeToken (CodeTokenType type, string content) {
+		this.content = content;
+		this.token_type = type;
+	}
+}
+
+
+/**
+ * The classifications a {@link CodeToken} can have.
+ */
+public enum Valadoc.Highlighter.CodeTokenType {
+	// XML
+	XML_ESCAPE,
+	XML_ELEMENT,
+	XML_ATTRIBUTE,
+	XML_ATTRIBUTE_VALUE,
+	XML_COMMENT,
+	XML_CDATA,
+
+	// source code
+	PREPROCESSOR,
+	COMMENT,
+	KEYWORD,
+	LITERAL,
+	ESCAPE,
+	PLAIN,
+	TYPE,
+
+	// end of input
+	EOF;
+
+	/**
+	 * Returns the nick name registered for this value in the enum's type class.
+	 */
+	public unowned string to_string () {
+		var enum_class = (EnumClass) typeof (CodeTokenType).class_ref ();
+		unowned EnumValue? enum_value = enum_class.get_value (this);
+		return_val_if_fail (enum_value != null, null);
+		return enum_value.value_nick;
+	}
+}
diff --git a/libvaladoc/highlighter/highlighter.vala b/libvaladoc/highlighter/highlighter.vala
new file mode 100644
index 000000000..5eace1cfc
--- /dev/null
+++ b/libvaladoc/highlighter/highlighter.vala
@@ -0,0 +1,366 @@
+/* codehighlighter.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+using GLib;
+using Valadoc.Content;
+
+
+public class Valadoc.Highlighter.Highlighter : Object {
+ private Vala.HashMap<string, CodeTokenType?> vala_keywords;
+ private Vala.HashMap<string, CodeTokenType?> c_keywords;
+
+
+	/**
+	 * Highlights Vala source code.
+	 *
+	 * The keyword table is built on first use and reused by later calls.
+	 *
+	 * @param source_code the Vala code to highlight
+	 * @return a monospaced run containing the highlighted code
+	 */
+	public Run highlight_vala (string source_code) {
+		if (vala_keywords == null) {
+			string[] type_names = {
+				"string", "bool", "void",
+				"double", "float",
+				"char", "uchar", "unichar",
+				"short", "ushort",
+				"long", "ulong",
+				"size_t", "ssize_t",
+				"int", "int8", "int16", "int32", "int64",
+				"uint", "uint8", "uint16", "uint32", "uint64"
+			};
+
+			string[] literal_names = {
+				"null", "true", "false"
+			};
+
+			// every word is listed once; registering a word twice has no effect
+			string[] keyword_names = {
+				"return", "lock", "var", "yield", "global", "construct",
+				"value", "get", "set",
+				"owned", "unowned", "const", "weak", "dynamic",
+				"out", "ref",
+				"break", "continue",
+				"if", "else", "switch", "case", "default",
+				"do", "while", "for", "foreach", "in",
+				"try", "catch", "finally", "throw",
+				"class", "interface", "struct", "enum", "delegate", "errordomain",
+				"abstract", "virtual", "override", "signal", "extern", "static", "async", "inline", "new",
+				"public", "private", "protected", "internal",
+				"throws", "requires", "ensures", "assert",
+				"namespace", "using",
+				"as", "is", "delete", "sizeof", "typeof",
+				"this", "base"
+			};
+
+			vala_keywords = new Vala.HashMap<string, CodeTokenType?> (str_hash, str_equal);
+
+			foreach (unowned string word in type_names) {
+				vala_keywords.set (word, CodeTokenType.TYPE);
+			}
+
+			foreach (unowned string word in literal_names) {
+				vala_keywords.set (word, CodeTokenType.LITERAL);
+			}
+
+			foreach (unowned string word in keyword_names) {
+				vala_keywords.set (word, CodeTokenType.KEYWORD);
+			}
+		}
+
+		// scanner features used for Vala
+		bool enable_string_templates = true;
+		bool enabel_verbatim_string = true;
+		bool enable_preprocessor_define = false;
+		bool enable_preprocessor_include = false;
+		bool enable_keyword_escape = true;
+
+		var scanner = new CodeScanner (source_code, enable_string_templates, enabel_verbatim_string,
+			enable_preprocessor_define, enable_preprocessor_include, enable_keyword_escape,
+			vala_keywords);
+
+		return highlight_code (scanner);
+	}
+
+	/**
+	 * Highlights C source code.
+	 *
+	 * The keyword table is built on first use and reused by later calls.
+	 *
+	 * @param source_code the C code to highlight
+	 * @return a monospaced run containing the highlighted code
+	 */
+	public Run highlight_c (string source_code) {
+		if (c_keywords == null) {
+			string[] type_names = {
+				"auto", "char", "const", "double", "extern", "int", "float", "long",
+				"register", "short", "signed", "static", "unsigned", "void", "volatile",
+
+				"gboolean", "gpointer", "gconstpointer",
+				"gchar", "guchar", "gint", "guint", "gshort", "gushort", "glong", "gulong",
+				"gint8", "guint8", "gint16", "guint16", "gint32", "guint32", "gint64", "guint64",
+				"gfloat", "gdouble",
+				"gsize", "gssize", "goffset", "gintptr", "guintptr"
+			};
+
+			string[] literal_names = {
+				"NULL", "TRUE", "FALSE"
+			};
+
+			string[] keyword_names = {
+				"break", "case", "continue", "default", "do", "else", "enum", "for", "goto",
+				"if", "return", "sizeof", "struct", "switch", "typedef", "union", "while",
+				"assert"
+			};
+
+			c_keywords = new Vala.HashMap<string, CodeTokenType?> (str_hash, str_equal);
+
+			foreach (unowned string word in type_names) {
+				c_keywords.set (word, CodeTokenType.TYPE);
+			}
+
+			foreach (unowned string word in literal_names) {
+				c_keywords.set (word, CodeTokenType.LITERAL);
+			}
+
+			foreach (unowned string word in keyword_names) {
+				c_keywords.set (word, CodeTokenType.KEYWORD);
+			}
+		}
+
+		// scanner features used for C
+		bool enable_string_templates = false;
+		bool enabel_verbatim_string = false;
+		bool enable_preprocessor_define = true;
+		bool enable_preprocessor_include = true;
+		bool enable_keyword_escape = false;
+
+		var scanner = new CodeScanner (source_code, enable_string_templates, enabel_verbatim_string,
+			enable_preprocessor_define, enable_preprocessor_include, enable_keyword_escape,
+			c_keywords);
+
+		return highlight_code (scanner);
+	}
+
+	/**
+	 * Highlights XML.
+	 *
+	 * @param source_code the XML to highlight
+	 * @return a monospaced run containing the highlighted markup
+	 */
+	public Run highlight_xml (string source_code) {
+		return highlight_code (new XmlScanner (source_code));
+	}
+
+	/**
+	 * Turns the tokens delivered by a scanner into a monospaced run.
+	 *
+	 * Every token with a dedicated style is wrapped into a run of that style;
+	 * all other tokens are added as plain text. Scanning stops at the first EOF
+	 * token.
+	 *
+	 * @param scanner the token source
+	 * @return the run containing the highlighted code
+	 */
+	private Run highlight_code (Scanner scanner) {
+		Run code = new Run (Run.Style.MONOSPACED);
+
+		CodeToken token = scanner.next ();
+		while (token.token_type != CodeTokenType.EOF) {
+			// stays null for token types without a dedicated style
+			Run? styled = null;
+
+			switch (token.token_type) {
+			case CodeTokenType.PREPROCESSOR:
+				styled = new Run (Run.Style.LANG_PREPROCESSOR);
+				break;
+
+			case CodeTokenType.COMMENT:
+				styled = new Run (Run.Style.LANG_COMMENT);
+				break;
+
+			case CodeTokenType.KEYWORD:
+				styled = new Run (Run.Style.LANG_KEYWORD);
+				break;
+
+			case CodeTokenType.LITERAL:
+				styled = new Run (Run.Style.LANG_LITERAL);
+				break;
+
+			case CodeTokenType.TYPE:
+				styled = new Run (Run.Style.LANG_BASIC_TYPE);
+				break;
+
+			case CodeTokenType.ESCAPE:
+				styled = new Run (Run.Style.LANG_ESCAPE);
+				break;
+
+			case CodeTokenType.XML_ESCAPE:
+				styled = new Run (Run.Style.XML_ESCAPE);
+				break;
+
+			case CodeTokenType.XML_ELEMENT:
+				styled = new Run (Run.Style.XML_ELEMENT);
+				break;
+
+			case CodeTokenType.XML_ATTRIBUTE:
+				styled = new Run (Run.Style.XML_ATTRIBUTE);
+				break;
+
+			case CodeTokenType.XML_ATTRIBUTE_VALUE:
+				styled = new Run (Run.Style.XML_ATTRIBUTE_VALUE);
+				break;
+
+			case CodeTokenType.XML_COMMENT:
+				styled = new Run (Run.Style.XML_COMMENT);
+				break;
+
+			case CodeTokenType.XML_CDATA:
+				styled = new Run (Run.Style.XML_CDATA);
+				break;
+
+			default:
+				break;
+			}
+
+			if (styled != null) {
+				styled.content.add (new Text (token.content));
+				code.content.add (styled);
+			} else {
+				code.content.add (new Text (token.content));
+			}
+
+			token = scanner.next ();
+		}
+
+		return code;
+	}
+}
+
+
diff --git a/libvaladoc/highlighter/scanner.vala b/libvaladoc/highlighter/scanner.vala
new file mode 100644
index 000000000..20eedcfb2
--- /dev/null
+++ b/libvaladoc/highlighter/scanner.vala
@@ -0,0 +1,32 @@
+/* scanner.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+using GLib;
+
+
+/**
+ * Scanner interface used to highlight source code.
+ */
+public interface Valadoc.Highlighter.Scanner : Object {
+
+	/**
+	 * Returns the next token of the scanned input.
+	 *
+	 * The highlighter calls this method repeatedly until it receives a token
+	 * of type {@link CodeTokenType.EOF}.
+	 *
+	 * @return the next token
+	 */
+ public abstract CodeToken next ();
+}
diff --git a/libvaladoc/highlighter/xmlscanner.vala b/libvaladoc/highlighter/xmlscanner.vala
new file mode 100644
index 000000000..38b87c5ee
--- /dev/null
+++ b/libvaladoc/highlighter/xmlscanner.vala
@@ -0,0 +1,374 @@
+/* xmlscanner.vala
+ *
+ * Copyright (C) 2015 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Florian Brosch <flo.brosch@gmail.com>
+ */
+
+using GLib;
+
+
+/**
+ * A cheap scanner used to highlight XML.
+ *
+ * The scanner does not validate its input. It recognises start/end tags,
+ * XML declarations and processing instructions, attributes, entity and
+ * character references, comments and the CDATA delimiters; everything else
+ * is returned as plain text. Only the delimiters of a CDATA section are
+ * reported as XML_CDATA tokens, the text between them is scanned like any
+ * other content.
+ *
+ * The scanned string is borrowed, not copied: the caller has to keep it
+ * alive for as long as the scanner is in use.
+ */
+public class Valadoc.Highlighter.XmlScanner : Object, Scanner {
+	// Tokens that were recognised ahead of time and are still to be handed
+	// out by next ().
+	private Queue<CodeToken> token_queue = new Queue<CodeToken> ();
+	// Borrowed reference to the whole input.
+	private unowned string content;
+	// Borrowed reference to the current scan position inside the input.
+	private unowned string pos;
+
+
+	/**
+	 * Creates a scanner for the given text.
+	 *
+	 * @param content the XML text to scan; it is not copied and must outlive
+	 *                the scanner
+	 */
+	public XmlScanner (string content) {
+		this.content = content;
+		this.pos = content;
+	}
+
+	/**
+	 * Returns the next token.
+	 *
+	 * Text in front of a recognised construct is returned first as a PLAIN
+	 * token; the tokens of the construct itself stay queued and are returned
+	 * by the following calls. Once the input is exhausted a token of type
+	 * EOF is returned.
+	 *
+	 * @return the next token
+	 */
+	public CodeToken next () {
+		if (!token_queue.is_empty ()) {
+			return token_queue.pop_head ();
+		}
+
+		unowned string start = pos;
+
+		while (pos[0] != '\0') {
+			// Start of the construct tried in this iteration; everything
+			// between start and mark is plain text.
+			unowned string mark = pos;
+			bool matched = false;
+
+			if (pos[0] == '&') {
+				matched = queue_escape ();
+			} else if (pos[0] == '<') {
+				if (pos[1] == '/') {
+					matched = queue_end_element ();
+				} else if (pos.has_prefix ("<!--")) {
+					matched = queue_comment ();
+				} else if (pos.has_prefix ("<![CDATA[")) {
+					pos = pos.offset (9);
+					token_queue.push_tail (new CodeToken (CodeTokenType.XML_CDATA, "<![CDATA["));
+					matched = true;
+				} else {
+					matched = queue_start_element (pos[1] == '?');
+				}
+			} else if (pos.has_prefix ("]]>")) {
+				pos = pos.offset (3);
+				token_queue.push_tail (new CodeToken (CodeTokenType.XML_CDATA, "]]>"));
+				matched = true;
+			}
+
+			if (matched) {
+				return dispatch (start, mark);
+			}
+
+			// Nothing recognised (a failed queue_* call has restored pos):
+			// treat the character as plain text and move on.
+			pos = pos.next_char ();
+		}
+
+		token_queue.push_tail (new CodeToken (CodeTokenType.EOF, ""));
+		return dispatch (start, pos);
+	}
+
+	/**
+	 * Tries to scan a start tag, an empty-element tag or, if xml_decl is
+	 * set, a "<?name ... ?>" construct beginning at the current position.
+	 *
+	 * On success the tokens for the tag are queued and the position is
+	 * moved behind it. On failure the queue is emptied again and the
+	 * position is restored.
+	 *
+	 * @param xml_decl whether the construct starts with "<?" and therefore
+	 *                 has to end with "?>"
+	 * @return true if a complete tag was queued
+	 */
+	private bool queue_start_element (bool xml_decl) {
+		assert (token_queue.is_empty ());
+
+		unowned string element_start = pos;
+		pos = pos.offset (xml_decl ? 2 : 1);
+
+		skip_optional_spaces (ref pos);
+
+		bool well_formed = skip_id (ref pos);
+		if (well_formed) {
+			skip_optional_spaces (ref pos);
+			queue_token (element_start, pos, CodeTokenType.XML_ELEMENT);
+			well_formed = queue_attributes ();
+		}
+
+		unowned string element_end_start = pos;
+
+		if (well_formed) {
+			if (xml_decl) {
+				well_formed = pos.has_prefix ("?>");
+				if (well_formed) {
+					pos = pos.offset (2);
+				}
+			} else if (pos[0] == '>') {
+				pos = pos.offset (1);
+			} else if (pos.has_prefix ("/>")) {
+				pos = pos.offset (2);
+			} else {
+				well_formed = false;
+			}
+		}
+
+		if (!well_formed) {
+			// Drop the partially queued tag so that the text is emitted
+			// as plain content instead.
+			token_queue.clear ();
+			pos = element_start;
+			return false;
+		}
+
+		queue_token (element_end_start, pos, CodeTokenType.XML_ELEMENT);
+		return true;
+	}
+
+	/**
+	 * Scans a possibly empty list of name="value" / name='value' pairs and
+	 * queues an XML_ATTRIBUTE and an XML_ATTRIBUTE_VALUE token for each.
+	 *
+	 * On failure the tokens queued so far are left in the queue and the
+	 * position is not restored; the caller is responsible for cleaning up.
+	 *
+	 * @return false if a malformed attribute was found
+	 */
+	private bool queue_attributes () {
+		while (is_id_char (pos[0])) {
+			unowned string begin = pos;
+
+			if (!skip_id (ref pos)) {
+				return false;
+			}
+
+			skip_optional_spaces (ref pos);
+
+			if (pos[0] != '=') {
+				return false;
+			}
+
+			pos = pos.offset (1);
+			skip_optional_spaces (ref pos);
+
+			// The attribute token covers the name, the '=' and the
+			// surrounding whitespace.
+			queue_token (begin, pos, CodeTokenType.XML_ATTRIBUTE);
+			begin = pos;
+
+			if (!skip_quoted_value (ref pos, false)) {
+				return false;
+			}
+
+			skip_optional_spaces (ref pos);
+
+			queue_token (begin, pos, CodeTokenType.XML_ATTRIBUTE_VALUE);
+		}
+
+		return true;
+	}
+
+	/**
+	 * Tries to scan an end tag ("</name>") at the current position.
+	 *
+	 * @return true if the tag was queued; otherwise the position is restored
+	 */
+	private bool queue_end_element () {
+		unowned string start = pos;
+		pos = pos.offset (2);
+
+		skip_optional_spaces (ref pos);
+
+		bool well_formed = skip_id (ref pos);
+		if (well_formed) {
+			skip_optional_spaces (ref pos);
+			well_formed = (pos[0] == '>');
+		}
+
+		if (!well_formed) {
+			pos = start;
+			return false;
+		}
+
+		pos = pos.offset (1);
+		queue_token (start, pos, CodeTokenType.XML_ELEMENT);
+		return true;
+	}
+
+	/**
+	 * Tries to scan an entity reference ("&name;") or a numeric character
+	 * reference ("&#160;", "&#xA0;") at the current position.
+	 *
+	 * The digits of a numeric reference are not validated.
+	 *
+	 * @return true if the reference was queued; otherwise the position is
+	 *         restored
+	 */
+	private bool queue_escape () {
+		unowned string start = pos;
+		pos = pos.offset (1);
+
+		if (pos[0] == '#') {
+			pos = pos.offset (1);
+		}
+
+		if (!skip_id (ref pos) || pos[0] != ';') {
+			pos = start;
+			return false;
+		}
+
+		pos = pos.offset (1);
+		queue_token (start, pos, CodeTokenType.XML_ESCAPE);
+		return true;
+	}
+
+	/**
+	 * Tries to scan a comment starting with "<!--" at the current position.
+	 *
+	 * The comment extends up to and including the next "-->", so a '>'
+	 * inside the comment text does not end it.
+	 *
+	 * @return true if the comment was queued; false (with the position
+	 *         restored) if it is not terminated
+	 */
+	private bool queue_comment () {
+		unowned string start = pos;
+		pos = pos.offset (4);
+
+		while (pos[0] != '\0' && !pos.has_prefix ("-->")) {
+			pos = pos.offset (1);
+		}
+
+		if (pos[0] == '\0') {
+			pos = start;
+			return false;
+		}
+
+		pos = pos.offset (3);
+		queue_token (start, pos, CodeTokenType.XML_COMMENT);
+		return true;
+	}
+
+	/**
+	 * Skips a name made of one or more ':'-separated segments of
+	 * identifier characters, e.g. "xml" or "xsl:template".
+	 *
+	 * The position may have been advanced even if false is returned
+	 * (e.g. for "a:"); callers that care have to restore it themselves.
+	 *
+	 * @return true if the last segment is not empty
+	 */
+	private static bool skip_id (ref unowned string pos) {
+		bool has_id = false;
+		bool has_next_segment = true;
+
+		while (has_next_segment) {
+			has_id = false;
+
+			while (is_id_char (pos[0])) {
+				pos = pos.offset (1);
+				has_id = true;
+			}
+
+			has_next_segment = has_id && pos[0] == ':';
+			if (has_next_segment) {
+				pos = pos.offset (1);
+			}
+		}
+
+		return has_id;
+	}
+
+	/**
+	 * Skips a run of whitespace characters.
+	 *
+	 * @return true if at least one character was skipped
+	 */
+	private static bool skip_optional_spaces (ref unowned string pos) {
+		unowned string first = pos;
+
+		while (pos[0].isspace ()) {
+			pos = pos.offset (1);
+		}
+
+		return ((char*) pos) != ((char*) first);
+	}
+
+	/**
+	 * Skips an attribute value enclosed in double or single quotes.
+	 *
+	 * @param single_line if set, a line break inside the value counts as
+	 *                    an unterminated value
+	 * @return true if a complete value was skipped; otherwise the position
+	 *         is left untouched
+	 */
+	private static bool skip_quoted_value (ref unowned string pos, bool single_line) {
+		char quote = pos[0];
+		if (quote != '"' && quote != '\'') {
+			return false;
+		}
+
+		unowned string cursor = pos.offset (1);
+		while (cursor[0] != '\0' && cursor[0] != quote && !(single_line && cursor[0] == '\n')) {
+			cursor = cursor.offset (1);
+		}
+
+		if (cursor[0] != quote) {
+			return false;
+		}
+
+		pos = cursor.offset (1);
+		return true;
+	}
+
+	/**
+	 * Copies the text between two positions of the same string.
+	 *
+	 * The length is taken as a byte distance because string.substring ()
+	 * works on bytes; a character count would cut off text that contains
+	 * multi-byte UTF-8 sequences.
+	 */
+	private static string slice (string start, string end) {
+		long byte_length = (long) (((char*) end) - ((char*) start));
+		return start.substring (0, byte_length);
+	}
+
+	/**
+	 * Picks the token to return from next ().
+	 *
+	 * If there is text between start and end it is returned as a PLAIN
+	 * token and the queue is left untouched; otherwise the head of the
+	 * queue is returned. The queue must not be empty.
+	 */
+	private CodeToken dispatch (string start, string end) {
+		assert (token_queue.is_empty () == false);
+
+		if (((char*) start) == ((char*) end)) {
+			return token_queue.pop_head ();
+		}
+
+		return new CodeToken (CodeTokenType.PLAIN, slice (start, end));
+	}
+
+	/**
+	 * Appends a token of the given type for the text between start and end
+	 * to the queue.
+	 */
+	private void queue_token (string start, string end, CodeTokenType token_type) {
+		token_queue.push_tail (new CodeToken (token_type, slice (start, end)));
+	}
+
+	/**
+	 * Whether c may appear in a name segment: an alphanumeric character,
+	 * '_' or '-'.
+	 */
+	private static inline bool is_id_char (char c) {
+		return c.isalnum () || c == '_' || c == '-';
+	}
+
+	/**
+	 * Cheap check whether the given text looks like XML.
+	 *
+	 * Leading whitespace is ignored. The text is considered XML if it
+	 * starts with a comment, a CDATA section, or a complete start tag,
+	 * empty-element tag or "<?name ... ?>" construct whose attribute
+	 * values are quoted and do not span several lines.
+	 *
+	 * @param source the text to inspect
+	 * @return true if the text looks like XML
+	 */
+	internal static bool is_xml (string source) {
+		unowned string pos = source;
+
+		skip_optional_spaces (ref pos);
+
+		if (pos[0] != '<') {
+			return false;
+		}
+
+		// Comment or CDATA:
+		if (pos.has_prefix ("<!--") || pos.has_prefix ("<![CDATA[")) {
+			return true;
+		}
+
+		// Start Tag:
+		pos = pos.offset (1);
+
+		bool proc_instr = (pos[0] == '?');
+		if (proc_instr) {
+			pos = pos.offset (1);
+		}
+
+		// ID:
+		if (!skip_id (ref pos)) {
+			return false;
+		}
+
+		skip_optional_spaces (ref pos);
+
+		// Attributes:
+		while (skip_id (ref pos)) {
+			// Whitespace around '=' is accepted just like the scanner
+			// itself accepts it.
+			skip_optional_spaces (ref pos);
+
+			if (pos[0] != '=') {
+				return false;
+			}
+
+			pos = pos.offset (1);
+			skip_optional_spaces (ref pos);
+
+			if (!skip_quoted_value (ref pos, true)) {
+				return false;
+			}
+
+			skip_optional_spaces (ref pos);
+		}
+
+		if (proc_instr) {
+			return pos.has_prefix ("?>");
+		}
+
+		return pos[0] == '>' || pos.has_prefix ("/>");
+	}
+}
+