summaryrefslogtreecommitdiff
path: root/libvaladoc/markupreader.vala
diff options
context:
space:
mode:
Diffstat (limited to 'libvaladoc/markupreader.vala')
-rw-r--r--libvaladoc/markupreader.vala337
1 files changed, 337 insertions, 0 deletions
diff --git a/libvaladoc/markupreader.vala b/libvaladoc/markupreader.vala
new file mode 100644
index 000000000..9f24177e4
--- /dev/null
+++ b/libvaladoc/markupreader.vala
@@ -0,0 +1,337 @@
+/* markupreader.vala
+ *
+ * Copyright (C) 2008-2009 Jürg Billeter
+ * Copyright (C) 2011 Florian Brosch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Jürg Billeter <j@bitron.ch>
+ */
+
+using GLib;
+using Gee;
+
+
+/**
+ * Simple reader for a subset of XML.
+ */
+public class Valadoc.MarkupReader : Object {
+ public string filename {
+ private set;
+ get;
+ }
+
+ public string name {
+ private set;
+ get;
+ }
+
+ public string content {
+ private set;
+ get;
+ }
+
+ private MappedFile mapped_file;
+
+ private string[] lines;
+ private char* begin;
+ private char* current;
+ private char* end;
+
+ private int line;
+ private int column;
+
+ private Map<string, string> attributes = new HashMap<string, string> ();
+ private bool empty_element;
+
+ private ErrorReporter reporter;
+
+ public MarkupReader.from_string (string filename, string content, ErrorReporter reporter) {
+ this.filename = filename;
+ this.reporter = reporter;
+
+ lines = content.split ("\n");
+ begin = content;
+ end = begin + content.length;
+ current = begin;
+
+ column = 1;
+ line = 1;
+ }
+
+ public MarkupReader (string filename, ErrorReporter reporter) {
+ this.filename = filename;
+ this.reporter = reporter;
+
+ try {
+ mapped_file = new MappedFile (filename, false);
+ begin = mapped_file.get_contents ();
+ lines = ((string) begin).split ("\n");
+ end = begin + mapped_file.get_length ();
+
+ current = begin;
+
+ line = 1;
+ column = 1;
+ } catch (FileError e) {
+ reporter.simple_error (null, "Unable to map file '%s': %s", filename, e.message);
+ }
+ }
+
+ public string? get_line_content (int line_nr) {
+ if (this.lines.length > line_nr) {
+ return this.lines[line_nr];
+ }
+
+ return null;
+ }
+
+ public string? get_attribute (string attr) {
+ return attributes[attr];
+ }
+
+ /*
+ * Returns a copy of the current attributes.
+ *
+ * @return map of current attributes
+ */
+ public Map<string,string> get_attributes () {
+ var result = new HashMap<string, string> ();
+ foreach (var key in attributes.keys) {
+ result.set (key, attributes.get (key));
+ }
+ return result;
+ }
+
+ private string read_name () {
+ char* begin = current;
+ while (current < end) {
+ if (current[0] == ' ' || current[0] == '\t' || current[0] == '>'
+ || current[0] == '/' || current[0] == '=' || current[0] == '\n') {
+ break;
+ }
+ unichar u = ((string) current).get_char_validated ((long) (end - current));
+ if (u != (unichar) (-1)) {
+ current += u.to_utf8 (null);
+ } else {
+ reporter.simple_error ("%s:%d".printf (filename, line),
+ "invalid UTF-8 character");
+ }
+ }
+ if (current == begin) {
+ // syntax error: invalid name
+ }
+ return ((string) begin).substring (0, (int) (current - begin));
+ }
+
+ public MarkupTokenType read_token (out MarkupSourceLocation token_begin, out MarkupSourceLocation token_end) {
+ attributes.clear ();
+
+ if (empty_element) {
+ empty_element = false;
+ token_begin = MarkupSourceLocation (begin, line, column);
+ token_end = MarkupSourceLocation (begin, line, column);
+ return MarkupTokenType.END_ELEMENT;
+ }
+
+ content = null;
+ name = null;
+
+ space ();
+
+ MarkupTokenType type = MarkupTokenType.NONE;
+ char* begin = current;
+ token_begin = MarkupSourceLocation (begin, line, column);
+
+ if (current >= end) {
+ type = MarkupTokenType.EOF;
+ } else if (current[0] == '<') {
+ current++;
+ if (current >= end) {
+ // error
+ } else if (current[0] == '?') {
+ // processing instruction
+ } else if (current[0] == '!') {
+ // comment or doctype
+ current++;
+ if (current < end - 1 && current[0] == '-' && current[1] == '-') {
+ // comment
+ current += 2;
+ while (current < end - 2) {
+ if (current[0] == '-' && current[1] == '-' && current[2] == '>') {
+ // end of comment
+ current += 3;
+ break;
+ } else if (current[0] == '\n') {
+ line++;
+ column = 0;
+ }
+ current++;
+ }
+
+ // ignore comment, read next token
+ return read_token (out token_begin, out token_end);
+ }
+ } else if (current[0] == '/') {
+ type = MarkupTokenType.END_ELEMENT;
+ current++;
+ name = read_name ();
+ if (current >= end || current[0] != '>') {
+ // error
+ }
+ current++;
+ } else {
+ type = MarkupTokenType.START_ELEMENT;
+ name = read_name ();
+ space ();
+ while (current < end && current[0] != '>' && current[0] != '/') {
+ string attr_name = read_name ();
+ if (current >= end || current[0] != '=') {
+ // error
+ }
+ current++;
+ // FIXME allow single quotes
+ if (current >= end || current[0] != '"') {
+ // error
+ }
+ current++;
+
+ string attr_value = text ('"', false);
+
+ if (current >= end || current[0] != '"') {
+ // error
+ }
+ current++;
+ attributes.set (attr_name, attr_value);
+ space ();
+ }
+ if (current[0] == '/') {
+ empty_element = true;
+ current++;
+ space ();
+ } else {
+ empty_element = false;
+ }
+ if (current >= end || current[0] != '>') {
+ // error
+ }
+ current++;
+ }
+ } else {
+ space ();
+
+ if (current[0] != '<') {
+ content = text ('<', true);
+ } else {
+ // no text
+ // read next token
+ return read_token (out token_begin, out token_end);
+ }
+
+ type = MarkupTokenType.TEXT;
+ }
+
+ token_end = MarkupSourceLocation (current, line, column - 1);
+
+ return type;
+ }
+
+ private string text (char end_char, bool rm_trailing_whitespace) {
+ StringBuilder content = new StringBuilder ();
+ char* text_begin = current;
+ char* last_linebreak = current;
+
+ while (current < end && current[0] != end_char) {
+ unichar u = ((string) current).get_char_validated ((long) (end - current));
+ if (u == (unichar) (-1)) {
+ reporter.simple_error ("%s:%d".printf (filename, line),
+ "invalid UTF-8 character");
+ } else if (u == '&') {
+ char* next_pos = current + u.to_utf8 (null);
+ if (((string) next_pos).has_prefix ("amp;")) {
+ content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
+ content.append_c ('&');
+ current += 5;
+ text_begin = current;
+ } else if (((string) next_pos).has_prefix ("quot;")) {
+ content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
+ content.append_c ('"');
+ current += 6;
+ text_begin = current;
+ } else if (((string) next_pos).has_prefix ("apos;")) {
+ content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
+ content.append_c ('\'');
+ current += 6;
+ text_begin = current;
+ } else if (((string) next_pos).has_prefix ("lt;")) {
+ content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
+ content.append_c ('<');
+ current += 4;
+ text_begin = current;
+ } else if (((string) next_pos).has_prefix ("gt;")) {
+ content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
+ content.append_c ('>');
+ current += 4;
+ text_begin = current;
+ } else if (((string) next_pos).has_prefix ("percnt;")) {
+ content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
+ content.append_c ('>');
+ current += 8;
+ text_begin = current;
+ } else {
+ current += u.to_utf8 (null);
+ }
+ } else {
+ if (u == '\n') {
+ line++;
+ column = 0;
+ last_linebreak = current;
+ }
+
+ current += u.to_utf8 (null);
+ column++;
+ }
+ }
+
+ if (text_begin != current) {
+ content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
+ }
+
+ column += (int) (current - last_linebreak);
+
+ // Removes trailing whitespace
+ if (rm_trailing_whitespace) {
+ char* str_pos = ((char*)content.str) + content.len;
+ for (str_pos--; str_pos > ((char*)content.str) && str_pos[0].isspace(); str_pos--);
+ content.erase ((ssize_t) (str_pos-((char*) content.str) + 1), -1);
+ }
+
+ return content.str;
+ }
+
+ private void space () {
+ while (current < end && current[0].isspace ()) {
+ if (current[0] == '\n') {
+ line++;
+ column = 0;
+ }
+ current++;
+ column++;
+ }
+ }
+}
+
+