diff options
Diffstat (limited to 'libvaladoc/markupreader.vala')
-rw-r--r-- | libvaladoc/markupreader.vala | 337 |
1 files changed, 337 insertions, 0 deletions
diff --git a/libvaladoc/markupreader.vala b/libvaladoc/markupreader.vala new file mode 100644 index 000000000..9f24177e4 --- /dev/null +++ b/libvaladoc/markupreader.vala @@ -0,0 +1,337 @@ +/* markupreader.vala + * + * Copyright (C) 2008-2009 Jürg Billeter + * Copyright (C) 2011 Florian Brosch + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: + * Jürg Billeter <j@bitron.ch> + */ + +using GLib; +using Gee; + + +/** + * Simple reader for a subset of XML. + */ +public class Valadoc.MarkupReader : Object { + public string filename { + private set; + get; + } + + public string name { + private set; + get; + } + + public string content { + private set; + get; + } + + private MappedFile mapped_file; + + private string[] lines; + private char* begin; + private char* current; + private char* end; + + private int line; + private int column; + + private Map<string, string> attributes = new HashMap<string, string> (); + private bool empty_element; + + private ErrorReporter reporter; + + public MarkupReader.from_string (string filename, string content, ErrorReporter reporter) { + this.filename = filename; + this.reporter = reporter; + + lines = content.split ("\n"); + begin = content; + end = begin + content.length; + current = begin; + + column = 1; + line = 1; + } + + public MarkupReader (string filename, ErrorReporter reporter) { + this.filename = filename; + this.reporter = reporter; + + try { + mapped_file = new MappedFile (filename, false); + begin = mapped_file.get_contents (); + lines = ((string) begin).split ("\n"); + end = begin + mapped_file.get_length (); + + current = begin; + + line = 1; + column = 1; + } catch (FileError e) { + reporter.simple_error (null, "Unable to map file '%s': %s", filename, e.message); + } + } + + public string? get_line_content (int line_nr) { + if (this.lines.length > line_nr) { + return this.lines[line_nr]; + } + + return null; + } + + public string? get_attribute (string attr) { + return attributes[attr]; + } + + /* + * Returns a copy of the current attributes. + * + * @return map of current attributes + */ + public Map<string,string> get_attributes () { + var result = new HashMap<string, string> (); + foreach (var key in attributes.keys) { + result.set (key, attributes.get (key)); + } + return result; + } + + private string read_name () { + char* begin = current; + while (current < end) { + if (current[0] == ' ' || current[0] == '\t' || current[0] == '>' + || current[0] == '/' || current[0] == '=' || current[0] == '\n') { + break; + } + unichar u = ((string) current).get_char_validated ((long) (end - current)); + if (u != (unichar) (-1)) { + current += u.to_utf8 (null); + } else { + reporter.simple_error ("%s:%d".printf (filename, line), + "invalid UTF-8 character"); + } + } + if (current == begin) { + // syntax error: invalid name + } + return ((string) begin).substring (0, (int) (current - begin)); + } + + public MarkupTokenType read_token (out MarkupSourceLocation token_begin, out MarkupSourceLocation token_end) { + attributes.clear (); + + if (empty_element) { + empty_element = false; + token_begin = MarkupSourceLocation (begin, line, column); + token_end = MarkupSourceLocation (begin, line, column); + return MarkupTokenType.END_ELEMENT; + } + + content = null; + name = null; + + space (); + + MarkupTokenType type = MarkupTokenType.NONE; + char* begin = current; + token_begin = MarkupSourceLocation (begin, line, column); + + if (current >= end) { + type = MarkupTokenType.EOF; + } else if (current[0] == '<') { + current++; + if (current >= end) { + // error + } else if (current[0] == '?') { + // processing instruction + } else if (current[0] == '!') { + // comment or doctype + current++; + if (current < end - 1 && current[0] == '-' && current[1] == '-') { + // comment + current += 2; + while (current < end - 2) { + if (current[0] == '-' && current[1] == '-' && current[2] == '>') { + // end of comment + current += 3; + break; + } else if (current[0] == '\n') { + line++; + column = 0; + } + current++; + } + + // ignore comment, read next token + return read_token (out token_begin, out token_end); + } + } else if (current[0] == '/') { + type = MarkupTokenType.END_ELEMENT; + current++; + name = read_name (); + if (current >= end || current[0] != '>') { + // error + } + current++; + } else { + type = MarkupTokenType.START_ELEMENT; + name = read_name (); + space (); + while (current < end && current[0] != '>' && current[0] != '/') { + string attr_name = read_name (); + if (current >= end || current[0] != '=') { + // error + } + current++; + // FIXME allow single quotes + if (current >= end || current[0] != '"') { + // error + } + current++; + + string attr_value = text ('"', false); + + if (current >= end || current[0] != '"') { + // error + } + current++; + attributes.set (attr_name, attr_value); + space (); + } + if (current[0] == '/') { + empty_element = true; + current++; + space (); + } else { + empty_element = false; + } + if (current >= end || current[0] != '>') { + // error + } + current++; + } + } else { + space (); + + if (current[0] != '<') { + content = text ('<', true); + } else { + // no text + // read next token + return read_token (out token_begin, out token_end); + } + + type = MarkupTokenType.TEXT; + } + + token_end = MarkupSourceLocation (current, line, column - 1); + + return type; + } + + private string text (char end_char, bool rm_trailing_whitespace) { + StringBuilder content = new StringBuilder (); + char* text_begin = current; + char* last_linebreak = current; + + while (current < end && current[0] != end_char) { + unichar u = ((string) current).get_char_validated ((long) (end - current)); + if (u == (unichar) (-1)) { + reporter.simple_error ("%s:%d".printf (filename, line), + "invalid UTF-8 character"); + } else if (u == '&') { + char* next_pos = current + u.to_utf8 (null); + if (((string) next_pos).has_prefix ("amp;")) { + content.append (((string) text_begin).substring (0, (int) (current - text_begin))); + content.append_c ('&'); + current += 5; + text_begin = current; + } else if (((string) next_pos).has_prefix ("quot;")) { + content.append (((string) text_begin).substring (0, (int) (current - text_begin))); + content.append_c ('"'); + current += 6; + text_begin = current; + } else if (((string) next_pos).has_prefix ("apos;")) { + content.append (((string) text_begin).substring (0, (int) (current - text_begin))); + content.append_c ('\''); + current += 6; + text_begin = current; + } else if (((string) next_pos).has_prefix ("lt;")) { + content.append (((string) text_begin).substring (0, (int) (current - text_begin))); + content.append_c ('<'); + current += 4; + text_begin = current; + } else if (((string) next_pos).has_prefix ("gt;")) { + content.append (((string) text_begin).substring (0, (int) (current - text_begin))); + content.append_c ('>'); + current += 4; + text_begin = current; + } else if (((string) next_pos).has_prefix ("percnt;")) { + content.append (((string) text_begin).substring (0, (int) (current - text_begin))); + content.append_c ('>'); + current += 8; + text_begin = current; + } else { + current += u.to_utf8 (null); + } + } else { + if (u == '\n') { + line++; + column = 0; + last_linebreak = current; + } + + current += u.to_utf8 (null); + column++; + } + } + + if (text_begin != current) { + content.append (((string) text_begin).substring (0, (int) (current - text_begin))); + } + + column += (int) (current - last_linebreak); + + // Removes trailing whitespace + if (rm_trailing_whitespace) { + char* str_pos = ((char*)content.str) + content.len; + for (str_pos--; str_pos > ((char*)content.str) && str_pos[0].isspace(); str_pos--); + content.erase ((ssize_t) (str_pos-((char*) content.str) + 1), -1); + } + + return content.str; + } + + private void space () { + while (current < end && current[0].isspace ()) { + if (current[0] == '\n') { + line++; + column = 0; + } + current++; + column++; + } + } +} + + |