From 5ee15661dbc2da70927f588e310315233aff6eea Mon Sep 17 00:00:00 2001 From: murphy Date: Mon, 10 Apr 2006 03:06:50 +0000 Subject: Large update: Scanners for HTML, RHTML and Nitro-XHTML added. CSS style changes/enhancements (mainly the new background color for inline code, affects all Ruby code.) Demos and tests adjusted. Plugin: new PluginHost::default method. Scanner: - New setup method - ability to re-use a scanner - ability to keep the tokens - minor changes to token caching and string flattening Encoder: Error if token content is neither String nor Symbol. HTML encoder: - more warnings for unclosed tokens - output now UTF-8 Ruby Scanner: - bug: symbols before => now do not include =; {:foo=>bar} is valid Ruby code - try to close all open tokens - constants now all with specific namespace (for speed, I hope) Styles: new :entity/en class. Test suite now gives hinted HTML output. --- lib/coderay/scanners/html.rb | 57 ++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 20 deletions(-) (limited to 'lib/coderay/scanners/html.rb') diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 62da13b..a1efa9e 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,8 +1,8 @@ -#require 'coderay/common_patterns' - module CodeRay module Scanners # HTML Scanner + # + # $Id$ class HTML < Scanner include Streamable @@ -27,10 +27,21 @@ module CodeRay module Scanners ; /ox + PLAIN_STRING_CONTENT = { + "'" => /[^&'>\n]+/, + '"' => /[^&">\n]+/, + } + private + def setup + @state = :initial + @plain_string_content = nil + end + def scan_tokens tokens, options - - state = :initial + + state = @state + plain_string_content = @plain_string_content until eos? 
@@ -55,17 +66,13 @@ module CodeRay module Scanners kind = :comment elsif scan(/<\/[-\w_.:]*>/m) kind = :tag - elsif match = scan(/<[-\w_.:]*/m) + elsif match = scan(/<[-\w_.:]*>?/m) kind = :tag - if match?(/>/) - match << getch - else - state = :attribute - end + state = :attribute unless match[-1] == ?> elsif scan(/[^<>&]+/) kind = :plain elsif scan(/#{ENTITY}/ox) - kind = :char + kind = :entity elsif scan(/>/) kind = :error else @@ -79,6 +86,8 @@ module CodeRay module Scanners elsif scan(/#{ATTR_NAME}/o) kind = :attribute_name state = :attribute_equal + else + getch end when :attribute_equal @@ -98,29 +107,32 @@ module CodeRay module Scanners if scan(/#{ATTR_VALUE_UNQUOTED}/o) kind = :attribute_value state = :attribute - elsif scan(/"/) + elsif match = scan(/["']/) tokens << [:open, :string] state = :attribute_value_string + plain_string_content = PLAIN_STRING_CONTENT[match] kind = :delimiter elsif scan(/#{TAG_END}/o) kind = :tag state = :initial + else + getch end when :attribute_value_string - if scan(/[^"&\n]+/) + if scan(plain_string_content) kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] + elsif scan(/['"]/) + tokens << [matched, :delimiter] tokens << [:close, :string] state = :attribute next elsif scan(/#{ENTITY}/ox) - kind = :char - elsif match(/\n/) + kind = :entity + elsif match(/[\n>]/) tokens << [:close, :string] - state = :attribute - next + kind = error + state = :initial end else @@ -136,10 +148,15 @@ module CodeRay module Scanners [[match, kind], line], tokens end raise_inspect 'Empty token', tokens unless match - + tokens << [match, kind] end + if options[:keep_state] + @state = state + @plain_string_content = plain_string_content + end + tokens end -- cgit v1.2.1