From 26a8e5a0388199ac686db28d631b05a5b5aa02e1 Mon Sep 17 00:00:00 2001 From: murphy Date: Tue, 11 Jul 2006 05:37:50 +0000 Subject: Changed error handling of all scanners: :error tokens are OK now, even in debug mode, but token kind is nil unless assigned. Small fixes for C and Ruby scanners. Renamed local variable type to kind in Ruby scanner. Improved RHTML scanner to recognize -%> as delimiter. HTML encoder: improved handling of malformed token strings. Fixed PluginHost#inspect including docu. Scanner#raise_inspect also shows state if given. --- lib/coderay/scanners/_map.rb | 29 +- lib/coderay/scanners/c.rb | 318 +++++++------- lib/coderay/scanners/delphi.rb | 260 ++++++------ lib/coderay/scanners/html.rb | 341 +++++++-------- lib/coderay/scanners/nitro_html.rb | 125 ------ lib/coderay/scanners/nitro_xhtml.rb | 130 ++++++ lib/coderay/scanners/rhtml.rb | 138 ++++--- lib/coderay/scanners/ruby.rb | 804 ++++++++++++++++++------------------ 8 files changed, 1090 insertions(+), 1055 deletions(-) delete mode 100644 lib/coderay/scanners/nitro_html.rb create mode 100644 lib/coderay/scanners/nitro_xhtml.rb (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index 6268a6c..1c5fc89 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -1,14 +1,15 @@ -module CodeRay -module Scanners - - map :cpp => :c, - :plain => :plaintext, - :pascal => :delphi, - :irb => :ruby, - :xml => :html, - :xhtml => :nitro_html - - default :plain - -end -end +module CodeRay +module Scanners + + map :cpp => :c, + :plain => :plaintext, + :pascal => :delphi, + :irb => :ruby, + :xml => :html, + :xhtml => :nitro_xhtml, + :nitro => :nitro_xhtml + + default :plain + +end +end diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index 66b8de1..be113d0 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -1,155 +1,163 @@ -module CodeRay -module Scanners - - class C < Scanner - - register_for :c - - RESERVED_WORDS = [ - 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', - 'for', 'goto', 'if', 'return', 'switch', 'while', - 'struct', 'union', 'enum', 'typedef', - 'static', 'register', 'auto', 'extern', - 'sizeof', - 'volatile', 'const', # C89 - 'inline', 'restrict', # C99 - ] - - PREDEFINED_TYPES = [ - 'int', 'long', 'short', 'char', 'void', - 'signed', 'unsigned', 'float', 'double', - 'bool', 'complex', # C99 - ] - - PREDEFINED_CONSTANTS = [ - 'EOF', 'NULL', - 'true', 'false', # C99 - ] - - IDENT_KIND = WordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(PREDEFINED_TYPES, :pre_type). - add(PREDEFINED_CONSTANTS, :pre_constant) - - ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x - UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x - - def scan_tokens tokens, options - - state = :initial - - until eos? - - kind = :error - match = nil - - case state - - when :initial - - if scan(/ \s+ | \\\n /x) - kind = :space - - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) - kind = :comment - - elsif match = scan(/ \# \s* if \s* 0 /x) - match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? - kind = :comment - - elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - if kind == :ident and check(/:(?!:)/) - match << scan(/:/) - kind = :label - end - - elsif match = scan(/L?"/) - tokens << [:open, :string] - if match[0] == ?L - tokens << ['L', :modifier] - match = '"' - end - state = :string - kind = :delimiter - - elsif scan(/#\s*(\w*)/) - kind = :preprocessor # FIXME multiline preprocs - state = :include_expected if self[1] == 'include' - - elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) - kind = :char - - elsif scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex - - elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) - kind = :oct - - elsif scan(/(?:\d+)(?![.eEfF])/) - kind = :integer - - elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) - kind = :float - - else - getch - end - - when :string - if scan(/[^\\"]+/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/ \\ | $ /x) - kind = :error - state = :initial - else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens - end - - when :include_expected - if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) - kind = :include - state = :initial - - elsif match = scan(/\s+/) - kind = :space - state = :initial if match.index ?\n - - else - getch - - end - - else - raise_inspect 'Unknown state', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - - end - - tokens - end - - end - -end -end +module CodeRay +module Scanners + + class C < Scanner + + register_for :c + + RESERVED_WORDS = [ + 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', + 'for', 'goto', 'if', 'return', 'switch', 'while', + 'struct', 'union', 'enum', 'typedef', + 'static', 'register', 'auto', 'extern', + 'sizeof', + 'volatile', 'const', # C89 + 'inline', 'restrict', # C99 + ] + + PREDEFINED_TYPES = [ + 'int', 'long', 'short', 'char', 'void', + 'signed', 'unsigned', 'float', 'double', + 'bool', 'complex', # C99 + ] + + PREDEFINED_CONSTANTS = [ + 'EOF', 'NULL', + 'true', 'false', # C99 + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + kind = :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + kind = :comment + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + match << scan(/:/) + kind = :label + end + + elsif match = scan(/L?"/) + tokens << [:open, :string] + if match[0] == ?L + tokens << ['L', :modifier] + match = '"' + end + state = :string + kind = :delimiter + + elsif scan(/#\s*(\w*)/) + kind = :preprocessor # FIXME multiline preprocs + state = :include_expected if self[1] == 'include' + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :hex + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :oct + + elsif scan(/(?:\d+)(?![.eEfF])/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + kind = :error + + end + + when :string + if scan(/[^\\\n"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + tokens << [:close, :string] + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + getch + kind = :error + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + if state == :string + tokens << [:close, :string] + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/delphi.rb b/lib/coderay/scanners/delphi.rb index d9d9e1d..c141874 100644 --- a/lib/coderay/scanners/delphi.rb +++ b/lib/coderay/scanners/delphi.rb @@ -1,129 +1,131 @@ -module CodeRay -module Scanners - - class Delphi < Scanner - - register_for :delphi - - RESERVED_WORDS = [ - 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', - 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', - 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', - 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', - 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', - 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', - 'procedure', 'program', 'property', 'raise', 'record', 'repeat', - 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', - 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', - 'xor', 'on' - ] - - DIRECTIVES = [ - 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', - 'contains', 'deprecated', 'dispid', 'dynamic', 'export', - 'external', 'far', 'forward', 'implements', 'local', - 'near', 'nodefault', 'on', 'overload', 'override', - 'package', 'pascal', 'platform', 'private', 'protected', 'public', - 'published', 'read', 'readonly', 'register', 'reintroduce', - 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', - 'virtual', 'write', 'writeonly' - ] - - IDENT_KIND = CaseIgnoringWordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(DIRECTIVES, :directive) - - def scan_tokens tokens, options - - state = :initial - - until eos? - - kind = :error - match = nil - - if state == :initial - - if scan(/ \s+ /x) - kind = :space - - elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) - kind = :preprocessor - - elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) - kind = :comment - - elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - - elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) - tokens << [:open, :char] - tokens << ["'", :delimiter] - tokens << [self[1], :content] - tokens << ["'", :delimiter] - tokens << [:close, :char] - next - - elsif match = scan(/ ' /x) - tokens << [:open, :string] - state = :string - kind = :delimiter - - elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) - kind = :char - - elsif scan(/ \$ [0-9A-Fa-f]+ /x) - kind = :hex - - elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) - kind = :integer - - elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) - kind = :float - - else - getch - end - - elsif state == :string - if scan(/[^\n']+/) - kind = :content - elsif scan(/''/) - kind = :char - elsif scan(/'/) - tokens << ["'", :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/\n/) - state = :initial - else - raise "else case \' reached; %p not handled." % peek(1), tokens - end - - else - raise 'else-case reached', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - - end - - tokens - end - - end - -end -end +module CodeRay +module Scanners + + class Delphi < Scanner + + register_for :delphi + + RESERVED_WORDS = [ + 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', + 'const', 'constructor', 'destructor', 'dispinterface', 'div', 'do', + 'downto', 'else', 'end', 'except', 'exports', 'file', 'finalization', + 'finally', 'for', 'function', 'goto', 'if', 'implementation', 'in', + 'inherited', 'initialization', 'inline', 'interface', 'is', 'label', + 'library', 'mod', 'nil', 'not', 'object', 'of', 'or', 'out', 'packed', + 'procedure', 'program', 'property', 'raise', 'record', 'repeat', + 'resourcestring', 'set', 'shl', 'shr', 'string', 'then', 'threadvar', + 'to', 'try', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', + 'xor', 'on' + ] + + DIRECTIVES = [ + 'absolute', 'abstract', 'assembler', 'at', 'automated', 'cdecl', + 'contains', 'deprecated', 'dispid', 'dynamic', 'export', + 'external', 'far', 'forward', 'implements', 'local', + 'near', 'nodefault', 'on', 'overload', 'override', + 'package', 'pascal', 'platform', 'private', 'protected', 'public', + 'published', 'read', 'readonly', 'register', 'reintroduce', + 'requires', 'resident', 'safecall', 'stdcall', 'stored', 'varargs', + 'virtual', 'write', 'writeonly' + ] + + IDENT_KIND = CaseIgnoringWordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(DIRECTIVES, :directive) + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + if state == :initial + + if scan(/ \s+ /x) + kind = :space + + elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) + kind = :preprocessor + + elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) + kind = :comment + + elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + + elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) + tokens << [:open, :char] + tokens << ["'", :delimiter] + tokens << [self[1], :content] + tokens << ["'", :delimiter] + tokens << [:close, :char] + next + + elsif match = scan(/ ' /x) + tokens << [:open, :string] + state = :string + kind = :delimiter + + elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) + kind = :char + + elsif scan(/ \$ [0-9A-Fa-f]+ /x) + kind = :hex + + elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) + kind = :integer + + elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) + kind = :float + + else + kind = :error + getch + + end + + elsif state == :string + if scan(/[^\n']+/) + kind = :content + elsif scan(/''/) + kind = :char + elsif scan(/'/) + tokens << ["'", :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/\n/) + state = :initial + else + raise "else case \' reached; %p not handled." % peek(1), tokens + end + + else + raise 'else-case reached', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/html.rb b/lib/coderay/scanners/html.rb index 7cdc07e..181e5d3 100644 --- a/lib/coderay/scanners/html.rb +++ b/lib/coderay/scanners/html.rb @@ -1,167 +1,174 @@ -module CodeRay -module Scanners - - # HTML Scanner - # - # $Id$ - class HTML < Scanner - - include Streamable - register_for :html - - ATTR_NAME = /[\w.:-]+/ - ATTR_VALUE_UNQUOTED = ATTR_NAME - TAG_END = /\/?>/ - HEX = /[0-9a-fA-F]/ - ENTITY = / - & - (?: - \w+ - | - \# - (?: - \d+ - | - x#{HEX}+ - ) - ) - ; - /ox - - PLAIN_STRING_CONTENT = { - "'" => /[^&'>\n]+/, - '"' => /[^&">\n]+/, - } - - private - def setup - @state = :initial - @plain_string_content = nil - end - - def scan_tokens tokens, options - - state = @state - plain_string_content = @plain_string_content - - until eos? - - kind = :error - match = nil - - if scan(/\s+/m) - kind = :space - - else - - case state - - when :initial - if scan(//m) - kind = :comment - elsif scan(//m) - kind = :preprocessor - elsif scan(/<\?xml.*?\?>/m) - kind = :preprocessor - elsif scan(/<\?.*?\?>|<%.*?%>/m) - kind = :comment - elsif scan(/<\/[-\w_.:]*>/m) - kind = :tag - elsif match = scan(/<[-\w_.:]*>?/m) - kind = :tag - state = :attribute unless match[-1] == ?> - elsif scan(/[^<>&]+/) - kind = :plain - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[>&]/) - kind = :error - else - raise_inspect '[BUG] else-case reached with state %p' % [state], tokens - end - - when :attribute - if scan(/#{TAG_END}/) - kind = :tag - state = :initial - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - state = :attribute_equal - else - getch - end - - when :attribute_equal - if scan(/=/) - kind = :operator - state = :attribute_value - elsif scan(/#{ATTR_NAME}/o) - kind = :attribute_name - elsif scan(/#{TAG_END}/o) - kind = :tag - state = :initial - elsif scan(/./) - state = :attribute - end - - when :attribute_value - if scan(/#{ATTR_VALUE_UNQUOTED}/o) - kind = :attribute_value - state = :attribute - elsif match = scan(/["']/) - tokens << [:open, :string] - state = :attribute_value_string - plain_string_content = PLAIN_STRING_CONTENT[match] - kind = :delimiter - elsif scan(/#{TAG_END}/o) - kind = :tag - state = :initial - else - getch - end - - when :attribute_value_string - if scan(plain_string_content) - kind = :content - elsif scan(/['"]/) - tokens << [matched, :delimiter] - tokens << [:close, :string] - state = :attribute - next - elsif scan(/#{ENTITY}/ox) - kind = :entity - elsif scan(/[\n>]/) - tokens << [:close, :string] - kind = :error - state = :initial - end - - else - raise_inspect 'Unknown state: %p' % [state], tokens - - end - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - end - - if options[:keep_state] - @state = state - @plain_string_content = plain_string_content - end - - tokens - end - - end - -end -end +module CodeRay +module Scanners + + # HTML Scanner + # + # $Id$ + class HTML < Scanner + + include Streamable + register_for :html + + ATTR_NAME = /[\w.:-]+/ + ATTR_VALUE_UNQUOTED = ATTR_NAME + TAG_END = /\/?>/ + HEX = /[0-9a-fA-F]/ + ENTITY = / + & + (?: + \w+ + | + \# + (?: + \d+ + | + x#{HEX}+ + ) + ) + ; + /ox + + PLAIN_STRING_CONTENT = { + "'" => /[^&'>\n]+/, + '"' => /[^&">\n]+/, + } + + def reset + super + @state = :initial + end + + private + def setup + @state = :initial + @plain_string_content = nil + end + + def scan_tokens tokens, options + + state = @state + plain_string_content = @plain_string_content + + until eos? + + kind = nil + match = nil + + if scan(/\s+/m) + kind = :space + + else + + case state + + when :initial + if scan(//m) + kind = :comment + elsif scan(//m) + kind = :preprocessor + elsif scan(/<\?xml.*?\?>/m) + kind = :preprocessor + elsif scan(/<\?.*?\?>|<%.*?%>/m) + kind = :comment + elsif scan(/<\/[-\w_.:]*>/m) + kind = :tag + elsif match = scan(/<[-\w_.:]+>?/m) + kind = :tag + state = :attribute unless match[-1] == ?> + elsif scan(/[^<>&]+/) + kind = :plain + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[<>&]/) + kind = :error + else + raise_inspect '[BUG] else-case reached with state %p' % [state], tokens + end + + when :attribute + if scan(/#{TAG_END}/) + kind = :tag + state = :initial + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + state = :attribute_equal + else + kind = :error + getch + end + + when :attribute_equal + if scan(/=/) + kind = :operator + state = :attribute_value + elsif scan(/#{ATTR_NAME}/o) + kind = :attribute_name + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + elsif scan(/./) + state = :attribute + end + + when :attribute_value + if scan(/#{ATTR_VALUE_UNQUOTED}/o) + kind = :attribute_value + state = :attribute + elsif match = scan(/["']/) + tokens << [:open, :string] + state = :attribute_value_string + plain_string_content = PLAIN_STRING_CONTENT[match] + kind = :delimiter + elsif scan(/#{TAG_END}/o) + kind = :tag + state = :initial + else + kind = :error + getch + end + + when :attribute_value_string + if scan(plain_string_content) + kind = :content + elsif scan(/['"]/) + tokens << [matched, :delimiter] + tokens << [:close, :string] + state = :attribute + next + elsif scan(/#{ENTITY}/ox) + kind = :entity + elsif scan(/[\n>]/) + tokens << [:close, :string] + kind = :error + state = :initial + end + + else + raise_inspect 'Unknown state: %p' % [state], tokens + + end + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens, state + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + end + + if options[:keep_state] + @state = state + @plain_string_content = plain_string_content + end + + tokens + end + + end + +end +end diff --git a/lib/coderay/scanners/nitro_html.rb b/lib/coderay/scanners/nitro_html.rb deleted file mode 100644 index 5955195..0000000 --- a/lib/coderay/scanners/nitro_html.rb +++ /dev/null @@ -1,125 +0,0 @@ -module CodeRay -module Scanners - - load :html - load :ruby - - # RHTML Scanner - # - # $Id$ - class NitroHTML < Scanner - - include Streamable - register_for :nitro_html - - NITRO_RUBY_BLOCK = / - <\?r - (?> - [^\?]* - (?> \?(?!>) [^\?]* )* - ) - (?: \?> )? - | - - (?> - [^<]* - (?> <(?!\/ruby>) [^<]* )* - ) - (?: <\/ruby> )? - | - <% - (?> - [^%]* - (?> %(?!>) [^%]* )* - ) - (?: %> )? - /mx - - NITRO_VALUE_BLOCK = / - \# - (?: - \{ - [^{}]* - (?> - \{ [^}]* \} - (?> [^{}]* ) - )* - \}? - | \| [^|]* \|? - | \( [^)]* \)? - | \[ [^\]]* \]? - | \\ [^\\]* \\? - ) - /x - - NITRO_ENTITY = / - % (?: \#\d+ | \w+ ) ; - / - - START_OF_RUBY = / - (?=[<\#%]) - < (?: \?r | % | ruby> ) - | \# [{(|] - | % (?: \#\d+ | \w+ ) ; - /x - - CLOSING_PAREN = Hash.new do |h, p| - h[p] = p - end.update( { - '(' => ')', - '[' => ']', - '{' => '}', - } ) - - private - - def setup - @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true - @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true - end - - def scan_tokens tokens, options - - until eos? - - if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - - elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) - start_tag = match[0,2] - delimiter = CLOSING_PAREN[start_tag[1,1]] - end_tag = match[-1,1] == delimiter ? delimiter : '' - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) - start_tag = '' ? '?>' : '' - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -(end_tag.size)-1] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - elsif entity = scan(/#{NITRO_ENTITY}/o) - tokens << [entity, :entity] - - else - raise_inspect 'else-case reached!', tokens - end - - end - - tokens - - end - - end - -end -end diff --git a/lib/coderay/scanners/nitro_xhtml.rb b/lib/coderay/scanners/nitro_xhtml.rb new file mode 100644 index 0000000..baef162 --- /dev/null +++ b/lib/coderay/scanners/nitro_xhtml.rb @@ -0,0 +1,130 @@ +module CodeRay +module Scanners + + load :html + load :ruby + + # Nitro XHTML Scanner + # + # $Id$ + class NitroXHTML < Scanner + + include Streamable + register_for :nitro_xhtml + + NITRO_RUBY_BLOCK = / + <\?r + (?> + [^\?]* + (?> \?(?!>) [^\?]* )* + ) + (?: \?> )? + | + + (?> + [^<]* + (?> <(?!\/ruby>) [^<]* )* + ) + (?: <\/ruby> )? + | + <% + (?> + [^%]* + (?> %(?!>) [^%]* )* + ) + (?: %> )? + /mx + + NITRO_VALUE_BLOCK = / + \# + (?: + \{ + [^{}]* + (?> + \{ [^}]* \} + (?> [^{}]* ) + )* + \}? + | \| [^|]* \|? + | \( [^)]* \)? + | \[ [^\]]* \]? + | \\ [^\\]* \\? + ) + /x + + NITRO_ENTITY = / + % (?: \#\d+ | \w+ ) ; + / + + START_OF_RUBY = / + (?=[<\#%]) + < (?: \?r | % | ruby> ) + | \# [{(|] + | % (?: \#\d+ | \w+ ) ; + /x + + CLOSING_PAREN = Hash.new do |h, p| + h[p] = p + end.update( { + '(' => ')', + '[' => ']', + '{' => '}', + } ) + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def reset_instance + super + @html_scanner.reset + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_RUBY})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{NITRO_VALUE_BLOCK}/o) + start_tag = match[0,2] + delimiter = CLOSING_PAREN[start_tag[1,1]] + end_tag = match[-1,1] == delimiter ? delimiter : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif match = scan(/#{NITRO_RUBY_BLOCK}/o) + start_tag = '' ? '?>' : '' + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -(end_tag.size)-1] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + elsif entity = scan(/#{NITRO_ENTITY}/o) + tokens << [entity, :entity] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end + +end +end diff --git a/lib/coderay/scanners/rhtml.rb b/lib/coderay/scanners/rhtml.rb index 15a7566..8afb727 100644 --- a/lib/coderay/scanners/rhtml.rb +++ b/lib/coderay/scanners/rhtml.rb @@ -1,65 +1,73 @@ -module CodeRay -module Scanners - - load :html - load :ruby - - # RHTML Scanner - # - # $Id$ - class RHTML < Scanner - - include Streamable - register_for :rhtml - - ERB_RUBY_BLOCK = / - <%(?!%)[=-]? - (?> - [^%]* - (?> %(?!>) [^%]* )* - ) - (?: %> )? - /x - - START_OF_ERB = / - <%(?!%) - /x - - private - - def setup - @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true - @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true - end - - def scan_tokens tokens, options - - until eos? - - if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? - @html_scanner.tokenize match - - elsif match = scan(/#{ERB_RUBY_BLOCK}/o) - start_tag = match[/\A<%[-=]?/] - end_tag = match[/%?>?\z/] - tokens << [:open, :inline] - tokens << [start_tag, :delimiter] - code = match[start_tag.size .. -1 - end_tag.size] - @ruby_scanner.tokenize code - tokens << [end_tag, :delimiter] unless end_tag.empty? - tokens << [:close, :inline] - - else - raise_inspect 'else-case reached!', tokens - end - - end - - tokens - - end - - end - -end -end +module CodeRay +module Scanners + + load :html + load :ruby + + # RHTML Scanner + # + # $Id$ + class RHTML < Scanner + + include Streamable + register_for :rhtml + + ERB_RUBY_BLOCK = / + <%(?!%)[=-]? + (?> + [^\-%]* # normal* + (?> # special + (?: %(?!>) | -(?!%>) ) + [^\-%]* # normal* + )* + ) + (?: -?%> )? + /x + + START_OF_ERB = / + <%(?!%) + /x + + private + + def setup + @ruby_scanner = CodeRay.scanner :ruby, :tokens => @tokens, :keep_tokens => true + @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true + end + + def reset_instance + super + @html_scanner.reset + end + + def scan_tokens tokens, options + + until eos? + + if (match = scan_until(/(?=#{START_OF_ERB})/o) || scan_until(/\z/)) and not match.empty? + @html_scanner.tokenize match + + elsif match = scan(/#{ERB_RUBY_BLOCK}/o) + start_tag = match[/\A<%[-=]?/] + end_tag = match[/-?%?>?\z/] + tokens << [:open, :inline] + tokens << [start_tag, :delimiter] + code = match[start_tag.size .. -1 - end_tag.size] + @ruby_scanner.tokenize code + tokens << [end_tag, :delimiter] unless end_tag.empty? + tokens << [:close, :inline] + + else + raise_inspect 'else-case reached!', tokens + end + + end + + tokens + + end + + end + +end +end diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 3ce5003..76c87ca 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -1,400 +1,404 @@ -module CodeRay -module Scanners - - # This scanner is really complex, since Ruby _is_ a complex language! - # - # It tries to highlight 100% of all common code, - # and 90% of strange codes. - # - # It is optimized for HTML highlighting, and is not very useful for - # parsing or pretty printing. - # - # For now, I think it's better than the scanners in VIM or Syntax, or - # any highlighter I was able to find, except Caleb's RubyLexer. - # - # I hope it's also better than the rdoc/irb lexer. - class Ruby < Scanner - - include Streamable - - register_for :ruby - - helper :patterns - - DEFAULT_OPTIONS = { - :parse_regexps => true, - } - - private - def scan_tokens tokens, options - parse_regexp = false # options[:parse_regexps] - first_bake = saved_tokens = nil - last_token_dot = false - fancy_allowed = regexp_allowed = true - heredocs = nil - last_state = nil - state = :initial - depth = nil - states = [] - - patterns = Patterns # avoid constant lookup - - until eos? - type = :error - match = nil - kind = nil - - if state.instance_of? patterns::StringState -# {{{ - match = scan_until(state.pattern) || scan_until(/\z/) - tokens << [match, :content] unless match.empty? - break if eos? - - if state.heredoc and self[1] # end of heredoc - match = getch.to_s - match << scan_until(/$/) unless eos? - tokens << [match, :delimiter] - tokens << [:close, state.type] - state = state.next_state - next - end - - case match = getch - - when state.delim - if state.paren - state.paren_depth -= 1 - if state.paren_depth > 0 - tokens << [match, :nesting_delimiter] - next - end - end - tokens << [match, :delimiter] - if state.type == :regexp and not eos? - modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox) - tokens << [modifiers, :modifier] unless modifiers.empty? - if parse_regexp - extended = modifiers.index ?x - tokens = saved_tokens - regexp = tokens - for text, type in regexp - if text.is_a? ::String - case type - when :content - text.scan(/([^#]+)|(#.*)/) do |plain, comment| - if plain - tokens << [plain, :content] - else - tokens << [comment, :comment] - end - end - when :character - if text[/\\(?:[swdSWDAzZbB]|\d+)/] - tokens << [text, :modifier] - else - tokens << [text, type] - end - else - tokens << [text, type] - end - else - tokens << [text, type] - end - end - first_bake = saved_tokens = nil - end - end - tokens << [:close, state.type] - fancy_allowed = regexp_allowed = false - state = state.next_state - - when '\\' - if state.interpreted - if esc = scan(/ #{patterns::ESCAPE} /ox) - tokens << [match + esc, :char] - else - tokens << [match, :error] - end - else - case m = getch - when state.delim, '\\' - tokens << [match + m, :char] - when nil - tokens << [match, :error] - else - tokens << [match + m, :content] - end - end - - when '#' - case peek(1)[0] - when ?{ - states.push [state, depth, heredocs] - fancy_allowed = regexp_allowed = true - state = :initial - depth = 1 - tokens << [:open, :inline] - tokens << [match + getch, :delimiter] - when ?$, ?@ - tokens << [match, :escape] - last_state = state # scan one token as normal code, then return here - state = :initial - else - raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens - end - - when state.paren - state.paren_depth += 1 - tokens << [match, :nesting_delimiter] - - when /#{patterns::REGEXP_SYMBOLS}/ox - tokens << [match, :function] - - else - raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens - - end - next -# }}} - else -# {{{ - if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or - ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) - fancy_allowed = true - case m = match[0] - when ?\s, ?\t, ?\f - match << scan(/\s*/) unless eos? or heredocs - type = :space - when ?\n, ?\\ - type = :space - if m == ?\n - regexp_allowed = true - state = :initial if state == :undef_comma_expected - end - if heredocs - unscan # heredoc scanning needs \n at start - state = heredocs.shift - tokens << [:open, state.type] - heredocs = nil if heredocs.empty? - next - else - match << scan(/\s*/) unless eos? - end - when ?#, ?=, ?_ - type = :comment - regexp_allowed = true - else - raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens - end - tokens << [match, type] - next - - elsif state == :initial - - # IDENTS # - if match = scan(/#{patterns::METHOD_NAME}/o) - if last_token_dot - type = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end - else - type = patterns::IDENT_KIND[match] - if type == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/) - type = :constant - elsif type == :reserved - state = patterns::DEF_NEW_STATE[match] - end - end - ## experimental! - fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) - - # OPERATORS # - elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or - (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) - if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ - regexp_allowed = fancy_allowed = :set - end - last_token_dot = :set if match == '.' or match == '::' - type = :operator - unless states.empty? - case match - when '{' - depth += 1 - when '}' - depth -= 1 - if depth == 0 - state, depth, heredocs = states.pop - tokens << [match, :delimiter] - type = :inline - match = :close - end - end - end - - elsif match = scan(/ ['"] /mx) - tokens << [:open, :string] - type = :delimiter - state = patterns::StringState.new :string, match == '"', match # important for streaming - - elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) - type = :instance_variable - - elsif regexp_allowed and match = scan(/\//) - tokens << [:open, :regexp] - type = :delimiter - interpreted = true - state = patterns::StringState.new :regexp, interpreted, match - if parse_regexp - tokens = [] - saved_tokens = tokens - end - - elsif match = scan(/#{patterns::NUMERIC}/o) - type = if self[1] then :float else :integer end - - elsif match = scan(/#{patterns::SYMBOL}/o) - case delim = match[1] - when ?', ?" - tokens << [:open, :symbol] - tokens << [':', :symbol] - match = delim.chr - type = :delimiter - state = patterns::StringState.new :symbol, delim == ?", match - else - type = :symbol - end - - elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) - regexp_allowed = fancy_allowed = :set - type = :operator - - elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) - indented = self[1] == '-' - quote = self[3] - delim = self[quote ? 4 : 2] - type = patterns::QUOTE_TO_TYPE[quote] - tokens << [:open, type] - tokens << [match, :delimiter] - match = :close - heredoc = patterns::StringState.new type, quote != '\'', delim, (indented ? :indented : :linestart ) - heredocs ||= [] # create heredocs if empty - heredocs << heredoc - - elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) - type, interpreted = *patterns::FancyStringType.fetch(self[1]) do - raise_inspect 'Unknown fancy string: %%%p' % k, tokens - end - tokens << [:open, type] - state = patterns::StringState.new type, interpreted, self[2] - type = :delimiter - - elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o) - type = :integer - - elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) - regexp_allowed = fancy_allowed = :set - type = :operator - - elsif match = scan(/`/) - if last_token_dot - type = :operator - else - tokens << [:open, :shell] - type = :delimiter - state = patterns::StringState.new :shell, true, match - end - - elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) - type = :global_variable - - elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) - type = :class_variable - - else - match = getch - - end - - elsif state == :def_expected - state = :initial - if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) - type = :method - else - next - end - - elsif state == :undef_expected - state = :undef_comma_expected - if match = scan(/#{patterns::METHOD_NAME_EX}/o) - type = :method - elsif match = scan(/#{patterns::SYMBOL}/o) - case delim = match[1] - when ?', ?" - tokens << [:open, :symbol] - tokens << [':', :symbol] - match = delim.chr - type = :delimiter - state = patterns::StringState.new :symbol, delim == ?", match - state.next_state = :undef_comma_expected - else - type = :symbol - end - else - state = :initial - next - end - - elsif state == :undef_comma_expected - if match = scan(/,/) - type = :operator - state = :undef_expected - else - state = :initial - next - end - - elsif state == :module_expected - if match = scan(/< true, + } + + private + def scan_tokens tokens, options + parse_regexp = false # options[:parse_regexps] + first_bake = saved_tokens = nil + last_token_dot = false + fancy_allowed = regexp_allowed = true + heredocs = nil + last_state = nil + state = :initial + depth = nil + inline_block_stack = [] + + patterns = Patterns # avoid constant lookup + + until eos? + match = nil + kind = nil + + if state.instance_of? patterns::StringState +# {{{ + match = scan_until(state.pattern) || scan_until(/\z/) + tokens << [match, :content] unless match.empty? + break if eos? + + if state.heredoc and self[1] # end of heredoc + match = getch.to_s + match << scan_until(/$/) unless eos? + tokens << [match, :delimiter] + tokens << [:close, state.type] + state = state.next_state + next + end + + case match = getch + + when state.delim + if state.paren + state.paren_depth -= 1 + if state.paren_depth > 0 + tokens << [match, :nesting_delimiter] + next + end + end + tokens << [match, :delimiter] + if state.type == :regexp and not eos? + modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox) + tokens << [modifiers, :modifier] unless modifiers.empty? + if parse_regexp + extended = modifiers.index ?x + tokens = saved_tokens + regexp = tokens + for text, kind in regexp + if text.is_a? ::String + case kind + when :content + text.scan(/([^#]+)|(#.*)/) do |plain, comment| + if plain + tokens << [plain, :content] + else + tokens << [comment, :comment] + end + end + when :character + if text[/\\(?:[swdSWDAzZbB]|\d+)/] + tokens << [text, :modifier] + else + tokens << [text, kind] + end + else + tokens << [text, kind] + end + else + tokens << [text, kind] + end + end + first_bake = saved_tokens = nil + end + end + tokens << [:close, state.type] + fancy_allowed = regexp_allowed = false + state = state.next_state + + when '\\' + if state.interpreted + if esc = scan(/ #{patterns::ESCAPE} /ox) + tokens << [match + esc, :char] + else + tokens << [match, :error] + end + else + case m = getch + when state.delim, '\\' + tokens << [match + m, :char] + when nil + tokens << [match, :error] + else + tokens << [match + m, :content] + end + end + + when '#' + case peek(1)[0] + when ?{ + inline_block_stack << [state, depth, heredocs] + fancy_allowed = regexp_allowed = true + state = :initial + depth = 1 + tokens << [:open, :inline] + tokens << [match + getch, :delimiter] + when ?$, ?@ + tokens << [match, :escape] + last_state = state # scan one token as normal code, then return here + state = :initial + else + raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens + end + + when state.paren + state.paren_depth += 1 + tokens << [match, :nesting_delimiter] + + when /#{patterns::REGEXP_SYMBOLS}/ox + tokens << [match, :function] + + else + raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens + + end + next +# }}} + else +# {{{ + if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or + ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) ) + fancy_allowed = true + case m = match[0] + when ?\s, ?\t, ?\f + match << scan(/\s*/) unless eos? or heredocs + kind = :space + when ?\n, ?\\ + kind = :space + if m == ?\n + regexp_allowed = true + state = :initial if state == :undef_comma_expected + end + if heredocs + unscan # heredoc scanning needs \n at start + state = heredocs.shift + tokens << [:open, state.type] + heredocs = nil if heredocs.empty? + next + else + match << scan(/\s*/) unless eos? + end + when ?#, ?=, ?_ + kind = :comment + regexp_allowed = true + else + raise_inspect 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens + end + tokens << [match, kind] + next + + elsif state == :initial + + # IDENTS # + if match = scan(/#{patterns::METHOD_NAME}/o) + if last_token_dot + kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end + else + kind = patterns::IDENT_KIND[match] + if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/) + kind = :constant + elsif kind == :reserved + state = patterns::DEF_NEW_STATE[match] + end + end + ## experimental! + fancy_allowed = regexp_allowed = :set if patterns::REGEXP_ALLOWED[match] or check(/\s+(?:%\S|\/\S)/) + + # OPERATORS # + elsif (not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)) or + (last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)) + if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/ + regexp_allowed = fancy_allowed = :set + end + last_token_dot = :set if match == '.' or match == '::' + kind = :operator + unless inline_block_stack.empty? + case match + when '{' + depth += 1 + when '}' + depth -= 1 + if depth == 0 # closing brace of inline block reached + state, depth, heredocs = inline_block_stack.pop + tokens << [match, :delimiter] + kind = :inline + match = :close + end + end + end + + elsif match = scan(/ ['"] /mx) + tokens << [:open, :string] + kind = :delimiter + state = patterns::StringState.new :string, match == '"', match # important for streaming + + elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o) + kind = :instance_variable + + elsif regexp_allowed and match = scan(/\//) + tokens << [:open, :regexp] + kind = :delimiter + interpreted = true + state = patterns::StringState.new :regexp, interpreted, match + if parse_regexp + tokens = [] + saved_tokens = tokens + end + + elsif match = scan(/#{patterns::NUMERIC}/o) + kind = if self[1] then :float else :integer end + + elsif match = scan(/#{patterns::SYMBOL}/o) + case delim = match[1] + when ?', ?" + tokens << [:open, :symbol] + tokens << [':', :symbol] + match = delim.chr + kind = :delimiter + state = patterns::StringState.new :symbol, delim == ?", match + else + kind = :symbol + end + + elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x) + regexp_allowed = fancy_allowed = :set + kind = :operator + + elsif fancy_allowed and match = scan(/#{patterns::HEREDOC_OPEN}/o) + indented = self[1] == '-' + quote = self[3] + delim = self[quote ? 4 : 2] + kind = patterns::QUOTE_TO_TYPE[quote] + tokens << [:open, kind] + tokens << [match, :delimiter] + match = :close + heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart ) + heredocs ||= [] # create heredocs if empty + heredocs << heredoc + + elsif fancy_allowed and match = scan(/#{patterns::FANCY_START_SAVE}/o) + kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do + raise_inspect 'Unknown fancy string: %%%p' % k, tokens + end + tokens << [:open, kind] + state = patterns::StringState.new kind, interpreted, self[2] + kind = :delimiter + + elsif fancy_allowed and match = scan(/#{patterns::CHARACTER}/o) + kind = :integer + + elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x) + regexp_allowed = fancy_allowed = :set + kind = :operator + + elsif match = scan(/`/) + if last_token_dot + kind = :operator + else + tokens << [:open, :shell] + kind = :delimiter + state = patterns::StringState.new :shell, true, match + end + + elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o) + kind = :global_variable + + elsif match = scan(/#{patterns::CLASS_VARIABLE}/o) + kind = :class_variable + + else + kind = :error + match = getch + + end + + elsif state == :def_expected + state = :initial + if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o) + kind = :method + else + next + end + + elsif state == :undef_expected + state = :undef_comma_expected + if match = scan(/#{patterns::METHOD_NAME_EX}/o) + kind = :method + elsif match = scan(/#{patterns::SYMBOL}/o) + case delim = match[1] + when ?', ?" + tokens << [:open, :symbol] + tokens << [':', :symbol] + match = delim.chr + kind = :delimiter + state = patterns::StringState.new :symbol, delim == ?", match + state.next_state = :undef_comma_expected + else + kind = :symbol + end + else + state = :initial + next + end + + elsif state == :undef_comma_expected + if match = scan(/,/) + kind = :operator + state = :undef_expected + else + state = :initial + next + end + + elsif state == :module_expected + if match = scan(/< 1 + state = this_block.first + tokens << [:close, state.type] + end + + tokens + end + + end + +end +end + +# vim:fdm=marker -- cgit v1.2.1