From 26a8e5a0388199ac686db28d631b05a5b5aa02e1 Mon Sep 17 00:00:00 2001 From: murphy Date: Tue, 11 Jul 2006 05:37:50 +0000 Subject: Changed error handling of all scanners: :error tokens are OK now, even in debug mode, but token kind is nil unless assigned. Small fixes for C and Ruby scanners. Renamed local variable type to kind in Ruby scanner. Improved RHTML scanner to recognize -%> as delimiter. HTML encoder: improved handling of malformed token strings. Fixed PluginHost#inspect including docu. Scanner#raise_inspect also shows state if given. --- lib/coderay/scanners/c.rb | 318 ++++++++++++++++++++++++---------------------- 1 file changed, 163 insertions(+), 155 deletions(-) (limited to 'lib/coderay/scanners/c.rb') diff --git a/lib/coderay/scanners/c.rb b/lib/coderay/scanners/c.rb index 66b8de1..be113d0 100644 --- a/lib/coderay/scanners/c.rb +++ b/lib/coderay/scanners/c.rb @@ -1,155 +1,163 @@ -module CodeRay -module Scanners - - class C < Scanner - - register_for :c - - RESERVED_WORDS = [ - 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', - 'for', 'goto', 'if', 'return', 'switch', 'while', - 'struct', 'union', 'enum', 'typedef', - 'static', 'register', 'auto', 'extern', - 'sizeof', - 'volatile', 'const', # C89 - 'inline', 'restrict', # C99 - ] - - PREDEFINED_TYPES = [ - 'int', 'long', 'short', 'char', 'void', - 'signed', 'unsigned', 'float', 'double', - 'bool', 'complex', # C99 - ] - - PREDEFINED_CONSTANTS = [ - 'EOF', 'NULL', - 'true', 'false', # C99 - ] - - IDENT_KIND = WordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(PREDEFINED_TYPES, :pre_type). - add(PREDEFINED_CONSTANTS, :pre_constant) - - ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x - UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x - - def scan_tokens tokens, options - - state = :initial - - until eos? - - kind = :error - match = nil - - case state - - when :initial - - if scan(/ \s+ | \\\n /x) - kind = :space - - elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) - kind = :comment - - elsif match = scan(/ \# \s* if \s* 0 /x) - match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? - kind = :comment - - elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - if kind == :ident and check(/:(?!:)/) - match << scan(/:/) - kind = :label - end - - elsif match = scan(/L?"/) - tokens << [:open, :string] - if match[0] == ?L - tokens << ['L', :modifier] - match = '"' - end - state = :string - kind = :delimiter - - elsif scan(/#\s*(\w*)/) - kind = :preprocessor # FIXME multiline preprocs - state = :include_expected if self[1] == 'include' - - elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) - kind = :char - - elsif scan(/0[xX][0-9A-Fa-f]+/) - kind = :hex - - elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) - kind = :oct - - elsif scan(/(?:\d+)(?![.eEfF])/) - kind = :integer - - elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) - kind = :float - - else - getch - end - - when :string - if scan(/[^\\"]+/) - kind = :content - elsif scan(/"/) - tokens << ['"', :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) - kind = :char - elsif scan(/ \\ | $ /x) - kind = :error - state = :initial - else - raise_inspect "else case \" reached; %p not handled." % peek(1), tokens - end - - when :include_expected - if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) - kind = :include - state = :initial - - elsif match = scan(/\s+/) - kind = :space - state = :initial if match.index ?\n - - else - getch - - end - - else - raise_inspect 'Unknown state', tokens - - end - - match ||= matched - if $DEBUG and (not kind or kind == :error) - raise_inspect 'Error token %p in line %d' % - [[match, kind], line], tokens - end - raise_inspect 'Empty token', tokens unless match - - tokens << [match, kind] - - end - - tokens - end - - end - -end -end +module CodeRay +module Scanners + + class C < Scanner + + register_for :c + + RESERVED_WORDS = [ + 'asm', 'break', 'case', 'continue', 'default', 'do', 'else', + 'for', 'goto', 'if', 'return', 'switch', 'while', + 'struct', 'union', 'enum', 'typedef', + 'static', 'register', 'auto', 'extern', + 'sizeof', + 'volatile', 'const', # C89 + 'inline', 'restrict', # C99 + ] + + PREDEFINED_TYPES = [ + 'int', 'long', 'short', 'char', 'void', + 'signed', 'unsigned', 'float', 'double', + 'bool', 'complex', # C99 + ] + + PREDEFINED_CONSTANTS = [ + 'EOF', 'NULL', + 'true', 'false', # C99 + ] + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(PREDEFINED_TYPES, :pre_type). + add(PREDEFINED_CONSTANTS, :pre_constant) + + ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x + + def scan_tokens tokens, options + + state = :initial + + until eos? + + kind = nil + match = nil + + case state + + when :initial + + if scan(/ \s+ | \\\n /x) + kind = :space + + elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + kind = :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + kind = :comment + + elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) + kind = :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident and check(/:(?!:)/) + match << scan(/:/) + kind = :label + end + + elsif match = scan(/L?"/) + tokens << [:open, :string] + if match[0] == ?L + tokens << ['L', :modifier] + match = '"' + end + state = :string + kind = :delimiter + + elsif scan(/#\s*(\w*)/) + kind = :preprocessor # FIXME multiline preprocs + state = :include_expected if self[1] == 'include' + + elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + kind = :char + + elsif scan(/0[xX][0-9A-Fa-f]+/) + kind = :hex + + elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) + kind = :oct + + elsif scan(/(?:\d+)(?![.eEfF])/) + kind = :integer + + elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + kind = :float + + else + getch + kind = :error + + end + + when :string + if scan(/[^\\\n"]+/) + kind = :content + elsif scan(/"/) + tokens << ['"', :delimiter] + tokens << [:close, :string] + state = :initial + next + elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + kind = :char + elsif scan(/ \\ | $ /x) + tokens << [:close, :string] + kind = :error + state = :initial + else + raise_inspect "else case \" reached; %p not handled." % peek(1), tokens + end + + when :include_expected + if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + kind = :include + state = :initial + + elsif match = scan(/\s+/) + kind = :space + state = :initial if match.index ?\n + + else + getch + kind = :error + + end + + else + raise_inspect 'Unknown state', tokens + + end + + match ||= matched + if $DEBUG and not kind + raise_inspect 'Error token %p in line %d' % + [[match, kind], line], tokens + end + raise_inspect 'Empty token', tokens unless match + + tokens << [match, kind] + + end + + if state == :string + tokens << [:close, :string] + end + + tokens + end + + end + +end +end -- cgit v1.2.1