From 48e144a20829faaeca9a7db8fbc6128f1f5d7297 Mon Sep 17 00:00:00 2001 From: murphy Date: Tue, 4 Oct 2005 04:04:07 +0000 Subject: Two new encoders: debug and xml. encoder.rb: new token handling encoders/statistic.rb: using new handling ruby_helper.rb: small improvements ruby.rb: - escapes in subtoken - Float detection changed - some multi-char operators are now scanned as one token - def and module definition handling changed bin/coderay: improved, new interface (still in progress) plugin.rb: more expressive load error message --- lib/coderay/scanners/helpers/ruby_helper.rb | 18 ++++++++--------- lib/coderay/scanners/ruby.rb | 31 ++++++++++++++++------------- 2 files changed, 26 insertions(+), 23 deletions(-) (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/helpers/ruby_helper.rb b/lib/coderay/scanners/helpers/ruby_helper.rb index 241b392..a44ca79 100644 --- a/lib/coderay/scanners/helpers/ruby_helper.rb +++ b/lib/coderay/scanners/helpers/ruby_helper.rb @@ -60,19 +60,17 @@ module CodeRay module Scanners QUOTE_TO_TYPE.default = :string REGEXP_MODIFIERS = /[mixounse]*/ - REGEXP_SYMBOLS = / - [|?*+?(){}\[\].^$] - /x + REGEXP_SYMBOLS = /[|?*+?(){}\[\].^$]/ - DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error + DECIMAL = /\d+(?:_\d+)*/ OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ BINARY = /0b[01]+(?:_[01]+)*/ EXPONENT = / [eE] [+-]? #{DECIMAL} /ox - FLOAT_OR_INT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? )? /ox - FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) /ox - NUMERIC = / #{OCTAL} | #{HEXADECIMAL} | #{BINARY} | #{FLOAT_OR_INT} /ox + FLOAT_SUFFIX = / #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? /ox + FLOAT_OR_INT = / #{DECIMAL} (?: #{FLOAT_SUFFIX} () )? /ox + NUMERIC = / (?=0) (?: #{OCTAL} | #{HEXADECIMAL} | #{BINARY} ) | #{FLOAT_OR_INT} /ox SYMBOL = / : @@ -103,7 +101,7 @@ module CodeRay module Scanners ) /mx - # NOTE: This is not completel correct, but + # NOTE: This is not completely correct, but # nobody needs heredoc delimiters ending with \n. HEREDOC_OPEN = / << (-)? # $1 = float @@ -115,7 +113,7 @@ module CodeRay module Scanners ) /mx - RDOC = / + RUBYDOC = / =begin (?!\S) .*? (?: \Z | ^=end (?!\S) [^\n]* ) @@ -127,6 +125,8 @@ module CodeRay module Scanners (?: \Z | (?=^\#CODE) ) /mx + RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo + RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x FANCY_START = / % ( [qQwWxsr] | (?![\w\s=]) ) (.) /mox diff --git a/lib/coderay/scanners/ruby.rb b/lib/coderay/scanners/ruby.rb index 72e59bd..a50893a 100644 --- a/lib/coderay/scanners/ruby.rb +++ b/lib/coderay/scanners/ruby.rb @@ -128,13 +128,14 @@ module CodeRay module Scanners fancy_allowed = regexp_allowed = true state = :initial depth = 1 - tokens << [match + getch, :escape] + tokens << [:open, :escape] + tokens << [match + getch, :delimiter] when ?$, ?@ tokens << [match, :escape] last_state = state # scan one token as normal code, then return here state = :initial else - raise "else-case # reached; #%p not handled" % peek(1), tokens + raise 'else-case # reached; #%p not handled' % peek(1), tokens end when state.paren @@ -145,7 +146,7 @@ module CodeRay module Scanners tokens << [match, :function] else - raise "else-case \" reached; %p not handled, state = %p" % [match, state], tokens + raise 'else-case " reached; %p not handled, state = %p' % [match, state], tokens end next @@ -153,7 +154,7 @@ module CodeRay module Scanners else # {{{ if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or - ( bol? and match = scan(/ #{DATA} | #{RDOC} /ox) ) + ( bol? and match = scan(/#{RUBYDOC_OR_DATA}/o) ) fancy_allowed = true case m = match[0] when ?\s, ?\t, ?\f @@ -175,7 +176,7 @@ module CodeRay module Scanners type = :comment regexp_allowed = true else - raise "else-case _ reached, because case %p was not handled" % [matched[0].chr], tokens + raise 'else-case _ reached, because case %p was not handled' % [matched[0].chr], tokens end tokens << [match, type] next @@ -195,7 +196,9 @@ module CodeRay module Scanners depth -= 1 if depth == 0 state, depth, heredocs = states.pop + tokens << [match + getch, :delimiter] type = :escape + match = :close end end end @@ -221,7 +224,7 @@ module CodeRay module Scanners elsif match = scan(/#{INSTANCE_VARIABLE}/o) type = :instance_variable - elsif regexp_allowed and match = scan(/ \/ /mx) + elsif regexp_allowed and match = scan(/\//) tokens << [:open, :regexp] type = :delimiter interpreted = true @@ -232,7 +235,7 @@ module CodeRay module Scanners end elsif match = scan(/#{NUMERIC}/o) - type = if match[/#{FLOAT}/o] then :float else :integer end + type = if self[1] then :float else :integer end elsif fancy_allowed and match = scan(/#{SYMBOL}/o) case match[1] @@ -265,7 +268,7 @@ module CodeRay module Scanners elsif fancy_allowed and match = scan(/#{CHARACTER}/o) type = :integer - elsif match = scan(/ [\/%?)? | [?:] /x) regexp_allowed = fancy_allowed = :set type = :operator @@ -290,25 +293,25 @@ module CodeRay module Scanners end elsif state == :def_expected - if match = scan(/ (?: #{VARIABLE} (?: ::#{IDENT} )* \. )? #{METHOD_NAME_EX} /ox) + state = :initial + if match = scan(/#{METHOD_NAME_EX}/o) type = :method else - match = getch + next end - state = :initial elsif state == :module_expected if match = scan(/<