From 11690c364e94fa0b3c375f4ec7fbe8dd01762657 Mon Sep 17 00:00:00 2001 From: no author Date: Mon, 26 Sep 2005 23:01:09 +0000 Subject: Scanners in alpha state moved to hidden. --- lib/coderay/scanners/mush.rb | 102 -------------- lib/coderay/scanners/rubyfast.rb | 287 --------------------------------------- lib/coderay/scanners/rubylex.rb | 102 -------------- 3 files changed, 491 deletions(-) delete mode 100644 lib/coderay/scanners/mush.rb delete mode 100644 lib/coderay/scanners/rubyfast.rb delete mode 100644 lib/coderay/scanners/rubylex.rb (limited to 'lib/coderay/scanners') diff --git a/lib/coderay/scanners/mush.rb b/lib/coderay/scanners/mush.rb deleted file mode 100644 index 5217ed9..0000000 --- a/lib/coderay/scanners/mush.rb +++ /dev/null @@ -1,102 +0,0 @@ -module CodeRay module Scanners - - class Mush < Scanner - - register_for :mush - - RESERVED_WORDS = [ - ] - - IDENT_KIND = Scanner::WordList.new(:ident, :case_ignore). - add(RESERVED_WORDS, :reserved). - add(DIRECTIVES, :directive) - - def scan_tokens tokens, options - - state = :initial - - until eos? - - kind = :error - match = nil - - if state == :initial - - if scan(/ \s+ /x) - kind = :space - - elsif scan(%r! \{ \$ [^}]* \}? | \(\* \$ (?: .*? \*\) | .* ) !mx) - kind = :preprocessor - - elsif scan(%r! // [^\n]* | \{ [^}]* \}? | \(\* (?: .*? \*\) | .* ) !mx) - kind = :comment - - elsif scan(/ [-+*\/=<>:;,.@\^|\(\)\[\]]+ /x) - kind = :operator - - elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) - kind = IDENT_KIND[match] - - elsif match = scan(/ ' ( [^\n']|'' ) (?:'|$) /x) - tokens << [:open, :char] - tokens << ["'", :delimiter] - tokens << [self[1], :content] - tokens << ["'", :delimiter] - tokens << [:close, :char] - next - - elsif match = scan(/ ' /x) - tokens << [:open, :string] - state = :string - kind = :delimiter - - elsif scan(/ \# (?: \d+ | \$[0-9A-Fa-f]+ ) /x) - kind = :char - - elsif scan(/ \$ [0-9A-Fa-f]+ /x) - kind = :hex - - elsif scan(/ (?: \d+ ) (?![eE]|\.[^.]) /x) - kind = :integer - - elsif scan(/ \d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ ) /x) - kind = :float - - else - getch - end - - elsif state == :string - if scan(/[^\n']+/) - kind = :content - elsif scan(/''/) - kind = :char - elsif scan(/'/) - tokens << ["'", :delimiter] - tokens << [:close, :string] - state = :initial - next - elsif scan(/\n/) - state = :initial - else - raise "else case \' reached; %p not handled." % peek(1), tokens - end - - else - raise 'else-case reached', tokens - - end - - match ||= matched - raise [match, kind], tokens if kind == :error - - tokens << [match, kind] - - end - - tokens - end - - end - -end end diff --git a/lib/coderay/scanners/rubyfast.rb b/lib/coderay/scanners/rubyfast.rb deleted file mode 100644 index baff382..0000000 --- a/lib/coderay/scanners/rubyfast.rb +++ /dev/null @@ -1,287 +0,0 @@ -module CodeRay module Scanners - - class Ruby < Scanner - - register_for :rubyfast - - RESERVED_WORDS = [ - 'and', 'def', 'end', 'in', 'or', 'unless', 'begin', - 'defined?', 'ensure', 'module', 'redo', 'super', 'until', - 'BEGIN', 'break', 'do', 'next', 'rescue', 'then', - 'when', 'END', 'case', 'else', 'for', 'retry', - 'while', 'alias', 'class', 'elsif', 'if', 'not', 'return', - 'undef', 'yield', - ] - - DEF_KEYWORDS = ['def'] - MODULE_KEYWORDS = ['class', 'module'] - DEF_NEW_STATE = WordList.new(:initial). - add(DEF_KEYWORDS, :def_expected). - add(MODULE_KEYWORDS, :module_expected) - - WORDS_ALLOWING_REGEXP = [ - 'and', 'or', 'not', 'while', 'until', 'unless', 'if', 'elsif', 'when' - ] - REGEXP_ALLOWED = WordList.new(false). - add(WORDS_ALLOWING_REGEXP, :set) - - PREDEFINED_CONSTANTS = [ - 'nil', 'true', 'false', 'self', - 'DATA', 'ARGV', 'ARGF', '__FILE__', '__LINE__', - ] - - IDENT_KIND = WordList.new(:ident). - add(RESERVED_WORDS, :reserved). - add(PREDEFINED_CONSTANTS, :pre_constant) - - IDENT = /[a-zA-Z_][a-zA-Z_0-9]*/ - - METHOD_NAME = / #{IDENT} [?!]? /xo - METHOD_NAME_EX = / - #{IDENT}[?!=]? # common methods: split, foo=, empty?, gsub! - | \*\*? # multiplication and power - | [-+~]@? # plus, minus - | [\/%&|^`] # division, modulo or format strings, &and, |or, ^xor, `system` - | \[\]=? # array getter and setter - | <=?>? | >=? # comparison, rocket operator - | << | >> # append or shift left, shift right - | ===? # simple equality and case equality - /ox - GLOBAL_VARIABLE = / \$ (?: #{IDENT} | [1-9] | 0[a-zA-Z_0-9]* | [~&+`'=\/,;_.<>!@$?*":\\] | -[a-zA-Z_0-9] ) /ox - - DOUBLEQ = / " [^"\#\\]* (?: (?: \#\{.*?\} | \#(?:$")? | \\. ) [^"\#\\]* )* "? /mox - SINGLEQ = / ' [^'\\]* (?: \\. [^'\\]* )* '? /mox - STRING = / #{SINGLEQ} | #{DOUBLEQ} /ox - - SHELL = / ` [^`\#\\]* (?: (?: \#\{.*?\} | \#(?:$`)? | \\. ) [^`\#\\]* )* `? /mox - REGEXP =%r! / [^/\#\\]* (?: (?: \#\{.*?\} | \#(?:$/)? | \\. ) [^/\#\\]* )* /? !mox - - DECIMAL = /\d+(?:_\d+)*/ # doesn't recognize 09 as octal error - OCTAL = /0_?[0-7]+(?:_[0-7]+)*/ - HEXADECIMAL = /0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*/ - BINARY = /0b[01]+(?:_[01]+)*/ - - EXPONENT = / [eE] [+-]? #{DECIMAL} /ox - FLOAT = / #{DECIMAL} (?: #{EXPONENT} | \. #{DECIMAL} #{EXPONENT}? ) / - INTEGER = /#{OCTAL}|#{HEXADECIMAL}|#{BINARY}|#{DECIMAL}/ - - ESCAPE_STRING = / - % (?!\s) - (?: - [qsw] - (?: - \( [^\)\\]* (?: \\. [^\)\\]* )* \)? - | - \[ [^\]\\]* (?: \\. [^\]\\]* )* \]? - | - \{ [^\}\\]* (?: \\. [^\}\\]* )* \}? - | - \< [^\>\\]* (?: \\. [^\>\\]* )* \>? - | - \\ [^\\ ]* \\? - | - ( [^a-zA-Z0-9] ) # $1 - (?:(?!\1)[^\\])* (?: \\. (?:(?!\1)[^\#\\])* )* \1? - ) - | - [QrxWr]? - (?: - \( [^\)\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\)\#\\]* )* \)? - | - \[ [^\]\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\]\#\\]* )* \]? - | - \{ [^\}\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\}\#\\]* )* \}? - | - \< [^\>\#\\]* (?: (?:\#\{.*?\}|\#|\\.) [^\>\#\\]* )* \>? - | - \# [^\# \\]* (?: \\. [^\# \\]* )* \#? - | - \\ [^\\\# ]* (?: (?:\#\{.*?\}|\# ) [^\\\# ]* )* \\? - | - ( [^a-zA-Z0-9] ) # $2 - (?:(?!\2)[^\#\\])* (?: (?:\#\{.*?\}|\#|\\.) (?:(?!\2)[^\#\\])* )* \2? - ) - ) - /mox - - SYMBOL = / - : - (?: - #{GLOBAL_VARIABLE} - | @@?#{IDENT} - | #{METHOD_NAME_EX} - | #{STRING} - )/ox - - HEREDOC = / - << (?! [\dc] ) - (?: [^\n]*? << )? - (?: - ([a-zA-Z_0-9]+) - (?: .*? ^\1$ | .* ) - | - -([a-zA-Z_0-9]+) - (?: .*? ^\s*\2$ | .* ) - | - (["\'`]) (.*?) \3 - (?: .*? ^\4$ | .* ) - | - - (["\'`]) (.*?) \5 - (?: .*? ^\s*\6$ | .* ) - ) - /mx - - RDOC = / - =begin (?!\S) [^\n]* \n? - (?: - (?! =end (?!\S) ) - [^\n]* \n? - )* - (?: - =end (?!\S) [^\n]* - )? - /mx - - DATA = / - __END__\n - (?: - (?=\#CODE) - | - .* - ) - / - - private - def scan_tokens tokens, options - - state = :initial - regexp_allowed = true - last_token_dot = false - - until eos? - match = nil - kind = :error - - if scan(/\s+/) # in every state - kind = :space - regexp_allowed = :set if regexp_allowed or matched.index(?\n) # delayed flag setting - - elsif scan(/ \#[^\n]* /x) # in every state - kind = :comment - regexp_allowed = :set if regexp_allowed - - elsif state == :initial - # IDENTIFIERS, KEYWORDS - if scan(GLOBAL_VARIABLE) - kind = :global_variable - elsif scan(/ @@ #{IDENT} /ox) - kind = :class_variable - elsif scan(/ @ #{IDENT} /ox) - kind = :instance_variable - elsif scan(/ #{DATA} | #{RDOC} /ox) - kind = :comment - elsif scan(METHOD_NAME) - match = matched - if last_token_dot - kind = - if match[/^[A-Z]/] - :constant - else - :ident - end - else - kind = IDENT_KIND[match] - if kind == :ident and match[/^[A-Z]/] - kind = :constant - elsif kind == :reserved - state = DEF_NEW_STATE[match] - regexp_allowed = REGEXP_ALLOWED[match] - end - end - - elsif scan(STRING) - kind = :string - elsif scan(SHELL) - kind = :shell - elsif scan(HEREDOC) - kind = :string - elsif check(/\//) and regexp_allowed - scan(REGEXP) - kind = :regexp - elsif scan(ESCAPE_STRING) - match = matched - kind = - case match[0] - when ?s - :symbol - when ?r - :regexp - when ?x - :shell - else - :string - end - - elsif scan(/:(?:#{GLOBAL_VARIABLE}|#{METHOD_NAME_EX}|#{STRING})/ox) - kind = :symbol - elsif scan(/ - \? (?: - [^\s\\] - | - \\ (?:M-\\C-|C-\\M-|M-\\c|c\\M-|c|C-|M-))? (?: \\ (?: . | [0-7]{3} | x[0-9A-Fa-f][0-9A-Fa-f] ) - ) - /mx) - kind = :integer - - elsif scan(/ [-+*\/%=<>;,|&!()\[\]{}~?] | \.\.?\.? | ::? /x) - kind = :operator - match = matched - regexp_allowed = :set if match[-1,1] =~ /[~=!<>|&^,\(\[+\-\/\*%]\z/ - last_token_dot = :set if match == '.' or match == '::' - elsif scan(FLOAT) - kind = :float - elsif scan(INTEGER) - kind = :integer - else - getch - end - - elsif state == :def_expected - if scan(/ (?:#{IDENT}::)* (?:#{IDENT}\.)? #{METHOD_NAME_EX} /ox) - kind = :method - else - getch - end - state = :initial - - elsif state == :module_expected - if scan(/< :comment, - :varname => :ident, - :number => :integer, - :ws => :space, - :escnl => :space, - :keyword => :reserved, - :methname => :method, - :renderexactlystring => :regexp, - :string => :string, - } - - def scan_tokens tokens, options - require 'tempfile' - Tempfile.open('~coderay_tempfile') do |file| - file.binmode - file.write code - file.rewind - lexer = RubyLexer.new 'code', file - loop do - begin - tok = lexer.get1token - rescue => kaboom - err = <<-EOE - ERROR!!! -#{kaboom.inspect} -#{kaboom.backtrace.join("\n")} - EOE - tokens << [err, :error] - Kernel.raise - end - break if tok.is_a? EoiToken - next if tok.is_a? FileAndLineToken - kind = tok.class.name[/(.*?)Token$/,1].downcase.to_sym - kind = Translate.fetch kind, kind - text = tok.ident - case kind - when :hereplaceholder - text = tok.ender - kind = :string - when :herebody, :outlinedherebody - text = tok.ident.ident - kind = :string - end - text = text.inspect unless text.is_a? String - p token if kind == :error - tokens << [text.dup, kind] - end - end - tokens - end - end - -end end -- cgit v1.2.1