summaryrefslogtreecommitdiff
path: root/pygments/lexers/data.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/lexers/data.py')
-rw-r--r--pygments/lexers/data.py509
1 files changed, 509 insertions, 0 deletions
diff --git a/pygments/lexers/data.py b/pygments/lexers/data.py
new file mode 100644
index 00000000..6b7cc107
--- /dev/null
+++ b/pygments/lexers/data.py
@@ -0,0 +1,509 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.data
+ ~~~~~~~~~~~~~~~~~~~~
+
+ Lexers for data file format.
+
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
+ include, bygroups
+from pygments.token import Text, Comment, Keyword, Name, String, Number, \
+ Punctuation, Literal
+
+__all__ = ['YamlLexer', 'JsonLexer']
+
+
+class YamlLexerContext(LexerContext):
+ """Indentation context for the YAML lexer."""
+
+ def __init__(self, *args, **kwds):
+ super(YamlLexerContext, self).__init__(*args, **kwds)
+ self.indent_stack = []
+ self.indent = -1
+ self.next_indent = 0
+ self.block_scalar_indent = None
+
+
+class YamlLexer(ExtendedRegexLexer):
+ """
+ Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
+ language.
+
+ .. versionadded:: 0.11
+ """
+
+ name = 'YAML'
+ aliases = ['yaml']
+ filenames = ['*.yaml', '*.yml']
+ mimetypes = ['text/x-yaml']
+
+ def something(token_class):
+ """Do not produce empty tokens."""
+ def callback(lexer, match, context):
+ text = match.group()
+ if not text:
+ return
+ yield match.start(), token_class, text
+ context.pos = match.end()
+ return callback
+
+ def reset_indent(token_class):
+ """Reset the indentation levels."""
+ def callback(lexer, match, context):
+ text = match.group()
+ context.indent_stack = []
+ context.indent = -1
+ context.next_indent = 0
+ context.block_scalar_indent = None
+ yield match.start(), token_class, text
+ context.pos = match.end()
+ return callback
+
+ def save_indent(token_class, start=False):
+ """Save a possible indentation level."""
+ def callback(lexer, match, context):
+ text = match.group()
+ extra = ''
+ if start:
+ context.next_indent = len(text)
+ if context.next_indent < context.indent:
+ while context.next_indent < context.indent:
+ context.indent = context.indent_stack.pop()
+ if context.next_indent > context.indent:
+ extra = text[context.indent:]
+ text = text[:context.indent]
+ else:
+ context.next_indent += len(text)
+ if text:
+ yield match.start(), token_class, text
+ if extra:
+ yield match.start()+len(text), token_class.Error, extra
+ context.pos = match.end()
+ return callback
+
+ def set_indent(token_class, implicit=False):
+ """Set the previously saved indentation level."""
+ def callback(lexer, match, context):
+ text = match.group()
+ if context.indent < context.next_indent:
+ context.indent_stack.append(context.indent)
+ context.indent = context.next_indent
+ if not implicit:
+ context.next_indent += len(text)
+ yield match.start(), token_class, text
+ context.pos = match.end()
+ return callback
+
+ def set_block_scalar_indent(token_class):
+ """Set an explicit indentation level for a block scalar."""
+ def callback(lexer, match, context):
+ text = match.group()
+ context.block_scalar_indent = None
+ if not text:
+ return
+ increment = match.group(1)
+ if increment:
+ current_indent = max(context.indent, 0)
+ increment = int(increment)
+ context.block_scalar_indent = current_indent + increment
+ if text:
+ yield match.start(), token_class, text
+ context.pos = match.end()
+ return callback
+
+ def parse_block_scalar_empty_line(indent_token_class, content_token_class):
+ """Process an empty line in a block scalar."""
+ def callback(lexer, match, context):
+ text = match.group()
+ if (context.block_scalar_indent is None or
+ len(text) <= context.block_scalar_indent):
+ if text:
+ yield match.start(), indent_token_class, text
+ else:
+ indentation = text[:context.block_scalar_indent]
+ content = text[context.block_scalar_indent:]
+ yield match.start(), indent_token_class, indentation
+ yield (match.start()+context.block_scalar_indent,
+ content_token_class, content)
+ context.pos = match.end()
+ return callback
+
+ def parse_block_scalar_indent(token_class):
+ """Process indentation spaces in a block scalar."""
+ def callback(lexer, match, context):
+ text = match.group()
+ if context.block_scalar_indent is None:
+ if len(text) <= max(context.indent, 0):
+ context.stack.pop()
+ context.stack.pop()
+ return
+ context.block_scalar_indent = len(text)
+ else:
+ if len(text) < context.block_scalar_indent:
+ context.stack.pop()
+ context.stack.pop()
+ return
+ if text:
+ yield match.start(), token_class, text
+ context.pos = match.end()
+ return callback
+
+ def parse_plain_scalar_indent(token_class):
+ """Process indentation spaces in a plain scalar."""
+ def callback(lexer, match, context):
+ text = match.group()
+ if len(text) <= context.indent:
+ context.stack.pop()
+ context.stack.pop()
+ return
+ if text:
+ yield match.start(), token_class, text
+ context.pos = match.end()
+ return callback
+
+ tokens = {
+ # the root rules
+ 'root': [
+ # ignored whitespaces
+ (r'[ ]+(?=#|$)', Text),
+ # line breaks
+ (r'\n+', Text),
+ # a comment
+ (r'#[^\n]*', Comment.Single),
+ # the '%YAML' directive
+ (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
+ # the %TAG directive
+ (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
+ # document start and document end indicators
+ (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
+ 'block-line'),
+ # indentation spaces
+ (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True),
+ ('block-line', 'indentation')),
+ ],
+
+ # trailing whitespaces after directives or a block scalar indicator
+ 'ignored-line': [
+ # ignored whitespaces
+ (r'[ ]+(?=#|$)', Text),
+ # a comment
+ (r'#[^\n]*', Comment.Single),
+ # line break
+ (r'\n', Text, '#pop:2'),
+ ],
+
+ # the %YAML directive
+ 'yaml-directive': [
+ # the version number
+ (r'([ ]+)([0-9]+\.[0-9]+)',
+ bygroups(Text, Number), 'ignored-line'),
+ ],
+
+ # the %YAG directive
+ 'tag-directive': [
+ # a tag handle and the corresponding prefix
+ (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
+ r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
+ bygroups(Text, Keyword.Type, Text, Keyword.Type),
+ 'ignored-line'),
+ ],
+
+ # block scalar indicators and indentation spaces
+ 'indentation': [
+ # trailing whitespaces are ignored
+ (r'[ ]*$', something(Text), '#pop:2'),
+ # whitespaces preceeding block collection indicators
+ (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
+ # block collection indicators
+ (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
+ # the beginning a block line
+ (r'[ ]*', save_indent(Text), '#pop'),
+ ],
+
+ # an indented line in the block context
+ 'block-line': [
+ # the line end
+ (r'[ ]*(?=#|$)', something(Text), '#pop'),
+ # whitespaces separating tokens
+ (r'[ ]+', Text),
+ # tags, anchors and aliases,
+ include('descriptors'),
+ # block collections and scalars
+ include('block-nodes'),
+ # flow collections and quoted scalars
+ include('flow-nodes'),
+ # a plain scalar
+ (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
+ something(Name.Variable),
+ 'plain-scalar-in-block-context'),
+ ],
+
+ # tags, anchors, aliases
+ 'descriptors': [
+ # a full-form tag
+ (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type),
+ # a tag in the form '!', '!suffix' or '!handle!suffix'
+ (r'!(?:[0-9A-Za-z_-]+)?'
+ r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type),
+ # an anchor
+ (r'&[0-9A-Za-z_-]+', Name.Label),
+ # an alias
+ (r'\*[0-9A-Za-z_-]+', Name.Variable),
+ ],
+
+ # block collections and scalars
+ 'block-nodes': [
+ # implicit key
+ (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
+ # literal and folded scalars
+ (r'[|>]', Punctuation.Indicator,
+ ('block-scalar-content', 'block-scalar-header')),
+ ],
+
+ # flow collections and quoted scalars
+ 'flow-nodes': [
+ # a flow sequence
+ (r'\[', Punctuation.Indicator, 'flow-sequence'),
+ # a flow mapping
+ (r'\{', Punctuation.Indicator, 'flow-mapping'),
+ # a single-quoted scalar
+ (r'\'', String, 'single-quoted-scalar'),
+ # a double-quoted scalar
+ (r'\"', String, 'double-quoted-scalar'),
+ ],
+
+ # the content of a flow collection
+ 'flow-collection': [
+ # whitespaces
+ (r'[ ]+', Text),
+ # line breaks
+ (r'\n+', Text),
+ # a comment
+ (r'#[^\n]*', Comment.Single),
+ # simple indicators
+ (r'[?:,]', Punctuation.Indicator),
+ # tags, anchors and aliases
+ include('descriptors'),
+ # nested collections and quoted scalars
+ include('flow-nodes'),
+ # a plain scalar
+ (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
+ something(Name.Variable),
+ 'plain-scalar-in-flow-context'),
+ ],
+
+ # a flow sequence indicated by '[' and ']'
+ 'flow-sequence': [
+ # include flow collection rules
+ include('flow-collection'),
+ # the closing indicator
+ (r'\]', Punctuation.Indicator, '#pop'),
+ ],
+
+ # a flow mapping indicated by '{' and '}'
+ 'flow-mapping': [
+ # include flow collection rules
+ include('flow-collection'),
+ # the closing indicator
+ (r'\}', Punctuation.Indicator, '#pop'),
+ ],
+
+ # block scalar lines
+ 'block-scalar-content': [
+ # line break
+ (r'\n', Text),
+ # empty line
+ (r'^[ ]+$',
+ parse_block_scalar_empty_line(Text, Name.Constant)),
+ # indentation spaces (we may leave the state here)
+ (r'^[ ]*', parse_block_scalar_indent(Text)),
+ # line content
+ (r'[^\n\r\f\v]+', Name.Constant),
+ ],
+
+ # the content of a literal or folded scalar
+ 'block-scalar-header': [
+ # indentation indicator followed by chomping flag
+ (r'([1-9])?[+-]?(?=[ ]|$)',
+ set_block_scalar_indent(Punctuation.Indicator),
+ 'ignored-line'),
+ # chomping flag followed by indentation indicator
+ (r'[+-]?([1-9])?(?=[ ]|$)',
+ set_block_scalar_indent(Punctuation.Indicator),
+ 'ignored-line'),
+ ],
+
+ # ignored and regular whitespaces in quoted scalars
+ 'quoted-scalar-whitespaces': [
+ # leading and trailing whitespaces are ignored
+ (r'^[ ]+', Text),
+ (r'[ ]+$', Text),
+ # line breaks are ignored
+ (r'\n+', Text),
+ # other whitespaces are a part of the value
+ (r'[ ]+', Name.Variable),
+ ],
+
+ # single-quoted scalars
+ 'single-quoted-scalar': [
+ # include whitespace and line break rules
+ include('quoted-scalar-whitespaces'),
+ # escaping of the quote character
+ (r'\'\'', String.Escape),
+ # regular non-whitespace characters
+ (r'[^ \t\n\r\f\v\']+', String),
+ # the closing quote
+ (r'\'', String, '#pop'),
+ ],
+
+ # double-quoted scalars
+ 'double-quoted-scalar': [
+ # include whitespace and line break rules
+ include('quoted-scalar-whitespaces'),
+ # escaping of special characters
+ (r'\\[0abt\tn\nvfre "\\N_LP]', String),
+ # escape codes
+ (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
+ String.Escape),
+ # regular non-whitespace characters
+ (r'[^ \t\n\r\f\v\"\\]+', String),
+ # the closing quote
+ (r'"', String, '#pop'),
+ ],
+
+ # the beginning of a new line while scanning a plain scalar
+ 'plain-scalar-in-block-context-new-line': [
+ # empty lines
+ (r'^[ ]+$', Text),
+ # line breaks
+ (r'\n+', Text),
+ # document start and document end indicators
+ (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
+ # indentation spaces (we may leave the block line state here)
+ (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
+ ],
+
+ # a plain scalar in the block context
+ 'plain-scalar-in-block-context': [
+ # the scalar ends with the ':' indicator
+ (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
+ # the scalar ends with whitespaces followed by a comment
+ (r'[ ]+(?=#)', Text, '#pop'),
+ # trailing whitespaces are ignored
+ (r'[ ]+$', Text),
+ # line breaks are ignored
+ (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
+ # other whitespaces are a part of the value
+ (r'[ ]+', Literal.Scalar.Plain),
+ # regular non-whitespace characters
+ (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+', Literal.Scalar.Plain),
+ ],
+
+ # a plain scalar is the flow context
+ 'plain-scalar-in-flow-context': [
+ # the scalar ends with an indicator character
+ (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
+ # the scalar ends with a comment
+ (r'[ ]+(?=#)', Text, '#pop'),
+ # leading and trailing whitespaces are ignored
+ (r'^[ ]+', Text),
+ (r'[ ]+$', Text),
+ # line breaks are ignored
+ (r'\n+', Text),
+ # other whitespaces are a part of the value
+ (r'[ ]+', Name.Variable),
+ # regular non-whitespace characters
+ (r'[^ \t\n\r\f\v,:?\[\]{}]+', Name.Variable),
+ ],
+
+ }
+
+ def get_tokens_unprocessed(self, text=None, context=None):
+ if context is None:
+ context = YamlLexerContext(text, 0)
+ return super(YamlLexer, self).get_tokens_unprocessed(text, context)
+
+
+class JsonLexer(RegexLexer):
+ """
+ For JSON data structures.
+
+ .. versionadded:: 1.5
+ """
+
+ name = 'JSON'
+ aliases = ['json']
+ filenames = ['*.json']
+ mimetypes = ['application/json']
+
+ flags = re.DOTALL
+
+ # integer part of a number
+ int_part = r'-?(0|[1-9]\d*)'
+
+ # fractional part of a number
+ frac_part = r'\.\d+'
+
+ # exponential part of a number
+ exp_part = r'[eE](\+|-)?\d+'
+
+ tokens = {
+ 'whitespace': [
+ (r'\s+', Text),
+ ],
+
+ # represents a simple terminal value
+ 'simplevalue': [
+ (r'(true|false|null)\b', Keyword.Constant),
+ (('%(int_part)s(%(frac_part)s%(exp_part)s|'
+ '%(exp_part)s|%(frac_part)s)') % vars(),
+ Number.Float),
+ (int_part, Number.Integer),
+ (r'"(\\\\|\\"|[^"])*"', String.Double),
+ ],
+
+
+ # the right hand side of an object, after the attribute name
+ 'objectattribute': [
+ include('value'),
+ (r':', Punctuation),
+ # comma terminates the attribute but expects more
+ (r',', Punctuation, '#pop'),
+ # a closing bracket terminates the entire object, so pop twice
+ (r'}', Punctuation, ('#pop', '#pop')),
+ ],
+
+ # a json object - { attr, attr, ... }
+ 'objectvalue': [
+ include('whitespace'),
+ (r'"(\\\\|\\"|[^"])*"', Name.Tag, 'objectattribute'),
+ (r'}', Punctuation, '#pop'),
+ ],
+
+ # json array - [ value, value, ... }
+ 'arrayvalue': [
+ include('whitespace'),
+ include('value'),
+ (r',', Punctuation),
+ (r']', Punctuation, '#pop'),
+ ],
+
+ # a json value - either a simple value or a complex value (object or array)
+ 'value': [
+ include('whitespace'),
+ include('simplevalue'),
+ (r'{', Punctuation, 'objectvalue'),
+ (r'\[', Punctuation, 'arrayvalue'),
+ ],
+
+ # the root of a json document whould be a value
+ 'root': [
+ include('value'),
+ ],
+ }