summaryrefslogtreecommitdiff
path: root/rdflib/plugins/sparql/results/tsvresults.py
blob: 1395eaff9461b4721a53479edbed69fa43eaeef9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

"""
This implements the Tab Separated SPARQL Result Format

It is implemented with pyparsing, reusing the elements from the SPARQL Parser
"""

import codecs

from pyparsing import (
    Optional, ZeroOrMore, Literal, ParserElement, ParseException, Suppress,
    FollowedBy, LineEnd)

from rdflib.query import Result, ResultParser

from rdflib.plugins.sparql.parser import (
    Var, STRING_LITERAL1, STRING_LITERAL2, IRIREF, BLANK_NODE_LABEL,
    NumericLiteral, BooleanLiteral, LANGTAG)
from rdflib.plugins.sparql.parserutils import Comp, Param, CompValue

from rdflib import Literal as RDFLiteral

ParserElement.setDefaultWhitespaceChars(" \n")


String = STRING_LITERAL1 | STRING_LITERAL2

RDFLITERAL = Comp('literal', Param('string', String) + Optional(
    Param('lang', LANGTAG.leaveWhitespace()
          ) | Literal('^^').leaveWhitespace(
    ) + Param('datatype', IRIREF).leaveWhitespace()))

NONE_VALUE = object()

EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)

TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral

ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()

HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()


class TSVResultParser(ResultParser):
    def parse(self, source, content_type=None):

        if isinstance(source.read(0), bytes):
            # if reading from source returns bytes do utf-8 decoding
            source = codecs.getreader('utf-8')(source)

        try:
            r = Result('SELECT')

            header = source.readline()

            r.vars = list(HEADER.parseString(header.strip(), parseAll=True))
            r.bindings = []
            while True:
                line = source.readline()
                if not line:
                    break
                line = line.strip('\n')
                if line == "":
                    continue

                row = ROW.parseString(line, parseAll=True)
                r.bindings.append(
                    dict(zip(r.vars, (self.convertTerm(x) for x in row))))

            return r

        except ParseException as err:
            print(err.line)
            print(" " * (err.column - 1) + "^")
            print(err)

    def convertTerm(self, t):
        if t is NONE_VALUE:
            return None
        if isinstance(t, CompValue):
            if t.name == 'literal':
                return RDFLiteral(t.string, lang=t.lang, datatype=t.datatype)
            else:
                raise Exception("I dont know how to handle this: %s" % (t,))
        else:
            return t


if __name__ == '__main__':
    import sys
    r = Result.parse(file(sys.argv[1]), format='tsv')
    print(r.vars)
    print(r.bindings)
    # print r.serialize(format='json')