1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
"""
This implements the Tab Separated SPARQL Result Format
It is implemented with pyparsing, reusing the elements from the SPARQL Parser
"""
import codecs
from pyparsing import (
Optional, ZeroOrMore, Literal, ParserElement, ParseException, Suppress,
FollowedBy, LineEnd)
from rdflib.query import Result, ResultParser
from rdflib.plugins.sparql.parser import (
Var, STRING_LITERAL1, STRING_LITERAL2, IRIREF, BLANK_NODE_LABEL,
NumericLiteral, BooleanLiteral, LANGTAG)
from rdflib.plugins.sparql.parserutils import Comp, Param, CompValue
from rdflib import Literal as RDFLiteral
ParserElement.setDefaultWhitespaceChars(" \n")
String = STRING_LITERAL1 | STRING_LITERAL2
RDFLITERAL = Comp('literal', Param('string', String) + Optional(
Param('lang', LANGTAG.leaveWhitespace()
) | Literal('^^').leaveWhitespace(
) + Param('datatype', IRIREF).leaveWhitespace()))
NONE_VALUE = object()
EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)
TERM = RDFLITERAL | IRIREF | BLANK_NODE_LABEL | NumericLiteral | BooleanLiteral
ROW = (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
ROW.parseWithTabs()
HEADER = Var + ZeroOrMore(Suppress("\t") + Var)
HEADER.parseWithTabs()
class TSVResultParser(ResultParser):
def parse(self, source, content_type=None):
if isinstance(source.read(0), bytes):
# if reading from source returns bytes do utf-8 decoding
source = codecs.getreader('utf-8')(source)
try:
r = Result('SELECT')
header = source.readline()
r.vars = list(HEADER.parseString(header.strip(), parseAll=True))
r.bindings = []
while True:
line = source.readline()
if not line:
break
line = line.strip('\n')
if line == "":
continue
row = ROW.parseString(line, parseAll=True)
r.bindings.append(
dict(zip(r.vars, (self.convertTerm(x) for x in row))))
return r
except ParseException as err:
print(err.line)
print(" " * (err.column - 1) + "^")
print(err)
def convertTerm(self, t):
if t is NONE_VALUE:
return None
if isinstance(t, CompValue):
if t.name == 'literal':
return RDFLiteral(t.string, lang=t.lang, datatype=t.datatype)
else:
raise Exception("I dont know how to handle this: %s" % (t,))
else:
return t
if __name__ == '__main__':
import sys
r = Result.parse(file(sys.argv[1]), format='tsv')
print(r.vars)
print(r.bindings)
# print r.serialize(format='json')
|