1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
"""
RDF- and RDFlib-centric file and URL path utilities.
"""
from os.path import basename, splitext
def uri_leaf(uri):
    """
    Return the "leaf" of a URI: its fragment id or its last segment.

    The delimiters ``#``, ``/`` and ``:`` are tried in that order. The text
    following the last occurrence of the first delimiter found in ``uri`` is
    returned. ``None`` is returned when ``uri`` ends with the delimiter
    currently being tried, or when it contains none of the delimiters.
    Handy e.g. for pulling a term out of a "namespace like" URI. Examples:

        >>> uri_leaf('http://example.org/ns/things#item')
        'item'
        >>> uri_leaf('http://example.org/ns/stuff/item')
        'item'
        >>> uri_leaf('http://example.org/ns/stuff/') is None
        True
        >>> uri_leaf('urn:example.org:stuff')
        'stuff'
        >>> uri_leaf('example.org') is None
        True

    """
    for delimiter in ('#', '/', ':'):
        if uri.endswith(delimiter):
            # A trailing delimiter leaves nothing after it; give up entirely
            # rather than falling through to the remaining delimiters.
            return None
        # ``found`` is the empty string when the delimiter does not occur.
        _, found, tail = uri.rpartition(delimiter)
        if found and tail:
            return tail
    return None
# Default lookup table for ``guess_format``: lower-case file suffix (without
# the leading dot) -> name of the RDF serialization format.
SUFFIX_FORMAT_MAP = {
    # suffixes resolved to the 'xml' format
    'rdf': 'xml',
    'rdfs': 'xml',
    'owl': 'xml',
    # suffixes resolved to the 'n3' format
    'n3': 'n3',
    'ttl': 'n3',
    # one suffix per format
    'nt': 'nt',
    'trix': 'trix',
    # markup documents, resolved to the 'rdfa' format
    'xhtml': 'rdfa',
    'html': 'rdfa',
    'svg': 'rdfa',
    # remaining one-suffix formats
    'nq': 'nquads',
    'trig': 'trig',
}
def guess_format(fpath, fmap=None):
    """
    Guess the RDF serialization format from a file suffix.

    The suffix of ``fpath`` is looked up in ``fmap``; when ``fmap`` is
    omitted (or empty) ``SUFFIX_FORMAT_MAP`` is used instead. If the suffix
    lookup yields nothing, the whole lower-cased ``fpath`` is tried as a key.
    Returns the format name, or ``None`` when nothing matches. Examples:

        >>> guess_format('path/to/file.rdf')
        'xml'
        >>> guess_format('path/to/file.owl')
        'xml'
        >>> guess_format('path/to/file.ttl')
        'n3'
        >>> guess_format('path/to/file.xhtml')
        'rdfa'
        >>> guess_format('path/to/file.svg')
        'rdfa'
        >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
        'grddl'

    A bare suffix is accepted too, with or without the leading dot and
    regardless of letter case::

        >>> guess_format('.rdf')
        'xml'
        >>> guess_format('rdf')
        'xml'
        >>> guess_format('RDF')
        'xml'

    """
    lookup = fmap if fmap else SUFFIX_FORMAT_MAP
    by_suffix = lookup.get(_get_ext(fpath))
    if by_suffix:
        return by_suffix
    # No usable suffix match: treat the whole argument as the suffix itself
    # (covers inputs such as 'rdf' or 'RDF' that carry no dot).
    return lookup.get(fpath.lower())
def _get_ext(fpath, lower=True):
"""
Gets the file extension from a file(path); stripped of leading '.' and in
lower case. Examples:
>>> _get_ext("path/to/file.txt")
'txt'
>>> _get_ext("OTHER.PDF")
'pdf'
>>> _get_ext("noext")
''
>>> _get_ext(".rdf")
'rdf'
"""
ext = splitext(fpath)[-1]
if ext == '' and fpath.startswith("."):
ext = fpath
if lower:
ext = ext.lower()
if ext.startswith('.'):
ext = ext[1:]
return ext
|