summaryrefslogtreecommitdiff
path: root/rdflib/extras/utils/pathutils.py
blob: a547411a63c7e89dbddc9e33e713ba10cc723d94 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
RDF- and RDFlib-centric file and URL path utilities.
"""

from os.path import splitext


def uri_leaf(uri):
    """
    Get the "leaf" - fragment id or last segment - of a URI. Useful e.g. for
    getting a term from a "namespace like" URI. Examples:

        >>> uri_leaf('http://example.org/ns/things#item')
        'item'
        >>> uri_leaf('http://example.org/ns/stuff/item')
        'item'
        >>> uri_leaf('http://example.org/ns/stuff/')
        >>>
        >>> uri_leaf('urn:example.org:stuff')
        'stuff'
        >>> uri_leaf('example.org')
        >>>
    """
    for char in ('#', '/', ':'):
        if uri.endswith(char):
            break
        # base, sep, leaf = uri.rpartition(char)
        if char in uri:
            sep = char
            leaf = uri.rsplit(char)[-1]
        else:
            sep = ''
            leaf = uri
        if sep and leaf:
            return leaf


SUFFIX_FORMAT_MAP = {
    'rdf': 'xml',
    'rdfs': 'xml',
    'owl': 'xml',
    'n3': 'n3',
    'ttl': 'n3',
    'nt': 'nt',
    'trix': 'trix',
    'xhtml': 'rdfa',
    'html': 'rdfa',
    'svg': 'rdfa',
    'nq': 'nquads',
    'trig': 'trig'
}


def guess_format(fpath, fmap=None):
    """
    Guess RDF serialization based on file suffix. Uses
    ``SUFFIX_FORMAT_MAP`` unless ``fmap`` is provided. Examples:

        >>> guess_format('path/to/file.rdf')
        'xml'
        >>> guess_format('path/to/file.owl')
        'xml'
        >>> guess_format('path/to/file.ttl')
        'n3'
        >>> guess_format('path/to/file.xhtml')
        'rdfa'
        >>> guess_format('path/to/file.svg')
        'rdfa'
        >>> guess_format('path/to/file.xhtml', {'xhtml': 'grddl'})
        'grddl'

    This also works with just the suffixes, with or without leading dot, and
    regardless of letter case::

        >>> guess_format('.rdf')
        'xml'
        >>> guess_format('rdf')
        'xml'
        >>> guess_format('RDF')
        'xml'
    """
    fmap = fmap or SUFFIX_FORMAT_MAP
    return fmap.get(_get_ext(fpath)) or fmap.get(fpath.lower())


def _get_ext(fpath, lower=True):
    """
    Gets the file extension from a file(path); stripped of leading '.' and in
    lower case. Examples:

        >>> _get_ext("path/to/file.txt")
        'txt'
        >>> _get_ext("OTHER.PDF")
        'pdf'
        >>> _get_ext("noext")
        ''
        >>> _get_ext(".rdf")
        'rdf'
    """
    ext = splitext(fpath)[-1]
    if ext == '' and fpath.startswith("."):
        ext = fpath
    if lower:
        ext = ext.lower()
    if ext.startswith('.'):
        ext = ext[1:]
    return ext