# test/test_sparql_agg_distinct.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

from rdflib import Graph

# SPARQL template shared by the aggregate tests below. The single ``%s``
# placeholder is filled with the aggregate name to apply (with DISTINCT) to
# ?z_. The inline VALUES data puts the same ?z_ value (1) on both "x1" rows,
# so a DISTINCT aggregate must count it only once for that group.
query_tpl = """
SELECT ?x (MIN(?y_) as ?y) (%s(DISTINCT ?z_) as ?z) {
  VALUES (?x ?y_ ?z_) {
    ("x1" 10 1)
    ("x1" 11 1)
    ("x2" 20 2)
  }
} GROUP BY ?x ORDER BY ?x
"""

def test_group_concat_distinct():
    """Check GROUP_CONCAT(DISTINCT ...) on the shared query template.

    The query is run against an empty graph (its data comes from the inline
    VALUES clause) and every result term is converted to a Python value
    before comparison.
    """
    graph = Graph()
    sparql = query_tpl % 'GROUP_CONCAT'
    rows = [
        [term.toPython() for term in solution]
        for solution in graph.query(sparql)
    ]

    # The key check: group "x1" carries the value 1 twice, and the
    # concatenation over distinct values is expected to be just "1".
    first_concat = rows[0][2]
    assert first_concat == "1", first_concat

    # Compare the complete result set as well, as a safety net.
    expected = [
        ["x1", 10, "1"],
        ["x2", 20, "2"],
    ]
    assert rows == expected, rows

def test_sum_distinct():
    """Check SUM(DISTINCT ...) on the shared query template.

    The query is run against an empty graph (its data comes from the inline
    VALUES clause) and every result term is converted to a Python value
    before comparison.
    """
    graph = Graph()
    sparql = query_tpl % 'SUM'
    rows = [
        [term.toPython() for term in solution]
        for solution in graph.query(sparql)
    ]

    # The key check: group "x1" carries the value 1 twice, and the sum over
    # distinct values is expected to be 1.
    first_sum = rows[0][2]
    assert first_sum == 1, first_sum

    # Compare the complete result set as well, as a safety net.
    expected = [
        ["x1", 10, 1],
        ["x2", 20, 2],
    ]
    assert rows == expected, rows

def test_avg_distinct():
    """Check AVG(DISTINCT ...) using a dedicated inline query.

    Group "x1" has the ?z_ values 1, 1 and 3; the test expects the average
    over the distinct values (1 and 3), i.e. 2.
    """
    sparql = """
        SELECT ?x (MIN(?y_) as ?y) (AVG(DISTINCT ?z_) as ?z) {
          VALUES (?x ?y_ ?z_) {
            ("x1" 10 1)
            ("x1" 11 1)
            ("x1" 12 3)
            ("x2" 20 2)
          }
       } GROUP BY ?x ORDER BY ?x
    """
    graph = Graph()
    rows = [
        [term.toPython() for term in solution]
        for solution in graph.query(sparql)
    ]

    # The key check: the duplicated 1 in group "x1" must not skew the mean.
    first_avg = rows[0][2]
    assert first_avg == 2, first_avg

    # Compare the complete result set as well, as a safety net.
    expected = [
        ["x1", 10, 2],
        ["x2", 20, 2],
    ]
    assert rows == expected, rows

def test_count_distinct():
    """Check that COUNT(DISTINCT ?x) agrees with SELECT DISTINCT ?x.

    A small Turtle graph of three resources with ``:age`` and ``:knows``
    properties is loaded, then the same graph pattern is evaluated twice:
    once listing the distinct matches and once counting them.
    """
    turtle_doc = """
    @prefix : <> .

    <#a>
      :knows <#b>, <#c> ;
      :age 42 .

    <#b>
      :knows <#a>, <#c> ;
      :age 36 .

    <#c>
      :knows <#b>, <#c> ;
      :age 20 .

    """
    select_query = """
    PREFIX : <http://example.org/>

    SELECT DISTINCT ?x {
      ?x :age ?ax ; :knows [ :age ?ay ].
      FILTER( ?ax > ?ay )
    }
    """
    count_query = """
    PREFIX : <http://example.org/>

    SELECT (COUNT(DISTINCT ?x) as ?cx) {
      ?x :age ?ax ; :knows [ :age ?ay ].
      FILTER( ?ax > ?ay )
    }
    """

    graph = Graph()
    graph.parse(data=turtle_doc, format="turtle", publicID="http://example.org/")

    # Query 1: people knowing someone younger.
    younger_known = graph.query(select_query)
    assert len(younger_known) == 2

    # Query 2: count people knowing someone younger; the single result row
    # holds the count in its first column.
    count_rows = list(graph.query(count_query))
    first_row = count_rows[0]
    assert first_row[0].toPython() == 2