summaryrefslogtreecommitdiff
path: root/jstests/aggregation/expressions/percentile_expression_approx.js
blob: 34673489fdca442f74e697538ed9672e0e0fba42 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/**
 * Tests for the approximate percentile expression semantics.
 * @tags: [
 *   requires_fcv_70,
 *   featureFlagApproxPercentiles
 * ]
 */
(function() {
"use strict";

load("jstests/aggregation/extras/utils.js");

const coll = db[jsTestName()];

function testWithProject({doc, percentileSpec, expectedResult, msg}) {
    coll.drop();
    coll.insert(doc);
    const res = coll.aggregate([{$project: {p: percentileSpec}}]).toArray();
    // For $percentile the result should be ordered to match the spec, so assert exact equality.
    assert.eq(expectedResult, res[0].p, msg + ` result: ${tojson(res)}`);
}

/**
 * Tests with input as a single expression which evaluates to an array.
 */
testWithProject({
    doc: {x: [0, "non-numeric", 1, 2]},
    percentileSpec: {$percentile: {p: [0.5], input: "$x", method: "approximate"}},
    expectedResult: [1],
    msg: "Non-numeric data in input field which evaluates to an array should be ignored"
});

testWithProject({
    doc: {x: [10, 5, 27], x1: 5},
    percentileSpec: {$percentile: {p: [0], input: "$x", method: "approximate"}},
    expectedResult: [5],
    msg: "Minimun percentile"
});

testWithProject({
    doc: {x: [0, 1, 2]},
    percentileSpec: {$percentile: {p: [0.5, 0.9, 0.1], input: "$x", method: "approximate"}},
    expectedResult: [1, 2, 0],
    msg: "Multiple percentiles when input field evaluates to an array"
});

/**
 * Tests with input as an array of expressions.
 */
testWithProject({
    doc: {x: 0, x1: "non-numeric", x2: 1, x3: 2, x4: [2, 2, 2]},
    percentileSpec:
        {$percentile: {p: [0.5], input: ["$x", "$x1", "$x2", "$x3", "$x4"], method: "approximate"}},
    expectedResult: [1],
    msg: "Non-numeric data in input field passed in as an array should be ignored"
});

testWithProject({
    doc: {x: "non-numeric"},
    percentileSpec: {$percentile: {p: [0.5], input: ["$x"], method: "approximate"}},
    expectedResult: [null],
    msg: "Percentile of completely non-numeric data when input field is an array"
});

testWithProject({
    doc: {x: "non-numeric", x1: "also non-numeric", x2: [1, 2, 3]},
    percentileSpec:
        {$percentile: {p: [0.5, 0.9], input: ["$x", "$x1", "$x2"], method: "approximate"}},
    expectedResult: [null, null],
    msg: "Multiple percentiles of completely non-numeric data in input field passed as an array"
});

testWithProject({
    doc: {x: 10, x1: 5, x2: 27},
    percentileSpec: {$percentile: {p: [1], input: ["$x", "$x1", "$x2"], method: "approximate"}},
    expectedResult: [27],
    msg: "Maximum percentile"
});

testWithProject({
    doc: {x: 0, x1: 1, x2: 2},
    percentileSpec:
        {$percentile: {p: [0.5, 0.9, 0.1], input: ["$x", "$x1", "$x2"], method: "approximate"}},
    expectedResult: [1, 2, 0],
    msg: "Multiple percentiles when input field is passed as an array"
});

/**
 * Tests with input as a single expression.
 */
testWithProject({
    doc: {x: 0, x1: 1, x2: 2},
    percentileSpec: {$percentile: {p: [0.5, 0.9, 0.1], input: "$x", method: "approximate"}},
    expectedResult: [0, 0, 0],
    msg: "Multiple percentiles when single input expression resolves to a numeric scalar"
});

testWithProject({
    doc: {x: 0, x1: "non-numeric", x2: 2},
    percentileSpec: {$percentile: {p: [0.5, 0.9, 0.1], input: "$x1", method: "approximate"}},
    expectedResult: [null, null, null],
    msg: "Multiple percentiles when single input expression resolves to a non-numeric scalar"
});

/**
 * 'rand()' generates a uniform distribution from [0.0, 1.0] so we can check accuracy of the result
 * in terms of values rather than in terms of rank.
 */
(function testLargeInput() {
    Random.setRandomSeed(20230406);

    const n = 100000;
    let samples = [];
    for (let i = 0; i < n; i++) {
        samples.push(Random.rand());
    }
    let sortedSamples = [].concat(samples);
    sortedSamples.sort((a, b) => a - b);

    coll.drop();
    coll.insert({x: samples});
    const ps = [0.5, 0.999, 0.0001];
    const res =
        coll.aggregate(
                [{$project: {p: {$percentile: {p: ps, input: "$x", method: "approximate"}}}}])
            .toArray();

    for (let i = 0; i < ps.length; i++) {
        let pctl = res[0].p[i];
        assert.lt(ps[i] - 0.01, pctl, `p = ${ps[i]} left bound`);
        assert.lt(pctl, ps[i] + 0.01, `p = ${ps[i]} right bound`);
    }
})();

(function testLargeNonNumericInput() {
    const n = 100000;
    let samples = [];
    for (let i = 0; i < n; i++) {
        samples.push([i]);
    }

    testWithProject({
        doc: {x: samples},
        percentileSpec: {$percentile: {p: [0.5, 0.9, 0.1], input: "$x", method: "approximate"}},
        expectedResult: [null, null, null],
        msg: "Multiple percentiles on large non-numeric input"
    });
})();
})();