summary | refs | log | tree | commit | diff
path: root/Python
diff options
context:
space:
mode:
author: Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> 2021-12-03 04:29:12 -0700
committer: GitHub <noreply@github.com> 2021-12-03 11:29:12 +0000
commit: 03768c4d139df46212a091ed931aad03bec18b57 (patch)
tree: 35ab5e03aacded970ee0b1d21077f18782176c08 /Python
parent: 99fcf1505218464c489d419d4500f126b6d6dc28 (diff)
download: cpython-git-03768c4d139df46212a091ed931aad03bec18b57.tar.gz
bpo-45885: Specialize COMPARE_OP (GH-29734)
* Add COMPARE_OP_ADAPTIVE adaptive instruction.
* Add COMPARE_OP_FLOAT_JUMP, COMPARE_OP_INT_JUMP and COMPARE_OP_STR_JUMP specialized instructions.
* Introduce and use _PyUnicode_Equal
Diffstat (limited to 'Python')
-rw-r--r-- Python/ceval.c 122
-rw-r--r-- Python/opcode_targets.h 46
-rw-r--r-- Python/specialize.c 79
3 files changed, 224 insertions, 23 deletions
diff --git a/Python/ceval.c b/Python/ceval.c
index 97c684479a..05897c561a 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -3778,6 +3778,8 @@ check_eval_breaker:
}
TARGET(COMPARE_OP) {
+ PREDICTED(COMPARE_OP);
+ STAT_INC(COMPARE_OP, unquickened);
assert(oparg <= Py_GE);
PyObject *right = POP();
PyObject *left = TOP();
@@ -3792,6 +3794,125 @@ check_eval_breaker:
DISPATCH();
}
+ TARGET(COMPARE_OP_ADAPTIVE) {  // Adaptive COMPARE_OP: attempts specialization once its cache counter reaches zero.
+ assert(cframe.use_tracing == 0);
+ SpecializedCacheEntry *cache = GET_CACHE();
+ if (cache->adaptive.counter == 0) {
+ PyObject *right = TOP();  // Operands are only peeked at here; nothing is popped.
+ PyObject *left = SECOND();
+ next_instr--;  // Step back so the pointer passed below is this instruction, which the specializer may rewrite.
+ _Py_Specialize_CompareOp(left, right, next_instr, cache);
+ DISPATCH();  // NOTE(review): presumably re-dispatches on the (possibly rewritten) instruction -- confirm.
+ }
+ else {
+ STAT_INC(COMPARE_OP, deferred);
+ cache->adaptive.counter--;
+ oparg = cache->adaptive.original_oparg;  // Hand the original comparison op to the generic handler.
+ STAT_DEC(COMPARE_OP, unquickened);  // Offsets the STAT_INC(COMPARE_OP, unquickened) done in TARGET(COMPARE_OP).
+ JUMP_TO_INSTRUCTION(COMPARE_OP);
+ }
+ }
+
+ TARGET(COMPARE_OP_FLOAT_JUMP) {
+ assert(cframe.use_tracing == 0);
+ // Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false)
+ SpecializedCacheEntry *caches = GET_CACHE();
+ int when_to_jump_mask = caches[0].adaptive.index;  // Stored by _Py_Specialize_CompareOp; bits 1/2/4 = jump if less/equal/greater.
+ PyObject *right = TOP();
+ PyObject *left = SECOND();
+ DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);  // NOTE(review): DEOPT_IF presumably falls back to generic COMPARE_OP -- confirm.
+ DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP);
+ double dleft = PyFloat_AS_DOUBLE(left);
+ double dright = PyFloat_AS_DOUBLE(right);
+ int sign = (dleft > dright) - (dleft < dright);  // -1, 0 or 1; a NaN operand would yield 0 because both comparisons are false.
+ DEOPT_IF(isnan(dleft), COMPARE_OP);  // NaN must not be treated as "equal", so it is left to the generic path.
+ DEOPT_IF(isnan(dright), COMPARE_OP);
+ STAT_INC(COMPARE_OP, hit);
+ NEXTOPARG();  // Expected to load the following jump instruction into opcode/oparg (see the assert below).
+ STACK_SHRINK(2);
+ Py_DECREF(left);
+ Py_DECREF(right);
+ assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
+ int jump = (1 << (sign + 1)) & when_to_jump_mask;  // sign -1/0/1 selects mask bit 1/2/4.
+ if (!jump) {
+ next_instr++;  // Not jumping: advance by one, which appears to step past the fused jump instruction.
+ NOTRACE_DISPATCH();
+ }
+ else {
+ JUMPTO(oparg);  // oparg now belongs to the jump instruction read by NEXTOPARG().
+ CHECK_EVAL_BREAKER();
+ NOTRACE_DISPATCH();
+ }
+ }
+
+ TARGET(COMPARE_OP_INT_JUMP) {
+ assert(cframe.use_tracing == 0);
+ // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false)
+ SpecializedCacheEntry *caches = GET_CACHE();
+ int when_to_jump_mask = caches[0].adaptive.index;  // Stored by _Py_Specialize_CompareOp; bits 1/2/4 = jump if less/equal/greater.
+ PyObject *right = TOP();
+ PyObject *left = SECOND();
+ DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
+ DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP);
+ DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP);  // Passes only when Py_SIZE(left) is -1, 0 or 1.
+ DEOPT_IF((size_t)(Py_SIZE(right) + 1) > 2, COMPARE_OP);  // Same single-digit check for the right operand.
+ STAT_INC(COMPARE_OP, hit);
+ assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1);
+ Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0];  // Size (-1/0/1) times the first digit; 0 when the size is 0.
+ Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0];
+ int sign = (ileft > iright) - (ileft < iright);  // -1, 0 or 1.
+ NEXTOPARG();  // Expected to load the following jump instruction into opcode/oparg (see the assert below).
+ STACK_SHRINK(2);
+ Py_DECREF(left);
+ Py_DECREF(right);
+ assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
+ int jump = (1 << (sign + 1)) & when_to_jump_mask;  // sign -1/0/1 selects mask bit 1/2/4.
+ if (!jump) {
+ next_instr++;  // Not jumping: advance by one, which appears to step past the fused jump instruction.
+ NOTRACE_DISPATCH();
+ }
+ else {
+ JUMPTO(oparg);  // oparg now belongs to the jump instruction read by NEXTOPARG().
+ CHECK_EVAL_BREAKER();
+ NOTRACE_DISPATCH();
+ }
+ }
+
+ TARGET(COMPARE_OP_STR_JUMP) {
+ assert(cframe.use_tracing == 0);
+ // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false)
+ SpecializedCacheEntry *caches = GET_CACHE();
+ int invert = caches[0].adaptive.index;  // Stored by _Py_Specialize_CompareOp as (when_to_jump_mask & 2) == 0: 1 means jump on inequality.
+ PyObject *right = TOP();
+ PyObject *left = SECOND();
+ DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
+ DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP);
+ STAT_INC(COMPARE_OP, hit);
+ int res = _PyUnicode_Equal(left, right);
+ if (res < 0) {
+ goto error;  // Taken before the stack is shrunk, so both operands are still on the stack.
+ }
+ assert(caches[0].adaptive.original_oparg == Py_EQ ||
+ caches[0].adaptive.original_oparg == Py_NE);
+ NEXTOPARG();  // Expected to load the following jump instruction into opcode/oparg (see the assert below).
+ assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE);
+ STACK_SHRINK(2);
+ Py_DECREF(left);
+ Py_DECREF(right);
+ assert(res == 0 || res == 1);
+ assert(invert == 0 || invert == 1);
+ int jump = res ^ invert;  // Jump on equality when invert is 0, on inequality when invert is 1.
+ if (!jump) {
+ next_instr++;  // Not jumping: advance by one, which appears to step past the fused jump instruction.
+ NOTRACE_DISPATCH();
+ }
+ else {
+ JUMPTO(oparg);  // oparg now belongs to the jump instruction read by NEXTOPARG().
+ CHECK_EVAL_BREAKER();
+ NOTRACE_DISPATCH();
+ }
+ }
+
TARGET(IS_OP) {
PyObject *right = POP();
PyObject *left = TOP();
@@ -5083,6 +5204,7 @@ MISS_WITH_CACHE(LOAD_GLOBAL)
MISS_WITH_CACHE(LOAD_METHOD)
MISS_WITH_CACHE(CALL_FUNCTION)
MISS_WITH_CACHE(BINARY_OP)
+MISS_WITH_CACHE(COMPARE_OP)
MISS_WITH_CACHE(BINARY_SUBSCR)
MISS_WITH_OPARG_COUNTER(STORE_SUBSCR)
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index c9d430d268..872a688311 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -20,23 +20,27 @@ static void *opcode_targets[256] = {
&&TARGET_BINARY_OP_MULTIPLY_FLOAT,
&&TARGET_BINARY_OP_SUBTRACT_INT,
&&TARGET_BINARY_OP_SUBTRACT_FLOAT,
+ &&TARGET_COMPARE_OP_ADAPTIVE,
+ &&TARGET_COMPARE_OP_FLOAT_JUMP,
+ &&TARGET_COMPARE_OP_INT_JUMP,
+ &&TARGET_COMPARE_OP_STR_JUMP,
+ &&TARGET_BINARY_SUBSCR,
&&TARGET_BINARY_SUBSCR_ADAPTIVE,
&&TARGET_BINARY_SUBSCR_GETITEM,
&&TARGET_BINARY_SUBSCR_LIST_INT,
&&TARGET_BINARY_SUBSCR_TUPLE_INT,
- &&TARGET_BINARY_SUBSCR,
- &&TARGET_BINARY_SUBSCR_DICT,
- &&TARGET_STORE_SUBSCR_ADAPTIVE,
- &&TARGET_STORE_SUBSCR_LIST_INT,
- &&TARGET_STORE_SUBSCR_DICT,
&&TARGET_GET_LEN,
&&TARGET_MATCH_MAPPING,
&&TARGET_MATCH_SEQUENCE,
&&TARGET_MATCH_KEYS,
- &&TARGET_CALL_FUNCTION_ADAPTIVE,
+ &&TARGET_BINARY_SUBSCR_DICT,
&&TARGET_PUSH_EXC_INFO,
- &&TARGET_CALL_FUNCTION_BUILTIN_O,
+ &&TARGET_STORE_SUBSCR_ADAPTIVE,
&&TARGET_POP_EXCEPT_AND_RERAISE,
+ &&TARGET_STORE_SUBSCR_LIST_INT,
+ &&TARGET_STORE_SUBSCR_DICT,
+ &&TARGET_CALL_FUNCTION_ADAPTIVE,
+ &&TARGET_CALL_FUNCTION_BUILTIN_O,
&&TARGET_CALL_FUNCTION_BUILTIN_FAST,
&&TARGET_CALL_FUNCTION_LEN,
&&TARGET_CALL_FUNCTION_ISINSTANCE,
@@ -44,29 +48,25 @@ static void *opcode_targets[256] = {
&&TARGET_JUMP_ABSOLUTE_QUICK,
&&TARGET_LOAD_ATTR_ADAPTIVE,
&&TARGET_LOAD_ATTR_INSTANCE_VALUE,
- &&TARGET_LOAD_ATTR_WITH_HINT,
- &&TARGET_LOAD_ATTR_SLOT,
- &&TARGET_LOAD_ATTR_MODULE,
- &&TARGET_LOAD_GLOBAL_ADAPTIVE,
&&TARGET_WITH_EXCEPT_START,
&&TARGET_GET_AITER,
&&TARGET_GET_ANEXT,
&&TARGET_BEFORE_ASYNC_WITH,
&&TARGET_BEFORE_WITH,
&&TARGET_END_ASYNC_FOR,
+ &&TARGET_LOAD_ATTR_WITH_HINT,
+ &&TARGET_LOAD_ATTR_SLOT,
+ &&TARGET_LOAD_ATTR_MODULE,
+ &&TARGET_LOAD_GLOBAL_ADAPTIVE,
&&TARGET_LOAD_GLOBAL_MODULE,
+ &&TARGET_STORE_SUBSCR,
+ &&TARGET_DELETE_SUBSCR,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_METHOD_ADAPTIVE,
&&TARGET_LOAD_METHOD_CACHED,
&&TARGET_LOAD_METHOD_CLASS,
- &&TARGET_STORE_SUBSCR,
- &&TARGET_DELETE_SUBSCR,
&&TARGET_LOAD_METHOD_MODULE,
&&TARGET_LOAD_METHOD_NO_DICT,
- &&TARGET_STORE_ATTR_ADAPTIVE,
- &&TARGET_STORE_ATTR_INSTANCE_VALUE,
- &&TARGET_STORE_ATTR_SLOT,
- &&TARGET_STORE_ATTR_WITH_HINT,
&&TARGET_GET_ITER,
&&TARGET_GET_YIELD_FROM_ITER,
&&TARGET_PRINT_EXPR,
@@ -74,20 +74,20 @@ static void *opcode_targets[256] = {
&&TARGET_YIELD_FROM,
&&TARGET_GET_AWAITABLE,
&&TARGET_LOAD_ASSERTION_ERROR,
+ &&TARGET_STORE_ATTR_ADAPTIVE,
+ &&TARGET_STORE_ATTR_INSTANCE_VALUE,
+ &&TARGET_STORE_ATTR_SLOT,
+ &&TARGET_STORE_ATTR_WITH_HINT,
&&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_STORE_FAST__LOAD_FAST,
&&TARGET_LOAD_FAST__LOAD_CONST,
- &&TARGET_LOAD_CONST__LOAD_FAST,
- &&TARGET_STORE_FAST__STORE_FAST,
- &&_unknown_opcode,
- &&_unknown_opcode,
&&TARGET_LIST_TO_TUPLE,
&&TARGET_RETURN_VALUE,
&&TARGET_IMPORT_STAR,
&&TARGET_SETUP_ANNOTATIONS,
&&TARGET_YIELD_VALUE,
- &&_unknown_opcode,
- &&_unknown_opcode,
+ &&TARGET_LOAD_CONST__LOAD_FAST,
+ &&TARGET_STORE_FAST__STORE_FAST,
&&TARGET_POP_EXCEPT,
&&TARGET_STORE_NAME,
&&TARGET_DELETE_NAME,
diff --git a/Python/specialize.c b/Python/specialize.c
index f5f12139df..b384675560 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -129,6 +129,7 @@ _Py_GetSpecializationStats(void) {
err += add_stat_dict(stats, STORE_ATTR, "store_attr");
err += add_stat_dict(stats, CALL_FUNCTION, "call_function");
err += add_stat_dict(stats, BINARY_OP, "binary_op");
+ err += add_stat_dict(stats, COMPARE_OP, "compare_op");
if (err < 0) {
Py_DECREF(stats);
return NULL;
@@ -187,6 +188,7 @@ _Py_PrintSpecializationStats(void)
print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr");
print_stats(out, &_specialization_stats[CALL_FUNCTION], "call_function");
print_stats(out, &_specialization_stats[BINARY_OP], "binary_op");
+ print_stats(out, &_specialization_stats[COMPARE_OP], "compare_op");
if (out != stderr) {
fclose(out);
}
@@ -239,6 +241,7 @@ static uint8_t adaptive_opcodes[256] = {
[CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE,
[STORE_ATTR] = STORE_ATTR_ADAPTIVE,
[BINARY_OP] = BINARY_OP_ADAPTIVE,
+ [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
};
/* The number of cache entries required for a "family" of instructions. */
@@ -251,6 +254,7 @@ static uint8_t cache_requirements[256] = {
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
[BINARY_OP] = 1, // _PyAdaptiveEntry
+ [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
};
/* Return the oparg for the cache_offset and instruction index.
@@ -487,6 +491,10 @@ initial_counter_value(void) {
#define SPEC_FAIL_BAD_CALL_FLAGS 17
#define SPEC_FAIL_CLASS 18
+/* COMPARE_OP */
+#define SPEC_FAIL_STRING_COMPARE 13
+#define SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP 14
+#define SPEC_FAIL_BIG_INT 15
static int
specialize_module_load_attr(
@@ -1536,3 +1544,74 @@ success:
STAT_INC(BINARY_OP, specialization_success);
adaptive->counter = initial_counter_value();
}
+
+static int compare_masks[] = {  // Indexed by comparison op (Py_LT..Py_GE): the outcome bits for which that comparison holds.
+ // 1-bit: jump if less than
+ // 2-bit: jump if equal
+ // 4-bit: jump if greater
+ [Py_LT] = 1 | 0 | 0,
+ [Py_LE] = 1 | 2 | 0,
+ [Py_EQ] = 0 | 2 | 0,
+ [Py_NE] = 1 | 0 | 4,
+ [Py_GT] = 0 | 0 | 4,
+ [Py_GE] = 0 | 2 | 4,
+};
+
+void
+_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
+ _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
+{  // Try to rewrite the instruction at *instr into a fused compare-and-jump form for float, int or str operands.
+ _PyAdaptiveEntry *adaptive = &cache->adaptive;
+ int op = adaptive->original_oparg;  // The comparison op (asserted <= Py_GE below).
+ int next_opcode = _Py_OPCODE(instr[1]);  // Opcode of the instruction immediately after this one.
+ if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) {
+ // Can't ever combine, so don't bother being adaptive.
+ SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP);
+ *instr = _Py_MAKECODEUNIT(COMPARE_OP, adaptive->original_oparg);  // Revert to plain COMPARE_OP with its original oparg.
+ goto failure;
+ }
+ assert(op <= Py_GE);
+ int when_to_jump_mask = compare_masks[op];
+ if (next_opcode == POP_JUMP_IF_FALSE) {
+ when_to_jump_mask = (1 | 2 | 4) & ~when_to_jump_mask;  // Jump on the opposite outcomes: flip the three mask bits.
+ }
+ if (Py_TYPE(lhs) != Py_TYPE(rhs)) {
+ SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_DIFFERENT_TYPES);
+ goto failure;
+ }
+ if (PyFloat_CheckExact(lhs)) {
+ *instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT_JUMP, _Py_OPARG(*instr));  // Swap the opcode, keep the instruction's current oparg.
+ adaptive->index = when_to_jump_mask;  // Read back as when_to_jump_mask by COMPARE_OP_FLOAT_JUMP.
+ goto success;
+ }
+ if (PyLong_CheckExact(lhs)) {
+ if (Py_ABS(Py_SIZE(lhs)) <= 1 && Py_ABS(Py_SIZE(rhs)) <= 1) {  // Same size limit that COMPARE_OP_INT_JUMP asserts.
+ *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr));
+ adaptive->index = when_to_jump_mask;  // Read back as when_to_jump_mask by COMPARE_OP_INT_JUMP.
+ goto success;
+ }
+ else {
+ SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_BIG_INT);
+ goto failure;
+ }
+ }
+ if (PyUnicode_CheckExact(lhs)) {
+ if (op != Py_EQ && op != Py_NE) {  // Only == and != are specialized for str.
+ SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_COMPARE);
+ goto failure;
+ }
+ else {
+ *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, _Py_OPARG(*instr));
+ adaptive->index = (when_to_jump_mask & 2) == 0;  // 1 when the jump is taken on inequality; read as `invert` by COMPARE_OP_STR_JUMP.
+ goto success;
+ }
+ }
+ SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER);
+failure:
+ STAT_INC(COMPARE_OP, specialization_failure);
+ cache_backoff(adaptive);  // NOTE(review): presumably delays the next specialization attempt -- helper is not visible here.
+ return;
+success:
+ STAT_INC(COMPARE_OP, specialization_success);
+ adaptive->counter = initial_counter_value();
+}