diff options
author | Dennis Sweeney <36520290+sweeneyde@users.noreply.github.com> | 2021-12-03 04:29:12 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-03 11:29:12 +0000 |
commit | 03768c4d139df46212a091ed931aad03bec18b57 (patch) | |
tree | 35ab5e03aacded970ee0b1d21077f18782176c08 /Python | |
parent | 99fcf1505218464c489d419d4500f126b6d6dc28 (diff) | |
download | cpython-git-03768c4d139df46212a091ed931aad03bec18b57.tar.gz |
bpo-45885: Specialize COMPARE_OP (GH-29734)
* Add COMPARE_OP_ADAPTIVE adaptive instruction.
* Add COMPARE_OP_FLOAT_JUMP, COMPARE_OP_INT_JUMP and COMPARE_OP_STR_JUMP specialized instructions.
* Introduce and use _PyUnicode_Equal
Diffstat (limited to 'Python')
-rw-r--r-- | Python/ceval.c | 122 | ||||
-rw-r--r-- | Python/opcode_targets.h | 46 | ||||
-rw-r--r-- | Python/specialize.c | 79 |
3 files changed, 224 insertions, 23 deletions
diff --git a/Python/ceval.c b/Python/ceval.c index 97c684479a..05897c561a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3778,6 +3778,8 @@ check_eval_breaker: } TARGET(COMPARE_OP) { + PREDICTED(COMPARE_OP); + STAT_INC(COMPARE_OP, unquickened); assert(oparg <= Py_GE); PyObject *right = POP(); PyObject *left = TOP(); @@ -3792,6 +3794,125 @@ check_eval_breaker: DISPATCH(); } + TARGET(COMPARE_OP_ADAPTIVE) { + assert(cframe.use_tracing == 0); + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *right = TOP(); + PyObject *left = SECOND(); + next_instr--; + _Py_Specialize_CompareOp(left, right, next_instr, cache); + DISPATCH(); + } + else { + STAT_INC(COMPARE_OP, deferred); + cache->adaptive.counter--; + oparg = cache->adaptive.original_oparg; + STAT_DEC(COMPARE_OP, unquickened); + JUMP_TO_INSTRUCTION(COMPARE_OP); + } + } + + TARGET(COMPARE_OP_FLOAT_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false) + SpecializedCacheEntry *caches = GET_CACHE(); + int when_to_jump_mask = caches[0].adaptive.index; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); + double dleft = PyFloat_AS_DOUBLE(left); + double dright = PyFloat_AS_DOUBLE(right); + int sign = (dleft > dright) - (dleft < dright); + DEOPT_IF(isnan(dleft), COMPARE_OP); + DEOPT_IF(isnan(dright), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + NEXTOPARG(); + STACK_SHRINK(2); + Py_DECREF(left); + Py_DECREF(right); + assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); + int jump = (1 << (sign + 1)) & when_to_jump_mask; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + NOTRACE_DISPATCH(); + } + } + + TARGET(COMPARE_OP_INT_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (int ? 
int) + POP_JUMP_IF_(true/false) + SpecializedCacheEntry *caches = GET_CACHE(); + int when_to_jump_mask = caches[0].adaptive.index; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); + DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP); + DEOPT_IF((size_t)(Py_SIZE(right) + 1) > 2, COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1); + Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0]; + Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0]; + int sign = (ileft > iright) - (ileft < iright); + NEXTOPARG(); + STACK_SHRINK(2); + Py_DECREF(left); + Py_DECREF(right); + assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); + int jump = (1 << (sign + 1)) & when_to_jump_mask; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + NOTRACE_DISPATCH(); + } + } + + TARGET(COMPARE_OP_STR_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) + SpecializedCacheEntry *caches = GET_CACHE(); + int invert = caches[0].adaptive.index; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + int res = _PyUnicode_Equal(left, right); + if (res < 0) { + goto error; + } + assert(caches[0].adaptive.original_oparg == Py_EQ || + caches[0].adaptive.original_oparg == Py_NE); + NEXTOPARG(); + assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); + STACK_SHRINK(2); + Py_DECREF(left); + Py_DECREF(right); + assert(res == 0 || res == 1); + assert(invert == 0 || invert == 1); + int jump = res ^ invert; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + 
NOTRACE_DISPATCH(); + } + } + TARGET(IS_OP) { PyObject *right = POP(); PyObject *left = TOP(); @@ -5083,6 +5204,7 @@ MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(LOAD_METHOD) MISS_WITH_CACHE(CALL_FUNCTION) MISS_WITH_CACHE(BINARY_OP) +MISS_WITH_CACHE(COMPARE_OP) MISS_WITH_CACHE(BINARY_SUBSCR) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c9d430d268..872a688311 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -20,23 +20,27 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_OP_MULTIPLY_FLOAT, &&TARGET_BINARY_OP_SUBTRACT_INT, &&TARGET_BINARY_OP_SUBTRACT_FLOAT, + &&TARGET_COMPARE_OP_ADAPTIVE, + &&TARGET_COMPARE_OP_FLOAT_JUMP, + &&TARGET_COMPARE_OP_INT_JUMP, + &&TARGET_COMPARE_OP_STR_JUMP, + &&TARGET_BINARY_SUBSCR, &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_SUBSCR_GETITEM, &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_BINARY_SUBSCR_TUPLE_INT, - &&TARGET_BINARY_SUBSCR, - &&TARGET_BINARY_SUBSCR_DICT, - &&TARGET_STORE_SUBSCR_ADAPTIVE, - &&TARGET_STORE_SUBSCR_LIST_INT, - &&TARGET_STORE_SUBSCR_DICT, &&TARGET_GET_LEN, &&TARGET_MATCH_MAPPING, &&TARGET_MATCH_SEQUENCE, &&TARGET_MATCH_KEYS, - &&TARGET_CALL_FUNCTION_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_PUSH_EXC_INFO, - &&TARGET_CALL_FUNCTION_BUILTIN_O, + &&TARGET_STORE_SUBSCR_ADAPTIVE, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_STORE_SUBSCR_LIST_INT, + &&TARGET_STORE_SUBSCR_DICT, + &&TARGET_CALL_FUNCTION_ADAPTIVE, + &&TARGET_CALL_FUNCTION_BUILTIN_O, &&TARGET_CALL_FUNCTION_BUILTIN_FAST, &&TARGET_CALL_FUNCTION_LEN, &&TARGET_CALL_FUNCTION_ISINSTANCE, @@ -44,29 +48,25 @@ static void *opcode_targets[256] = { &&TARGET_JUMP_ABSOLUTE_QUICK, &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_LOAD_ATTR_INSTANCE_VALUE, - &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_ATTR_SLOT, - &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, &&TARGET_BEFORE_ASYNC_WITH, &&TARGET_BEFORE_WITH, 
&&TARGET_END_ASYNC_FOR, + &&TARGET_LOAD_ATTR_WITH_HINT, + &&TARGET_LOAD_ATTR_SLOT, + &&TARGET_LOAD_ATTR_MODULE, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_STORE_SUBSCR, + &&TARGET_DELETE_SUBSCR, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_LOAD_METHOD_CACHED, &&TARGET_LOAD_METHOD_CLASS, - &&TARGET_STORE_SUBSCR, - &&TARGET_DELETE_SUBSCR, &&TARGET_LOAD_METHOD_MODULE, &&TARGET_LOAD_METHOD_NO_DICT, - &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_STORE_ATTR_INSTANCE_VALUE, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_GET_ITER, &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_PRINT_EXPR, @@ -74,20 +74,20 @@ static void *opcode_targets[256] = { &&TARGET_YIELD_FROM, &&TARGET_GET_AWAITABLE, &&TARGET_LOAD_ASSERTION_ERROR, + &&TARGET_STORE_ATTR_ADAPTIVE, + &&TARGET_STORE_ATTR_INSTANCE_VALUE, + &&TARGET_STORE_ATTR_SLOT, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_LOAD_FAST__LOAD_CONST, - &&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, diff --git a/Python/specialize.c b/Python/specialize.c index f5f12139df..b384675560 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -129,6 +129,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, STORE_ATTR, "store_attr"); err += add_stat_dict(stats, CALL_FUNCTION, "call_function"); err += add_stat_dict(stats, BINARY_OP, "binary_op"); + err += add_stat_dict(stats, COMPARE_OP, "compare_op"); if (err < 0) { Py_DECREF(stats); return NULL; @@ -187,6 +188,7 @@ _Py_PrintSpecializationStats(void) print_stats(out, &_specialization_stats[STORE_ATTR], 
"store_attr"); print_stats(out, &_specialization_stats[CALL_FUNCTION], "call_function"); print_stats(out, &_specialization_stats[BINARY_OP], "binary_op"); + print_stats(out, &_specialization_stats[COMPARE_OP], "compare_op"); if (out != stderr) { fclose(out); } @@ -239,6 +241,7 @@ static uint8_t adaptive_opcodes[256] = { [CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, [BINARY_OP] = BINARY_OP_ADAPTIVE, + [COMPARE_OP] = COMPARE_OP_ADAPTIVE, }; /* The number of cache entries required for a "family" of instructions. */ @@ -251,6 +254,7 @@ static uint8_t cache_requirements[256] = { [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [BINARY_OP] = 1, // _PyAdaptiveEntry + [COMPARE_OP] = 1, /* _PyAdaptiveEntry */ }; /* Return the oparg for the cache_offset and instruction index. @@ -487,6 +491,10 @@ initial_counter_value(void) { #define SPEC_FAIL_BAD_CALL_FLAGS 17 #define SPEC_FAIL_CLASS 18 +/* COMPARE_OP */ +#define SPEC_FAIL_STRING_COMPARE 13 +#define SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP 14 +#define SPEC_FAIL_BIG_INT 15 static int specialize_module_load_attr( @@ -1536,3 +1544,74 @@ success: STAT_INC(BINARY_OP, specialization_success); adaptive->counter = initial_counter_value(); } + +static int compare_masks[] = { + // 1-bit: jump if less than + // 2-bit: jump if equal + // 4-bit: jump if greater + [Py_LT] = 1 | 0 | 0, + [Py_LE] = 1 | 2 | 0, + [Py_EQ] = 0 | 2 | 0, + [Py_NE] = 1 | 0 | 4, + [Py_GT] = 0 | 0 | 4, + [Py_GE] = 0 | 2 | 4, +}; + +void +_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, + _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + _PyAdaptiveEntry *adaptive = &cache->adaptive; + int op = adaptive->original_oparg; + int next_opcode = _Py_OPCODE(instr[1]); + if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) { + // Can't ever combine, so don't bother being adaptive. 
+ SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP); + *instr = _Py_MAKECODEUNIT(COMPARE_OP, adaptive->original_oparg); + goto failure; + } + assert(op <= Py_GE); + int when_to_jump_mask = compare_masks[op]; + if (next_opcode == POP_JUMP_IF_FALSE) { + when_to_jump_mask = (1 | 2 | 4) & ~when_to_jump_mask; + } + if (Py_TYPE(lhs) != Py_TYPE(rhs)) { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_DIFFERENT_TYPES); + goto failure; + } + if (PyFloat_CheckExact(lhs)) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT_JUMP, _Py_OPARG(*instr)); + adaptive->index = when_to_jump_mask; + goto success; + } + if (PyLong_CheckExact(lhs)) { + if (Py_ABS(Py_SIZE(lhs)) <= 1 && Py_ABS(Py_SIZE(rhs)) <= 1) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr)); + adaptive->index = when_to_jump_mask; + goto success; + } + else { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_BIG_INT); + goto failure; + } + } + if (PyUnicode_CheckExact(lhs)) { + if (op != Py_EQ && op != Py_NE) { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_COMPARE); + goto failure; + } + else { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, _Py_OPARG(*instr)); + adaptive->index = (when_to_jump_mask & 2) == 0; + goto success; + } + } + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER); +failure: + STAT_INC(COMPARE_OP, specialization_failure); + cache_backoff(adaptive); + return; +success: + STAT_INC(COMPARE_OP, specialization_success); + adaptive->counter = initial_counter_value(); +} |