From 9de7ec7868554e92700e4bda6c5f9d8fcad634dc Mon Sep 17 00:00:00 2001 From: Jeffrey Yasskin Date: Wed, 25 Feb 2009 02:25:04 +0000 Subject: http://bugs.python.org/issue4715 This patch by Antoine Pitrou optimizes the bytecode for conditional branches by merging the following "POP_TOP" instruction into the conditional jump. For example, the list comprehension "[x for x in l if not x]" produced the following bytecode: 1 0 BUILD_LIST 0 3 LOAD_FAST 0 (.0) >> 6 FOR_ITER 23 (to 32) 9 STORE_FAST 1 (x) 12 LOAD_FAST 1 (x) 15 JUMP_IF_TRUE 10 (to 28) 18 POP_TOP 19 LOAD_FAST 1 (x) 22 LIST_APPEND 2 25 JUMP_ABSOLUTE 6 >> 28 POP_TOP 29 JUMP_ABSOLUTE 6 >> 32 RETURN_VALUE but after the patch it produces the following bytecode: 1 0 BUILD_LIST 0 3 LOAD_FAST 0 (.0) >> 6 FOR_ITER 18 (to 27) 9 STORE_FAST 1 (x) 12 LOAD_FAST 1 (x) 15 POP_JUMP_IF_TRUE 6 18 LOAD_FAST 1 (x) 21 LIST_APPEND 2 24 JUMP_ABSOLUTE 6 >> 27 RETURN_VALUE Notice that not only the code is shorter, but the conditional jump (POP_JUMP_IF_TRUE) jumps right to the start of the loop instead of going through the JUMP_ABSOLUTE at the end. "continue" statements are helped similarly. Furthermore, the old jump opcodes (JUMP_IF_FALSE, JUMP_IF_TRUE) have been replaced by two new opcodes: - JUMP_IF_TRUE_OR_POP, which jumps if true and pops otherwise - JUMP_IF_FALSE_OR_POP, which jumps if false and pops otherwise --- Python/peephole.c | 102 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 58 insertions(+), 44 deletions(-) (limited to 'Python/peephole.c') diff --git a/Python/peephole.c b/Python/peephole.c index d31c89b86e..c413fc8e87 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -13,7 +13,12 @@ #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) #define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) -#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \ + || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP) #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 #define CODESIZE(op) (HAS_ARG(op) ? 3 : 1) @@ -237,8 +242,10 @@ markblocks(unsigned char *code, Py_ssize_t len) switch (opcode) { case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case JUMP_ABSOLUTE: case CONTINUE_LOOP: case SETUP_LOOP: @@ -363,29 +370,24 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, assert(PyList_Check(consts)); for (i=0 ; i a is not b not a in b --> a not in b @@ -417,29 +419,19 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, break; /* Skip over LOAD_CONST trueconst - JUMP_IF_FALSE xx POP_TOP */ + POP_JUMP_IF_FALSE xx. This improves + "while 1" performance. */ case LOAD_CONST: cumlc = lastlc + 1; j = GETARG(codestr, i); - if (codestr[i+3] != JUMP_IF_FALSE || - codestr[i+6] != POP_TOP || + if (codestr[i+3] != POP_JUMP_IF_FALSE || !ISBASICBLOCK(blocks,i,7) || !PyObject_IsTrue(PyList_GET_ITEM(consts, j))) continue; - memset(codestr+i, NOP, 7); + memset(codestr+i, NOP, 6); cumlc = 0; break; - /* Replace POP_TOP JUMP_FORWARD 1 POP_TOP - with NOP NOP NOP NOP POP_TOP. */ - case POP_TOP: - if (UNCONDITIONAL_JUMP(codestr[i+1]) && - GETJUMPTGT(codestr, i+1) == i+5 && - codestr[i+4] == POP_TOP && - ISBASICBLOCK(blocks,i,4)) - memset(codestr+i, NOP, 4); - break; - /* Try to fold tuples of constants (includes a case for lists which are only used for "in" and "not in" tests). Skip over BUILD_SEQN 1 UNPACK_SEQN 1. @@ -524,27 +516,47 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, "if a or b:" "a and b or c" "(a and b) and c" - x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z - x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3 + x:POP_OR_JUMP y y:POP_OR_JUMP z --> x:POP_OR_JUMP z + x:POP_OR_JUMP y y:JUMP_OR_POP z --> x:POP_JUMP_IF_FALSE y+3 where y+3 is the instruction following the second test. */ - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: tgt = GETJUMPTGT(codestr, i); j = codestr[tgt]; - if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) { - if (j == opcode) { - tgttgt = GETJUMPTGT(codestr, tgt) - i - 3; + if (CONDITIONAL_JUMP(j)) { + /* NOTE: all possible jumps here are + absolute! */ + if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) { + /* The second jump will be + taken iff the first is. */ + tgttgt = GETJUMPTGT(codestr, tgt); + /* The current opcode inherits + its target's stack behaviour */ + codestr[i] = j; SETARG(codestr, i, tgttgt); + goto reoptimize_current; } else { - tgt -= i; - SETARG(codestr, i, tgt); + /* The second jump is not taken + if the first is (so jump past + it), and all conditional + jumps pop their argument when + they're not taken (so change + the first jump to pop its + argument when it's taken). */ + if (JUMPS_ON_TRUE(opcode)) + codestr[i] = POP_JUMP_IF_TRUE; + else + codestr[i] = POP_JUMP_IF_FALSE; + SETARG(codestr, i, (tgt + 3)); + goto reoptimize_current; } - break; } - /* Intentional fallthrough */ + /* Intentional fallthrough */ /* Replace jumps to unconditional jumps */ + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case FOR_ITER: case JUMP_FORWARD: case JUMP_ABSOLUTE: @@ -621,14 +633,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case JUMP_ABSOLUTE: case CONTINUE_LOOP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: j = addrmap[GETARG(codestr, i)]; SETARG(codestr, i, j); break; case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: case SETUP_LOOP: case SETUP_EXCEPT: case SETUP_FINALLY: -- cgit v1.2.1