Merge pull request #1101 from pallets/refactor-loop-context

don't prefetch next item in loop context
author: David Lord <davidism@gmail.com> 2019-11-08 07:13:53 -0800
committer: GitHub <noreply@github.com> 2019-11-08 07:13:53 -0800
commit: 540b260198285f0ed41fbe80c0b1b6f13be579c1 (patch)
tree: da55a6740db12d7d4869d9c245fb10391a73777a
parent: d8820b95d60ecc6a7b3c9e0fc178573e62e2f012 (diff)
parent: 4d0949b3087e10c5bd183e7b7f22b15a74b95f68 (diff)
download: jinja2-540b260198285f0ed41fbe80c0b1b6f13be579c1.tar.gz
6 files changed, 273 insertions, 176 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index f05603f..0752854 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -5,11 +5,7 @@ Version 2.11.0
 
 Unreleased
 
--   Async support is only loaded the first time an
-    :class:`~environment.Environment` enables it, in order to avoid a
-    slow initial import. :issue:`765`
--   Python 2.6 and 3.3 are not supported anymore.
--   The ``map`` filter in async mode now automatically awaits
+-   Python 2.6, 3.3, and 3.4 are not supported anymore.
 -   Added a new ``ChainableUndefined`` class to support getitem and
     getattr on an undefined object. :issue:`977`
 -   Allow ``{%+`` syntax (with NOP behavior) when ``lstrip_blocks`` is
@@ -47,6 +43,18 @@ Unreleased
 -   Fix behavior of ``loop`` control variables such as ``length`` and
     ``revindex0`` when looping over a generator. :issue:`459, 751, 794`,
     :pr:`993`
+-   Async support is only loaded the first time an environment enables
+    it, in order to avoid a slow initial import. :issue:`765`
+-   In async environments, the ``|map`` filter will await the filter
+    call if needed. :pr:`913`
+-   In for loops that access ``loop`` attributes, the iterator is not
+    advanced ahead of the current iteration unless ``length``,
+    ``revindex``, ``nextitem``, or ``last`` are accessed. This makes it
+    less likely to break ``groupby`` results. :issue:`555`, :pr:`1101`
+-   In async environments, the ``loop`` attributes ``length`` and
+    ``revindex`` work for async iterators. :pr:`1101`
+-   In async environments, values from attribute/property access will
+    be awaited if needed. :pr:`1101`
 -   ``PackageLoader`` doesn't depend on setuptools or pkg_resources.
     :issue:`970`
 -   Support :class:`os.PathLike` objects in
diff --git a/jinja2/asyncsupport.py b/jinja2/asyncsupport.py
index 53ad192..7d457e3 100644
--- a/jinja2/asyncsupport.py
+++ b/jinja2/asyncsupport.py
@@ -9,14 +9,17 @@
     :copyright: (c) 2017 by the Jinja Team.
     :license: BSD, see LICENSE for more details.
 """
-import sys
 import asyncio
 import inspect
+import sys
 from functools import update_wrapper
 
-from jinja2.utils import concat, internalcode, Markup
 from jinja2.environment import TemplateModule
-from jinja2.runtime import LoopContextBase, _last_iteration
+from jinja2.runtime import LoopContext
+from jinja2.utils import concat
+from jinja2.utils import internalcode
+from jinja2.utils import Markup
+from jinja2.utils import missing
 
 
 async def concat_async(async_gen):
@@ -187,73 +190,80 @@ async def auto_aiter(iterable):
         yield item
 
 
-class AsyncLoopContext(LoopContextBase):
-
-    def __init__(self, async_iterator, undefined, after, length, recurse=None,
-                 depth0=0):
-        LoopContextBase.__init__(self, undefined, recurse, depth0)
-        self._async_iterator = async_iterator
-        self._after = after
-        self._length = length
+class AsyncLoopContext(LoopContext):
+    _to_iterator = staticmethod(auto_aiter)
 
     @property
-    def length(self):
-        if self._length is None:
-            raise TypeError('Loop length for some iterators cannot be '
-                            'lazily calculated in async mode')
+    async def length(self):
+        if self._length is not None:
+            return self._length
+
+        try:
+            self._length = len(self._iterable)
+        except TypeError:
+            iterable = [x async for x in self._iterator]
+            self._iterator = self._to_iterator(iterable)
+            self._length = len(iterable) + self.index + (self._after is not missing)
+
         return self._length
 
-    def __aiter__(self):
-        return AsyncLoopContextIterator(self)
+    @property
+    async def revindex0(self):
+        return await self.length - self.index
+
+    @property
+    async def revindex(self):
+        return await self.length - self.index0
+
+    async def _peek_next(self):
+        if self._after is not missing:
+            return self._after
+
+        try:
+            self._after = await self._iterator.__anext__()
+        except StopAsyncIteration:
+            self._after = missing
+
+        return self._after
 
+    @property
+    async def last(self):
+        return await self._peek_next() is missing
 
-class AsyncLoopContextIterator(object):
-    __slots__ = ('context',)
+    @property
+    async def nextitem(self):
+        rv = await self._peek_next()
 
-    def __init__(self, context):
-        self.context = context
+        if rv is missing:
+            return self._undefined("there is no next item")
+
+        return rv
 
     def __aiter__(self):
         return self
 
     async def __anext__(self):
-        ctx = self.context
-        ctx.index0 += 1
-        if ctx._after is _last_iteration:
-            raise StopAsyncIteration()
-        ctx._before = ctx._current
-        ctx._current = ctx._after
-        try:
-            ctx._after = await ctx._async_iterator.__anext__()
-        except StopAsyncIteration:
-            ctx._after = _last_iteration
-        return ctx._current, ctx
+        if self._after is not missing:
+            rv = self._after
+            self._after = missing
+        else:
+            rv = await self._iterator.__anext__()
+
+        self.index0 += 1
+        self._before = self._current
+        self._current = rv
+        return rv, self
 
 
 async def make_async_loop_context(iterable, undefined, recurse=None, depth0=0):
-    # Length is more complicated and less efficient in async mode.  The
-    # reason for this is that we cannot know if length will be used
-    # upfront but because length is a property we cannot lazily execute it
-    # later.  This means that we need to buffer it up and measure :(
-    #
-    # We however only do this for actual iterators, not for async
-    # iterators as blocking here does not seem like the best idea in the
-    # world.
-    try:
-        length = len(iterable)
-    except (TypeError, AttributeError):
-        if not hasattr(iterable, '__aiter__'):
-            iterable = tuple(iterable)
-            length = len(iterable)
-        else:
-            length = None
-    async_iterator = auto_aiter(iterable)
-    try:
-        after = await async_iterator.__anext__()
-    except StopAsyncIteration:
-        after = _last_iteration
-    return AsyncLoopContext(async_iterator, undefined, after, length, recurse,
-                            depth0)
+    import warnings
+    warnings.warn(
+        "This template must be recompiled with at least Jinja 2.11, or"
+        " it will fail in 3.0.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return AsyncLoopContext(iterable, undefined, recurse, depth0)
 
 
 patch_all()
diff --git a/jinja2/compiler.py b/jinja2/compiler.py
index 488ef0a..50e00ab 100644
--- a/jinja2/compiler.py
+++ b/jinja2/compiler.py
@@ -705,7 +705,7 @@ class CodeGenerator(NodeVisitor):
 
         if self.environment.is_async:
             self.writeline('from jinja2.asyncsupport import auto_await, '
-                           'auto_aiter, make_async_loop_context')
+                           'auto_aiter, AsyncLoopContext')
 
         # if we want a deferred initialization we cannot move the
         # environment into a local name
@@ -1095,7 +1095,7 @@ class CodeGenerator(NodeVisitor):
         self.visit(node.target, loop_frame)
         if extended_loop:
             if self.environment.is_async:
-                self.write(', %s in await make_async_loop_context(' % loop_ref)
+                self.write(', %s in AsyncLoopContext(' % loop_ref)
             else:
                 self.write(', %s in LoopContext(' % loop_ref)
         else:
@@ -1551,10 +1551,16 @@ class CodeGenerator(NodeVisitor):
 
     @optimizeconst
     def visit_Getattr(self, node, frame):
+        if self.environment.is_async:
+            self.write("await auto_await(")
+
         self.write('environment.getattr(')
         self.visit(node.node, frame)
         self.write(', %r)' % node.attr)
 
+        if self.environment.is_async:
+            self.write(")")
+
     @optimizeconst
     def visit_Getitem(self, node, frame):
         # slices bypass the environment getitem method.
@@ -1564,12 +1570,18 @@ class CodeGenerator(NodeVisitor):
             self.visit(node.arg, frame)
             self.write(']')
         else:
+            if self.environment.is_async:
+                self.write("await auto_await(")
+
             self.write('environment.getitem(')
             self.visit(node.node, frame)
             self.write(', ')
             self.visit(node.arg, frame)
             self.write(')')
 
+            if self.environment.is_async:
+                self.write(")")
+
     def visit_Slice(self, node, frame):
         if node.start is not None:
             self.visit(node.start, frame)
diff --git a/jinja2/runtime.py b/jinja2/runtime.py
index ff12ded..135ff27 100644
--- a/jinja2/runtime.py
+++ b/jinja2/runtime.py
@@ -343,134 +343,197 @@ class BlockReference(object):
         return rv
 
 
-class LoopContextBase(object):
-    """A loop context for dynamic iteration."""
+@implements_iterator
+class LoopContext:
+    """A wrapper iterable for dynamic ``for`` loops, with information
+    about the loop and iteration.
+    """
+
+    #: Current iteration of the loop, starting at 0.
+    index0 = -1
 
-    _before = _first_iteration
-    _current = _first_iteration
-    _after = _last_iteration
     _length = None
+    _after = missing
+    _current = missing
+    _before = missing
+    _last_changed_value = missing
 
-    def __init__(self, undefined, recurse=None, depth0=0):
+    def __init__(self, iterable, undefined, recurse=None, depth0=0):
+        """
+        :param iterable: Iterable to wrap.
+        :param undefined: :class:`Undefined` class to use for next and
+            previous items.
+        :param recurse: The function to render the loop body when the
+            loop is marked recursive.
+        :param depth0: Incremented when looping recursively.
+        """
+        self._iterable = iterable
+        self._iterator = self._to_iterator(iterable)
         self._undefined = undefined
         self._recurse = recurse
-        self.index0 = -1
+        #: How many levels deep a recursive loop currently is, starting at 0.
         self.depth0 = depth0
-        self._last_checked_value = missing
 
-    def cycle(self, *args):
-        """Cycles among the arguments with the current loop index."""
-        if not args:
-            raise TypeError('no items for cycling given')
-        return args[self.index0 % len(args)]
+    @staticmethod
+    def _to_iterator(iterable):
+        return iter(iterable)
 
-    def changed(self, *value):
-        """Checks whether the value has changed since the last call."""
-        if self._last_checked_value != value:
-            self._last_checked_value = value
-            return True
-        return False
+    @property
+    def length(self):
+        """Length of the iterable.
 
-    first = property(lambda x: x.index0 == 0)
-    last = property(lambda x: x._after is _last_iteration)
-    index = property(lambda x: x.index0 + 1)
-    revindex = property(lambda x: x.length - x.index0)
-    revindex0 = property(lambda x: x.length - x.index)
-    depth = property(lambda x: x.depth0 + 1)
+        If the iterable is a generator or otherwise does not have a
+        size, it is eagerly evaluated to get a size.
+        """
+        if self._length is not None:
+            return self._length
 
-    @property
-    def previtem(self):
-        if self._before is _first_iteration:
-            return self._undefined('there is no previous item')
-        return self._before
+        try:
+            self._length = len(self._iterable)
+        except TypeError:
+            iterable = list(self._iterator)
+            self._iterator = self._to_iterator(iterable)
+            self._length = len(iterable) + self.index + (self._after is not missing)
 
-    @property
-    def nextitem(self):
-        if self._after is _last_iteration:
-            return self._undefined('there is no next item')
-        return self._after
+        return self._length
 
     def __len__(self):
         return self.length
 
-    @internalcode
-    def loop(self, iterable):
-        if self._recurse is None:
-            raise TypeError('Tried to call non recursive loop.  Maybe you '
-                            "forgot the 'recursive' modifier.")
-        return self._recurse(iterable, self._recurse, self.depth0 + 1)
+    @property
+    def depth(self):
+        """How many levels deep a recursive loop currently is, starting at 1."""
+        return self.depth0 + 1
 
-    # a nifty trick to enhance the error message if someone tried to call
-    # the loop without or with too many arguments.
-    __call__ = loop
-    del loop
+    @property
+    def index(self):
+        """Current iteration of the loop, starting at 1."""
+        return self.index0 + 1
 
-    def __repr__(self):
-        return '<%s %r/%r>' % (
-            self.__class__.__name__,
-            self.index,
-            self.length
-        )
+    @property
+    def revindex0(self):
+        """Number of iterations from the end of the loop, ending at 0.
 
+        Requires calculating :attr:`length`.
+        """
+        return self.length - self.index
 
-class LoopContext(LoopContextBase):
+    @property
+    def revindex(self):
+        """Number of iterations from the end of the loop, ending at 1.
 
-    def __init__(self, iterable, undefined, recurse=None, depth0=0):
-        LoopContextBase.__init__(self, undefined, recurse, depth0)
-        self._iterator = iter(iterable)
-        self._iterations_done_count = 0
-        self._length = None
-        self._after = self._safe_next()
+        Requires calculating :attr:`length`.
+        """
+        return self.length - self.index0
 
     @property
-    def length(self):
+    def first(self):
+        """Whether this is the first iteration of the loop."""
+        return self.index0 == 0
+
+    def _peek_next(self):
+        """Return the next element in the iterable, or :data:`missing`
+        if the iterable is exhausted. Only peeks one item ahead, caching
+        the result in :attr:`_after` for use in subsequent checks. The
+        cache is reset when :meth:`__next__` is called.
+        """
+        if self._after is not missing:
+            return self._after
+
+        self._after = next(self._iterator, missing)
+        return self._after
+
+    @property
+    def last(self):
+        """Whether this is the last iteration of the loop.
+
+        Causes the iterable to advance early. See
+        :func:`itertools.groupby` for issues this can cause.
+        The :func:`groupby` filter avoids that issue.
         """
-        Getting length of an iterator is a costly operation which requires extra memory
-        and traversing in linear time. So make it an on demand param that iterates from
-        the point onwards of the iterator and accounts for iterated elements.
+        return self._peek_next() is missing
+
+    @property
+    def previtem(self):
+        """The item in the previous iteration. Undefined during the
+        first iteration.
         """
-        if self._length is None:
-            # if was not possible to get the length of the iterator when
-            # the loop context was created (ie: iterating over a generator)
-            # we have to convert the iterable into a sequence and use the
-            # length of that + the number of iterations so far.
-            iterable = tuple(self._iterator)
-            self._iterator = iter(iterable)
-            self._length = len(iterable) + self._iterations_done_count
-        return self._length
+        if self.first:
+            return self._undefined("there is no previous item")
 
-    def __iter__(self):
-        return LoopContextIterator(self)
+        return self._before
 
-    def _safe_next(self):
-        try:
-            tmp = next(self._iterator)
-            self._iterations_done_count += 1
-            return tmp
-        except StopIteration:
-            return _last_iteration
+    @property
+    def nextitem(self):
+        """The item in the next iteration. Undefined during the last
+        iteration.
 
+        Causes the iterable to advance early. See
+        :func:`itertools.groupby` for issues this can cause.
+        The :func:`groupby` filter avoids that issue.
+        """
+        rv = self._peek_next()
 
-@implements_iterator
-class LoopContextIterator(object):
-    """The iterator for a loop context."""
-    __slots__ = ('context',)
+        if rv is missing:
+            return self._undefined("there is no next item")
 
-    def __init__(self, context):
-        self.context = context
+        return rv
+
+    def cycle(self, *args):
+        """Return a value from the given args, cycling through based on
+        the current :attr:`index0`.
+
+        :param args: One or more values to cycle through.
+        """
+        if not args:
+            raise TypeError("no items for cycling given")
+
+        return args[self.index0 % len(args)]
+
+    def changed(self, *value):
+        """Return ``True`` if previously called with a different value
+        (including when called for the first time).
+
+        :param value: One or more values to compare to the last call.
+        """
+        if self._last_changed_value != value:
+            self._last_changed_value = value
+            return True
+
+        return False
 
     def __iter__(self):
         return self
 
+    @internalcode
     def __next__(self):
-        ctx = self.context
-        ctx.index0 += 1
-        if ctx._after is _last_iteration:
-            raise StopIteration()
-        ctx._before = ctx._current
-        ctx._current = ctx._after
-        ctx._after = ctx._safe_next()
-        return ctx._current, ctx
+        if self._after is not missing:
+            rv = self._after
+            self._after = missing
+        else:
+            rv = next(self._iterator)
+
+        self.index0 += 1
+        self._before = self._current
+        self._current = rv
+        return rv, self
+
+    def __call__(self, iterable):
+        """When iterating over nested data, render the body of the loop
+        recursively with the given inner iterable data.
+
+        The loop must have the ``recursive`` marker for this to work.
+        """
+        if self._recurse is None:
+            raise TypeError(
+                "The loop must have the 'recursive' marker to be"
+                " called recursively."
+            )
+
+        return self._recurse(iterable, self._recurse, depth=self.depth)
+
+    def __repr__(self):
+        return "<%s %d/%d>" % (self.__class__.__name__, self.index, self.length)
 
 
 class Macro(object):
diff --git a/tests/test_async.py b/tests/test_async.py
index 92ac2a3..5f331a5 100644
--- a/tests/test_async.py
+++ b/tests/test_async.py
@@ -2,6 +2,7 @@ import pytest
 import asyncio
 
 from jinja2 import Template, Environment, DictLoader
+from jinja2.asyncsupport import auto_aiter
 from jinja2.exceptions import TemplateNotFound, TemplatesNotFound, \
      UndefinedError
 
@@ -274,26 +275,17 @@ class TestAsyncForLoop(object):
         tmpl = test_env_async.from_string('<{% for item in seq %}{% else %}{% endfor %}>')
         assert tmpl.render() == '<>'
 
-    def test_context_vars(self, test_env_async):
-        slist = [42, 24]
-        for seq in [slist, iter(slist), reversed(slist), (_ for _ in slist)]:
-            tmpl = test_env_async.from_string('''{% for item in seq -%}
-            {{ loop.index }}|{{ loop.index0 }}|{{ loop.revindex }}|{{
-                loop.revindex0 }}|{{ loop.first }}|{{ loop.last }}|{{
-               loop.length }}###{% endfor %}''')
-            one, two, _ = tmpl.render(seq=seq).split('###')
-            (one_index, one_index0, one_revindex, one_revindex0, one_first,
-             one_last, one_length) = one.split('|')
-            (two_index, two_index0, two_revindex, two_revindex0, two_first,
-             two_last, two_length) = two.split('|')
-
-            assert int(one_index) == 1 and int(two_index) == 2
-            assert int(one_index0) == 0 and int(two_index0) == 1
-            assert int(one_revindex) == 2 and int(two_revindex) == 1
-            assert int(one_revindex0) == 1 and int(two_revindex0) == 0
-            assert one_first == 'True' and two_first == 'False'
-            assert one_last == 'False' and two_last == 'True'
-            assert one_length == two_length == '2'
+    @pytest.mark.parametrize(
+        "transform", [lambda x: x, iter, reversed, lambda x: (i for i in x), auto_aiter]
+    )
+    def test_context_vars(self, test_env_async, transform):
+        t = test_env_async.from_string(
+            "{% for item in seq %}{{ loop.index }}|{{ loop.index0 }}"
+            "|{{ loop.revindex }}|{{ loop.revindex0 }}|{{ loop.first }}"
+            "|{{ loop.last }}|{{ loop.length }}\n{% endfor %}"
+        )
+        out = t.render(seq=transform([42, 24]))
+        assert out == "1|0|2|1|True|False|2\n2|1|1|0|False|True|2\n"
 
     def test_cycling(self, test_env_async):
         tmpl = test_env_async.from_string('''{% for item in seq %}{{
diff --git a/tests/test_runtime.py b/tests/test_runtime.py
index 1b24b40..1afcb3f 100644
--- a/tests/test_runtime.py
+++ b/tests/test_runtime.py
@@ -1,3 +1,5 @@
+import itertools
+
 from jinja2 import Template
 from jinja2.runtime import LoopContext
 
@@ -46,3 +48,13 @@ def test_loopcontext2():
     in_lst = [10, 11]
     l = LoopContext(reversed(in_lst), None)
     assert l.length == len(in_lst)
+
+
+def test_iterator_not_advanced_early():
+    t = Template("{% for _, g in gs %}{{ loop.index }} {{ g|list }}\n{% endfor %}")
+    out = t.render(gs=itertools.groupby(
+        [(1, "a"), (1, "b"), (2, "c"), (3, "d")], lambda x: x[0]
+    ))
+    # groupby groups depend on the current position of the iterator. If
+    # it was advanced early, the lists would appear empty.
+    assert out == "1 [(1, 'a'), (1, 'b')]\n2 [(2, 'c')]\n3 [(3, 'd')]\n"
author	David Lord <davidism@gmail.com>	2019-11-08 07:13:53 -0800
committer	GitHub <noreply@github.com>	2019-11-08 07:13:53 -0800
commit	540b260198285f0ed41fbe80c0b1b6f13be579c1 (patch)
tree	da55a6740db12d7d4869d9c245fb10391a73777a
parent	d8820b95d60ecc6a7b3c9e0fc178573e62e2f012 (diff)
parent	4d0949b3087e10c5bd183e7b7f22b15a74b95f68 (diff)
download	jinja2-540b260198285f0ed41fbe80c0b1b6f13be579c1.tar.gz