From 117878f7870377f143917a22160320a891eb0211 Mon Sep 17 00:00:00 2001
From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Tue, 7 Jun 2022 15:00:20 -0400
Subject: fix race conditions in lambda statements

Fixed multiple observed race conditions related to :func:`.lambda_stmt`,
including a "dogpile" issue that occurred when a new Python code object was
first analyzed by multiple threads simultaneously, which caused both a
performance issue and some internal corruption of state.
Additionally repaired an observed race condition which could occur when
"cloning" an expression construct that is also in the process of being
compiled or otherwise accessed in a different thread, due to memoized
attributes altering the ``__dict__`` while it is being iterated, for Python
versions prior to 3.10; in particular the lambda SQL construct is sensitive
to this as it holds onto a single statement object persistently. The
iteration has been refined to use ``dict.copy()``, with or without an
additional iteration over the copy, instead.

Fixes: #8098
Change-Id: I4e0b627bfa187f1780dc68ec81b94db1c78f846a
---
 lib/sqlalchemy/sql/base.py     |  3 ++-
 lib/sqlalchemy/sql/elements.py |  9 ++++++++-
 lib/sqlalchemy/sql/lambdas.py  | 18 +++++++++++++-----
 3 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'lib/sqlalchemy/sql')

diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py
index f5a9c10c0..391f74772 100644
--- a/lib/sqlalchemy/sql/base.py
+++ b/lib/sqlalchemy/sql/base.py
@@ -694,8 +694,9 @@ class Generative(HasMemoized):
         cls = self.__class__
         s = cls.__new__(cls)
         if skip:
+            # ensure this iteration remains atomic
             s.__dict__ = {
-                k: v for k, v in self.__dict__.items() if k not in skip
+                k: v for k, v in self.__dict__.copy().items() if k not in skip
             }
         else:
             s.__dict__ = self.__dict__.copy()
diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py
index 6032253c2..57cbdd8fc 100644
--- a/lib/sqlalchemy/sql/elements.py
+++ b/lib/sqlalchemy/sql/elements.py
@@ -390,7 +390,14 @@ class ClauseElement(
 
         skip = self._memoized_keys
         c = self.__class__.__new__(self.__class__)
-        c.__dict__ = {k: v for k, v in self.__dict__.items() if k not in skip}
+
+        if skip:
+            # ensure this iteration remains atomic
+            c.__dict__ = {
+                k: v for k, v in self.__dict__.copy().items() if k not in skip
+            }
+        else:
+            c.__dict__ = self.__dict__.copy()
 
         # this is a marker that helps to "equate" clauses to each other
         # when a Select returns its list of FROM clauses.  the cloning
diff --git a/lib/sqlalchemy/sql/lambdas.py b/lib/sqlalchemy/sql/lambdas.py
index 3e82a9a6a..c7464c91c 100644
--- a/lib/sqlalchemy/sql/lambdas.py
+++ b/lib/sqlalchemy/sql/lambdas.py
@@ -12,6 +12,7 @@ import collections.abc as collections_abc
 import inspect
 import itertools
 import operator
+import threading
 import types
 from types import CodeType
 from typing import Any
@@ -695,6 +696,8 @@ class AnalyzedCode:
         CodeType, AnalyzedCode
     ] = weakref.WeakKeyDictionary()
 
+    _generation_mutex = threading.RLock()
+
     @classmethod
     def get(cls, fn, lambda_element, lambda_kw, **kw):
         try:
@@ -703,11 +706,16 @@ class AnalyzedCode:
         except KeyError:
             pass
 
-        analyzed: AnalyzedCode
-        cls._fns[fn.__code__] = analyzed = AnalyzedCode(
-            fn, lambda_element, lambda_kw, **kw
-        )
-        return analyzed
+        with cls._generation_mutex:
+            # check for other thread already created object
+            if fn.__code__ in cls._fns:
+                return cls._fns[fn.__code__]
+
+            analyzed: AnalyzedCode
+            cls._fns[fn.__code__] = analyzed = AnalyzedCode(
+                fn, lambda_element, lambda_kw, **kw
+            )
+            return analyzed
 
     def __init__(self, fn, lambda_element, opts):
         if inspect.ismethod(fn):
-- 
cgit v1.2.1