summaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorTim Beale <timbeale@catalyst.net.nz>2019-06-20 09:20:09 +1200
committerAndrew Bartlett <abartlet@samba.org>2019-07-24 02:24:27 +0000
commit630857c71eff6332a6c94292ee840fa86a727f10 (patch)
treefbe7263b99cfaf5c074f2d7bc7708bc6151284b6 /python
parent7abfa6778f309bc3c1cfdb45042f708e81cfad9d (diff)
downloadsamba-630857c71eff6332a6c94292ee840fa86a727f10.tar.gz
traffic_replay: Avoid DB full scans in LDAP searches
When generating LDAP search traffic, a full DB scan can be very costly. Avoiding full-scan LDAP searches means that we can run traffic_replay against a 100K user DB and get some sane results. Because the traffic_learner doesn't record the LDAP search filter at all, the traffic_replay LDAP searches default to being full scans. Doing full scans meant that the LDAP search was usually the first packet type to exceed the max latency and fail the test. It could also skew results for the other packet types by creating big demands on memory/CPU/DB-lock-time. It's hard to know for sure exactly what real-world LDAP searches will look like, but let's assume full scan searches will be fairly rare. In traffic-model files we've collected previously, some of the attributes are fairly unique (e.g. pKIExtendedKeyUsage), and as there are some LDAP queries specified in MS specs (such as MS-GPOL and MS-WCCE), it allows us to infer what the search filter might be. Signed-off-by: Tim Beale <timbeale@catalyst.net.nz> Reviewed-by: Andrew Bartlett <abartlet@samba.org>
Diffstat (limited to 'python')
-rw-r--r--python/samba/emulate/traffic.py57
-rw-r--r--python/samba/emulate/traffic_packets.py6
2 files changed, 63 insertions, 0 deletions
diff --git a/python/samba/emulate/traffic.py b/python/samba/emulate/traffic.py
index b2175d3e4bf..d0a2ffc8f2c 100644
--- a/python/samba/emulate/traffic.py
+++ b/python/samba/emulate/traffic.py
@@ -445,6 +445,63 @@ class ReplayContext(object):
self.dn_map = dn_map
self.attribute_clue_map = attribute_clue_map
+ # pre-populate DN-based search filters (it's simplest to generate them
+ # once, when the test starts). These are used by guess_search_filter()
+ # to avoid full-scans
+ self.search_filters = {}
+
+ # lookup all the GPO DNs
+ res = db.search(db.domain_dn(), scope=ldb.SCOPE_SUBTREE, attrs=['dn'],
+ expression='(objectclass=groupPolicyContainer)')
+ gpos_by_dn = ""
+ for msg in res:
+ gpos_by_dn += "(distinguishedName={0})".format(msg['dn'])
+
+ # a search for the 'gPCFileSysPath' attribute is probably a GPO search
+ # (as per the MS-GPOL spec) which searches for GPOs by DN
+ self.search_filters['gPCFileSysPath'] = "(|{0})".format(gpos_by_dn)
+
+ # likewise, a search for gpLink is probably the Domain SOM search part
+ # of the MS-GPOL, in which case it's looking up a few OUs by DN
+ ou_str = ""
+ for ou in ["Domain Controllers,", "traffic_replay,", ""]:
+ ou_str += "(distinguishedName={0}{1})".format(ou, db.domain_dn())
+ self.search_filters['gpLink'] = "(|{0})".format(ou_str)
+
+ # The CEP Web Service can query the AD DC to get pKICertificateTemplate
+ # objects (as per MS-WCCE)
+ self.search_filters['pKIExtendedKeyUsage'] = \
+ '(objectCategory=pKICertificateTemplate)'
+
+ # assume that anything querying the usnChanged is some kind of
+ # synchronization tool, e.g. AD Change Detection Connector
+ res = db.search('', scope=ldb.SCOPE_BASE, attrs=['highestCommittedUSN'])
+ self.search_filters['usnChanged'] = \
+ '(usnChanged>={0})'.format(res[0]['highestCommittedUSN'])
+
+ # The traffic_learner script doesn't preserve the LDAP search filter, and
+ # having no filter can result in a full DB scan. This is costly for a large
+ # DB, and not necessarily representative of real world traffic. As there
+ # several standard LDAP queries that get used by AD tools, we can apply
+ # some logic and guess what the search filter might have been originally.
+ def guess_search_filter(self, attrs, dn_sig, dn):
+
+ # there are some standard spec-based searches that query fairly unique
+ # attributes. Check if the search is likely one of these
+ for key in self.search_filters.keys():
+ if key in attrs:
+ return self.search_filters[key]
+
+ # if it's the top-level domain, assume we're looking up a single user,
+ # e.g. like powershell Get-ADUser or a similar tool
+ if dn_sig == 'DC,DC':
+ random_user_id = random.random() % self.total_conversations
+ account_name = user_name(self.instance_id, random_user_id)
+ return '(&(sAMAccountName=%s)(objectClass=user))' % account_name
+
+ # otherwise just return everything in the sub-tree
+ return '(objectClass=*)'
+
def generate_process_local_config(self, account, conversation):
self.ldap_connections = []
self.dcerpc_connections = []
diff --git a/python/samba/emulate/traffic_packets.py b/python/samba/emulate/traffic_packets.py
index e42f7998f05..a585482ccd4 100644
--- a/python/samba/emulate/traffic_packets.py
+++ b/python/samba/emulate/traffic_packets.py
@@ -334,7 +334,13 @@ def packet_ldap_3(packet, conversation, context):
samdb = context.get_ldap_connection()
dn = context.get_matching_dn(dn_sig)
+ # try to guess the search expression (don't bother for base searches, as
+ # they're only looking up a single object)
+ if (filter is None or filter is '') and scope != SCOPE_BASE:
+ filter = context.guess_search_filter(attrs, dn_sig, dn)
+
samdb.search(dn,
+ expression=filter,
scope=int(scope),
attrs=attrs.split(','),
controls=["paged_results:1:1000"])