summaryrefslogtreecommitdiff
path: root/source3
diff options
context:
space:
mode:
authorRalph Boehme <slow@samba.org>2018-07-25 19:14:25 +0200
committerRalph Boehme <slow@samba.org>2018-07-27 13:07:15 +0200
commit0736fdcdb0b9f072a482074ff69bb0054e16ffb2 (patch)
treebe99d41f803880ed52c2e9078a105cd42e869017 /source3
parent8884036ba297f504eb2e427db8247719ffb7a713 (diff)
downloadsamba-0736fdcdb0b9f072a482074ff69bb0054e16ffb2.tar.gz
smbd: use async dos_mode_at_send in smbd_smb2_query_directory_send()
Finally: use the new dos_mode_at_send() in the directory enumeration loop. This means that fetching the DOS attributes for directory entries is done asynchronously with regard to the enumeration loop. As the DOS attribute is typically read from an extended attribute in the filesystem, this avoids sequentially blocking on IO. If the IO subsystem is slow servicing these requests, enabling async processing can result in performance improvements. A parametric option smbd:async dosmode = true | false (default: false) can be used to enable the new async processing. Simulating slow IO with usleep(5000) in the synchronous and asynchronous versions of SMB_VFS_GET_DOS_ATTRIBUTES(), the results of enumerating a directory with 10,000 files are: smbd:async dosmode = no: $ time bin/smbclient -U slow%x //localhost/test -c "ls dir\*" > /dev/null real 0m59.597s user 0m0.024s sys 0m0.012s smbd:async dosmode = yes: $ time bin/smbclient -U slow%x //localhost/test -c "ls dir\*" > /dev/null real 0m0.698s user 0m0.038s sys 0m0.025s Performance gains in real world workloads depend on whether the actual IO requests can be merged and parallelized by the kernel. Without such wins at the IO layer, the async processing may even be slower than the sync processing due to the additional overhead. The following parameters can be used to adapt async processing behaviour for specific workloads and systems: aio max threads = X (default: 100) smbd:max async dosmode = Y (default: "aio max threads" * 2) By default we have at most twice the number of async requests in flight as threads provided by the underlying threadpool. This ensures a worker thread that finishes a job can directly pick up a new one without going to sleep. It may be advisable to reduce the number of threads to avoid scheduling overhead while also increasing "smbd:max async dosmode".
Note that we disable async processing for certain VFS modules in the VFS connect function to avoid the overhead of triggering the sync fallback in dos_mode_at_send(). This is done for VFS modules that implement the sync SMB_VFS_GET_DOS_ATTRIBUTES(), but not the async version (gpfs), and for VFS modules that don't share a real filesystem where fchdir() can be used (ceph, gluster). It is disabled for catia, because we realized that the catia name translation macros used on fsps (CATIA_FETCH_FSP_[PRE|POST]_NEXT) have a bug (#13547). We use threadpool = smb_vfs_ev_glue_tp_chdir_safe() and then pthreadpool_tevent_max_threads(threadpool) to get the maximum number of worker threads, which matches the pool used by the low level SMB_VFS_GETXATTRAT_[SEND|RECV] implementation in vfs_default. This is a terrible abstraction leak that should be removed in the future by maybe making it possible to ask a VFS function which threadpool it uses, internally supporting chaining so VFS function FOO that internally uses BAR can forward the question to BAR. On a hypothetical system that has a getxattrat(dirfd, path, ...) syscall and at the same time doesn't support per-thread current working directories (e.g. FreeBSD doesn't have the latter) but has support for per-thread-credentials, pthreadpool_tevent_max_threads() on the tp_chdir_safe threadpool returns 1. So when hooking the hypothetical getxattrat() into the async SMB_VFS_GETXATTRAT_[SEND|RECV] implementation in a VFS module, the implementation could use the tp_path_safe threadpool, but the SMB2 layer would use the wrong threadpool in the call to pthreadpool_tevent_max_threads(), resulting in no parallelism. Signed-off-by: Ralph Boehme <slow@samba.org> Reviewed-by: Stefan Metzmacher <metze@samba.org>
Diffstat (limited to 'source3')
-rw-r--r--source3/modules/vfs_catia.c15
-rw-r--r--source3/modules/vfs_ceph.c5
-rw-r--r--source3/modules/vfs_glusterfs.c5
-rw-r--r--source3/modules/vfs_gpfs.c6
-rw-r--r--source3/smbd/smb2_query_directory.c225
5 files changed, 254 insertions, 2 deletions
diff --git a/source3/modules/vfs_catia.c b/source3/modules/vfs_catia.c
index 12995dda9bf..c362be764cc 100644
--- a/source3/modules/vfs_catia.c
+++ b/source3/modules/vfs_catia.c
@@ -158,6 +158,19 @@ static NTSTATUS catia_string_replace_allocate(connection_struct *conn,
return status;
}
+static int catia_connect(struct vfs_handle_struct *handle,
+ const char *service,
+ const char *user)
+{
+ /*
+ * Unless we have an async implementation of get_dos_attributes turn
+ * this off.
+ *
+ * Concretely: force the parametric option "smbd:async dosmode" to
+ * "false" for this connection's service number, then chain to the
+ * next VFS module's connect function and return its result.
+ *
+ * NOTE(review): the commit message introducing this hook names a bug
+ * in the catia fsp name translation macros (#13547) as the reason for
+ * disabling async dosmode here, not a missing async
+ * get_dos_attributes - confirm which reason applies and align the
+ * wording above.
+ */
+ lp_do_parameter(SNUM(handle->conn), "smbd:async dosmode", "false");
+
+ return SMB_VFS_NEXT_CONNECT(handle, service, user);
+}
+
static DIR *catia_opendir(vfs_handle_struct *handle,
const struct smb_filename *smb_fname,
const char *mask,
@@ -2405,6 +2418,8 @@ static NTSTATUS catia_set_dos_attributes(struct vfs_handle_struct *handle,
}
static struct vfs_fn_pointers vfs_catia_fns = {
+ .connect_fn = catia_connect,
+
/* Directory operations */
.mkdir_fn = catia_mkdir,
.rmdir_fn = catia_rmdir,
diff --git a/source3/modules/vfs_ceph.c b/source3/modules/vfs_ceph.c
index 8b709eddc90..d863c8add5a 100644
--- a/source3/modules/vfs_ceph.c
+++ b/source3/modules/vfs_ceph.c
@@ -132,6 +132,11 @@ static int cephwrap_connect(struct vfs_handle_struct *handle, const char *servi
handle->data = cmount;
cmount_cnt++;
+ /*
+ * Unless we have an async implementation of getxattrat turn this off.
+ */
+ lp_do_parameter(SNUM(handle->conn), "smbd:async dosmode", "false");
+
return 0;
err_cm_release:
diff --git a/source3/modules/vfs_glusterfs.c b/source3/modules/vfs_glusterfs.c
index 98be3c6d4e2..431f6fff48c 100644
--- a/source3/modules/vfs_glusterfs.c
+++ b/source3/modules/vfs_glusterfs.c
@@ -362,6 +362,11 @@ static int vfs_gluster_connect(struct vfs_handle_struct *handle,
*/
lp_do_parameter(SNUM(handle->conn), "shadow:mountpoint", "/");
+ /*
+ * Unless we have an async implementation of getxattrat turn this off.
+ */
+ lp_do_parameter(SNUM(handle->conn), "smbd:async dosmode", "false");
+
done:
if (ret < 0) {
if (fs)
diff --git a/source3/modules/vfs_gpfs.c b/source3/modules/vfs_gpfs.c
index 5f21bc0826d..982dc19e785 100644
--- a/source3/modules/vfs_gpfs.c
+++ b/source3/modules/vfs_gpfs.c
@@ -2163,6 +2163,12 @@ static int vfs_gpfs_connect(struct vfs_handle_struct *handle,
}
}
+ /*
+ * Unless we have an async implementation of get_dos_attributes turn
+ * this off.
+ */
+ lp_do_parameter(SNUM(handle->conn), "smbd:async dosmode", "false");
+
return 0;
}
diff --git a/source3/smbd/smb2_query_directory.c b/source3/smbd/smb2_query_directory.c
index aeba134810f..90527790817 100644
--- a/source3/smbd/smb2_query_directory.c
+++ b/source3/smbd/smb2_query_directory.c
@@ -25,6 +25,7 @@
#include "trans2.h"
#include "../lib/util/tevent_ntstatus.h"
#include "system/filesys.h"
+#include "lib/pthreadpool/pthreadpool_tevent.h"
#undef DBGC_CLASS
#define DBGC_CLASS DBGC_SMB2
@@ -205,7 +206,18 @@ static struct tevent_req *fetch_write_time_send(TALLOC_CTX *mem_ctx,
bool *stop);
static NTSTATUS fetch_write_time_recv(struct tevent_req *req);
+static struct tevent_req *fetch_dos_mode_send(
+ TALLOC_CTX *mem_ctx,
+ struct smb_vfs_ev_glue *evg,
+ struct files_struct *dir_fsp,
+ struct smb_filename **smb_fname,
+ uint32_t info_level,
+ uint8_t *entry_marshall_buf);
+
+static NTSTATUS fetch_dos_mode_recv(struct tevent_req *req);
+
struct smbd_smb2_query_directory_state {
+ struct smb_vfs_ev_glue *evg;
struct tevent_context *ev;
struct smbd_smb2_request *smb2req;
uint64_t async_sharemode_count;
@@ -225,13 +237,18 @@ struct smbd_smb2_query_directory_state {
uint32_t dirtype;
bool dont_descend;
bool ask_sharemode;
+ bool async_dosmode;
bool async_ask_sharemode;
int last_entry_off;
+ struct pthreadpool_tevent *tp_chdir_safe;
+ size_t max_async_dosmode_active;
+ uint32_t async_dosmode_active;
bool done;
};
static bool smb2_query_directory_next_entry(struct tevent_req *req);
static void smb2_query_directory_fetch_write_time_done(struct tevent_req *subreq);
+static void smb2_query_directory_dos_mode_done(struct tevent_req *subreq);
static void smb2_query_directory_waited(struct tevent_req *subreq);
static struct tevent_req *smbd_smb2_query_directory_send(TALLOC_CTX *mem_ctx,
@@ -260,7 +277,9 @@ static struct tevent_req *smbd_smb2_query_directory_send(TALLOC_CTX *mem_ctx,
if (req == NULL) {
return NULL;
}
+ state->evg = conn->user_vfs_evg;
state->ev = ev;
+ state->tp_chdir_safe = smb_vfs_ev_glue_tp_chdir_safe(state->evg);
state->fsp = fsp;
state->smb2req = smb2req;
state->in_output_buffer_length = in_output_buffer_length;
@@ -488,12 +507,31 @@ static struct tevent_req *smbd_smb2_query_directory_send(TALLOC_CTX *mem_ctx,
if (state->info_level != SMB_FIND_FILE_NAMES_INFO) {
state->ask_sharemode = lp_parm_bool(
SNUM(conn), "smbd", "search ask sharemode", true);
+
+ state->async_dosmode = lp_parm_bool(
+ SNUM(conn), "smbd", "async dosmode", false);
}
if (state->ask_sharemode && lp_clustering()) {
state->ask_sharemode = false;
state->async_ask_sharemode = true;
+ }
+ if (state->async_dosmode) {
+ size_t max_threads;
+
+ max_threads = pthreadpool_tevent_max_threads(state->tp_chdir_safe);
+
+ state->max_async_dosmode_active = lp_parm_ulong(
+ SNUM(conn), "smbd", "max async dosmode",
+ max_threads * 2);
+
+ if (state->max_async_dosmode_active == 0) {
+ state->max_async_dosmode_active = 1;
+ }
+ }
+
+ if (state->async_dosmode || state->async_ask_sharemode) {
/*
* Should we only set async_internal
* if we're not the last request in
@@ -537,6 +575,7 @@ static bool smb2_query_directory_next_entry(struct tevent_req *req)
int space_remaining = state->in_output_buffer_length - off;
struct file_id file_id;
NTSTATUS status;
+ bool get_dosmode = !state->async_dosmode;
bool stop = false;
SMB_ASSERT(space_remaining >= 0);
@@ -551,7 +590,7 @@ static bool smb2_query_directory_next_entry(struct tevent_req *req)
false, /* requires_resume_key */
state->dont_descend,
state->ask_sharemode,
- true,
+ get_dosmode,
8, /* align to 8 bytes */
false, /* no padding */
&state->pdata,
@@ -605,6 +644,36 @@ static bool smb2_query_directory_next_entry(struct tevent_req *req)
state->async_sharemode_count++;
}
+ if (state->async_dosmode) {
+ struct tevent_req *subreq = NULL;
+ uint8_t *buf = NULL;
+ size_t outstanding_aio;
+
+ buf = (uint8_t *)state->base_data + state->last_entry_off;
+
+ subreq = fetch_dos_mode_send(state,
+ state->evg,
+ state->fsp,
+ &smb_fname,
+ state->info_level,
+ buf);
+ if (tevent_req_nomem(subreq, req)) {
+ return true;
+ }
+ tevent_req_set_callback(subreq,
+ smb2_query_directory_dos_mode_done,
+ req);
+
+ state->async_dosmode_active++;
+
+ outstanding_aio = pthreadpool_tevent_queued_jobs(
+ state->tp_chdir_safe);
+
+ if (outstanding_aio > state->max_async_dosmode_active) {
+ stop = true;
+ }
+ }
+
TALLOC_FREE(smb_fname);
state->num++;
@@ -625,6 +694,10 @@ last_entry_done:
return true;
}
+ if (state->async_dosmode_active > 0) {
+ return true;
+ }
+
if (state->find_async_delay_usec > 0) {
struct timeval tv;
struct tevent_req *subreq = NULL;
@@ -674,6 +747,28 @@ static void smb2_query_directory_fetch_write_time_done(struct tevent_req *subreq
return;
}
+static void smb2_query_directory_dos_mode_done(struct tevent_req *subreq)
+{ /*
+ * Completion callback installed on every fetch_dos_mode_send()
+ * subrequest started by the directory enumeration loop.
+ *
+ * subreq: the finished fetch_dos_mode request; its callback data is
+ * the parent query-directory request.
+ */
+ struct tevent_req *req =
+ tevent_req_callback_data(subreq,
+ struct tevent_req);
+ struct smbd_smb2_query_directory_state *state =
+ tevent_req_data(req,
+ struct smbd_smb2_query_directory_state);
+ NTSTATUS status;
+
+ status = fetch_dos_mode_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (tevent_req_nterror(req, status)) { /* error is reported on the parent request; stop here */
+ return;
+ }
+
+ state->async_dosmode_active--; /* balances the increment done after fetch_dos_mode_send() */
+
+ smb2_query_directory_check_next_entry(req); /* returns early while async_dosmode_active (or async_sharemode_count) is still > 0 */
+ return;
+}
+
static void smb2_query_directory_check_next_entry(struct tevent_req *req)
{
struct smbd_smb2_query_directory_state *state = tevent_req_data(
@@ -687,7 +782,9 @@ static void smb2_query_directory_check_next_entry(struct tevent_req *req)
return;
}
- if (state->async_sharemode_count > 0) {
+ if (state->async_sharemode_count > 0 ||
+ state->async_dosmode_active > 0)
+ {
return;
}
@@ -860,3 +957,127 @@ static NTSTATUS fetch_write_time_recv(struct tevent_req *req)
tevent_req_received(req);
return NT_STATUS_OK;
}
+
+struct fetch_dos_mode_state { /* per-entry state of a fetch_dos_mode_send()/_recv() request */
+ struct files_struct *dir_fsp; /* directory handle handed to dos_mode_at_send(); its conn is used for the create time */
+ struct smb_filename *smb_fname; /* entry name, moved onto this state with talloc_move() */
+ uint32_t info_level; /* find info level; selects the field offsets to patch */
+ uint8_t *entry_marshall_buf; /* this entry's already marshalled record (caller passes base_data + last_entry_off) */
+};
+
+static void fetch_dos_mode_done(struct tevent_req *subreq);
+
+static struct tevent_req *fetch_dos_mode_send(
+ TALLOC_CTX *mem_ctx,
+ struct smb_vfs_ev_glue *evg,
+ struct files_struct *dir_fsp,
+ struct smb_filename **smb_fname,
+ uint32_t info_level,
+ uint8_t *entry_marshall_buf)
+{ /*
+ * Start fetching the DOS mode of one directory entry via
+ * dos_mode_at_send(); fetch_dos_mode_done() later writes the result
+ * into entry_marshall_buf.
+ *
+ * mem_ctx: talloc parent of the new request.
+ * evg: passed on to dos_mode_at_send(); also the source of the tevent
+ * context used for tevent_req_post().
+ * dir_fsp: directory the entry is looked up relative to.
+ * smb_fname: in/out; *smb_fname is moved onto the request state with
+ * talloc_move(), so the request owns it from here on.
+ * info_level: find info level the entry was marshalled with.
+ * entry_marshall_buf: the entry's marshalled record; stored, not
+ * copied.
+ *
+ * Returns NULL if the request itself cannot be created. If
+ * dos_mode_at_send() returns NULL the request is handed back through
+ * tevent_req_post(). Otherwise the pending request is returned.
+ */
+ struct tevent_context *ev = smb_vfs_ev_glue_ev_ctx(evg);
+ struct tevent_req *req = NULL;
+ struct fetch_dos_mode_state *state = NULL;
+ struct tevent_req *subreq = NULL;
+
+ req = tevent_req_create(mem_ctx, &state, struct fetch_dos_mode_state);
+ if (req == NULL) {
+ return NULL;
+ }
+ *state = (struct fetch_dos_mode_state) {
+ .dir_fsp = dir_fsp,
+ .info_level = info_level,
+ .entry_marshall_buf = entry_marshall_buf,
+ };
+
+ state->smb_fname = talloc_move(state, smb_fname); /* take ownership so the name lives as long as the request */
+
+ subreq = dos_mode_at_send(state, evg, dir_fsp, state->smb_fname);
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, fetch_dos_mode_done, req);
+
+ return req;
+}
+
+static void fetch_dos_mode_done(struct tevent_req *subreq)
+{ /*
+ * Completion callback for the dos_mode_at_send() subrequest started
+ * in fetch_dos_mode_send(). Patches the DOS mode and the create time
+ * into the entry's marshalled record, then completes the
+ * fetch_dos_mode request.
+ */
+ struct tevent_req *req =
+ tevent_req_callback_data(subreq,
+ struct tevent_req);
+ struct fetch_dos_mode_state *state =
+ tevent_req_data(req,
+ struct fetch_dos_mode_state);
+ uint32_t dfs_dosmode;
+ uint32_t dosmode;
+ struct timespec btime_ts = {0};
+ off_t dosmode_off;
+ off_t btime_off;
+ NTSTATUS status;
+
+ status = dos_mode_at_recv(subreq, &dosmode);
+ TALLOC_FREE(subreq);
+ if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) { /*
+ * Not treated as an error: the request completes successfully and
+ * the marshalled entry is left untouched.
+ * NOTE(review): presumably the entry vanished between enumeration
+ * and lookup - confirm.
+ */
+ tevent_req_done(req);
+ return;
+ }
+ if (!NT_STATUS_IS_OK(status)) {
+ tevent_req_nterror(req, status);
+ return;
+ }
+
+ switch (state->info_level) { /* pick the byte offsets, within the marshalled entry, of the fields patched below */
+ case SMB_FIND_ID_BOTH_DIRECTORY_INFO:
+ case SMB_FIND_FILE_BOTH_DIRECTORY_INFO:
+ case SMB_FIND_FILE_DIRECTORY_INFO:
+ case SMB_FIND_FILE_FULL_DIRECTORY_INFO:
+ case SMB_FIND_ID_FULL_DIRECTORY_INFO:
+ btime_off = 8; /* create time field (presumably CreationTime in the MS-FSCC layouts) */
+ dosmode_off = 56; /* attributes field (presumably FileAttributes in the MS-FSCC layouts) */
+ break;
+
+ default:
+ DBG_ERR("Unsupported info_level [%u]\n", state->info_level);
+ tevent_req_nterror(req, NT_STATUS_INVALID_LEVEL);
+ return;
+ }
+
+
+ dfs_dosmode = IVAL(state->entry_marshall_buf, dosmode_off);
+ if (dfs_dosmode == 0) {
+ /*
+ * DOS mode for a DFS link, only overwrite if still set to 0 and
+ * not already populated by the lower layer for a DFS link in
+ * smbd_dirptr_lanman2_mode_fn().
+ *
+ * In other words: a non-zero value already present in the buffer
+ * is kept, otherwise the asynchronously fetched dosmode is stored.
+ */
+ SIVAL(state->entry_marshall_buf, dosmode_off, dosmode);
+ }
+
+ btime_ts = get_create_timespec(state->dir_fsp->conn,
+ NULL,
+ state->smb_fname);
+ if (lp_dos_filetime_resolution(SNUM(state->dir_fsp->conn))) { /* share option: adjust the timestamp via dos_filetime_timespec() */
+ dos_filetime_timespec(&btime_ts);
+ }
+
+ put_long_date_timespec(state->dir_fsp->conn->ts_res,
+ (char *)state->entry_marshall_buf + btime_off,
+ btime_ts); /* written regardless of whether the dosmode above was overwritten */
+
+ tevent_req_done(req);
+ return;
+}
+
+static NTSTATUS fetch_dos_mode_recv(struct tevent_req *req)
+{ /*
+ * Collect the result of a fetch_dos_mode_send() request.
+ *
+ * Returns the NTSTATUS error recorded on the request, or
+ * NT_STATUS_OK if there is none. tevent_req_received() is called on
+ * both paths. The fetched values are not returned here; they were
+ * already written into the entry buffer by fetch_dos_mode_done().
+ */
+ NTSTATUS status;
+
+ if (tevent_req_is_nterror(req, &status)) {
+ tevent_req_received(req);
+ return status;
+ }
+
+ tevent_req_received(req);
+ return NT_STATUS_OK;
+}