From 9ffa18884cceb2e5731e422140fad06292de0577 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 23 Jan 2023 18:58:27 +0100
Subject: gfs2: gl_object races fix

Function glock_clear_object() checks if the specified glock is still
pointing at the right object and clears the gl_object pointer.  To
handle the case of incompletely constructed inodes, glock_clear_object()
also allows gl_object to be NULL.

However, in the teardown case, when iget_failed() is called and the
inode is removed from the inode hash, by the time we get to the
glock_clear_object() calls in gfs2_put_super() and its helpers, we don't
have exclusion against concurrent gfs2_inode_lookup() and
gfs2_create_inode() calls, and the inode and iopen glocks may already be
pointing at another inode, so the checks in glock_clear_object() are
incorrect.

To better handle this case, always completely disassociate an inode from
its glocks before tearing it down.  In addition, get rid of a duplicate
glock_clear_object() call in gfs2_evict_inode().  That way,
glock_clear_object() will only ever be called when the glock points at
the current inode, and the NULL check in glock_clear_object() can be
removed.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 4 ++--
 fs/gfs2/inode.c | 8 ++++++++
 fs/gfs2/super.c | 4 +---
 3 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 524f3c96b9a4..2868e979810a 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -883,6 +883,7 @@ void glock_set_object(struct gfs2_glock *gl, void *object)
 /**
  * glock_clear_object - clear the gl_object field of a glock
  * @gl: the glock
+ * @object: object the glock currently points at
  */
 void glock_clear_object(struct gfs2_glock *gl, void *object)
 {
@@ -892,8 +893,7 @@ void glock_clear_object(struct gfs2_glock *gl, void *object)
 	prev_object = gl->gl_object;
 	gl->gl_object = NULL;
 	spin_unlock(&gl->gl_lockref.lock);
-	if (gfs2_assert_warn(gl->gl_name.ln_sbd,
-			     prev_object == object || prev_object == NULL)) {
+	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == object)) {
 		pr_warn("glock=%u/%llx\n",
 			gl->gl_name.ln_type,
 			(unsigned long long)gl->gl_name.ln_number);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 614db3055c02..c76fdb8f951f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -225,6 +225,10 @@ fail:
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 	if (gfs2_holder_initialized(&i_gh))
 		gfs2_glock_dq_uninit(&i_gh);
+	if (ip->i_gl) {
+		gfs2_glock_put(ip->i_gl);
+		ip->i_gl = NULL;
+	}
 	iget_failed(inode);
 	return ERR_PTR(error);
 }
@@ -816,6 +820,10 @@ fail_gunlock3:
 fail_gunlock2:
 	gfs2_glock_put(io_gl);
 fail_free_inode:
+	if (ip->i_gl) {
+		gfs2_glock_put(ip->i_gl);
+		ip->i_gl = NULL;
+	}
 	gfs2_rs_deltree(&ip->i_res);
 	gfs2_qa_put(ip);
 fail_free_acls:
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 999cc146d708..de99505d49de 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1401,10 +1401,8 @@ static void gfs2_evict_inode(struct inode *inode)
 	if (gfs2_rs_active(&ip->i_res))
 		gfs2_rs_deltree(&ip->i_res);
 
-	if (gfs2_holder_initialized(&gh)) {
-		glock_clear_object(ip->i_gl, ip);
+	if (gfs2_holder_initialized(&gh))
 		gfs2_glock_dq_uninit(&gh);
-	}
 	if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
 		fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
 out:
-- 
cgit v1.2.1


From 2d1439557ffeab3b9729f1c64fa86830070f9a04 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 24 Jan 2023 14:14:42 +0100
Subject: gfs2: Improve gfs2_upgrade_iopen_glock comment

Improve the comment describing the inode and iopen glock interactions
and the glock poking related to inode evict.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/super.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index de99505d49de..a857b99252ae 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1175,15 +1175,23 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode)
 	gfs2_glock_dq_wait(gh);
 
 	/*
-	 * If there are no other lock holders, we'll get the lock immediately.
+	 * If there are no other lock holders, we will immediately get
+	 * exclusive access to the iopen glock here.
+	 *
 	 * Otherwise, the other nodes holding the lock will be notified about
-	 * our locking request.  If they don't have the inode open, they'll
-	 * evict the cached inode and release the lock.  Otherwise, if they
-	 * poke the inode glock, we'll take this as an indication that they
-	 * still need the iopen glock and that they'll take care of deleting
-	 * the inode when they're done.  As a last resort, if another node
-	 * keeps holding the iopen glock without showing any activity on the
-	 * inode glock, we'll eventually time out.
+	 * our locking request.  If they do not have the inode open, they are
+	 * expected to evict the cached inode and release the lock, allowing us
+	 * to proceed.
+	 *
+	 * Otherwise, if they cannot evict the inode, they are expected to poke
+	 * the inode glock (note: not the iopen glock).  We will notice that
+	 * and stop waiting for the iopen glock immediately.  The other node(s)
+	 * are then expected to take care of deleting the inode when they no
+	 * longer use it.
+	 *
+	 * As a last resort, if another node keeps holding the iopen glock
+	 * without showing any activity on the inode glock, we will eventually
+	 * time out and fail the iopen glock upgrade.
 	 *
 	 * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
 	 * locking request as an optimization to notify lock holders as soon as
-- 
cgit v1.2.1


From 8fb8f70ec71eb5ca51ecbfc2303523ff836648db Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sat, 10 Dec 2022 02:14:04 +0100
Subject: gfs2: Clean up gfs2_scan_glock_lru

Switch to list_for_each_entry_safe() and eliminate the "skipped" list in
gfs2_scan_glock_lru().

At the same time, scan exactly the requested number of items, not one
more than that number.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2868e979810a..05ef0ffa7ea6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1985,26 +1985,21 @@ add_back_to_lru:
 
 static long gfs2_scan_glock_lru(int nr)
 {
-	struct gfs2_glock *gl;
-	LIST_HEAD(skipped);
+	struct gfs2_glock *gl, *next;
 	LIST_HEAD(dispose);
 	long freed = 0;
 
 	spin_lock(&lru_lock);
-	while ((nr-- >= 0) && !list_empty(&lru_list)) {
-		gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru);
-
+	list_for_each_entry_safe(gl, next, &lru_list, gl_lru) {
+		if (nr-- <= 0)
+			break;
 		/* Test for being demotable */
 		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
 			list_move(&gl->gl_lru, &dispose);
 			atomic_dec(&lru_count);
 			freed++;
-			continue;
 		}
-
-		list_move(&gl->gl_lru, &skipped);
 	}
-	list_splice(&skipped, &lru_list);
 	if (!list_empty(&dispose))
 		gfs2_dispose_glock_lru(&dispose);
 	spin_unlock(&lru_lock);
-- 
cgit v1.2.1


From 228804a35caa7edae4a81049281e7f106dea1ad1 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sat, 10 Dec 2022 02:21:32 +0100
Subject: gfs2: Make glock lru list scanning safer

In __gfs2_glock_put(), remove the glock from the lru list *after*
dropping the glock lock.  This prevents deadlocks against
gfs2_scan_glock_lru().

In gfs2_scan_glock_lru(), make sure that the glock's reference count is
zero before moving the glock to the dispose list.  This skips glocks
that are marked dead as well as glocks that are still in use.
Additionally, switch to spin_trylock() as we already do in
gfs2_dispose_glock_lru(); this alone would also be enough to prevent
deadlocks against __gfs2_glock_put().

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 05ef0ffa7ea6..fbfbf7a2feac 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -274,9 +274,8 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
 	struct address_space *mapping = gfs2_glock2aspace(gl);
 
 	lockref_mark_dead(&gl->gl_lockref);
-
-	gfs2_glock_remove_from_lru(gl);
 	spin_unlock(&gl->gl_lockref.lock);
+	gfs2_glock_remove_from_lru(gl);
 	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
 	if (mapping) {
 		truncate_inode_pages_final(mapping);
@@ -1995,9 +1994,14 @@ static long gfs2_scan_glock_lru(int nr)
 			break;
 		/* Test for being demotable */
 		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
-			list_move(&gl->gl_lru, &dispose);
-			atomic_dec(&lru_count);
-			freed++;
+			if (!spin_trylock(&gl->gl_lockref.lock))
+				continue;
+			if (!gl->gl_lockref.count) {
+				list_move(&gl->gl_lru, &dispose);
+				atomic_dec(&lru_count);
+				freed++;
+			}
+			spin_unlock(&gl->gl_lockref.lock);
 		}
 	}
 	if (!list_empty(&dispose))
-- 
cgit v1.2.1


From 3056dc46559bfe3fc4b79771dcbc2d003f9fd313 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 9 Dec 2022 18:28:20 +0100
Subject: gfs2: Get rid of GLF_PENDING_DELETE flag

Get rid of the GLF_PENDING_DELETE glock flag introduced by commit
a0e3cc65fa29 ("gfs2: Turn gl_delete into a delayed work").  The only use
of that flag is to prevent the iopen glock from being demoted (i.e.,
unlocked) while delete work is pending.  It turns out that demoting the
iopen glock while delete work is pending is perfectly fine; we only need
to make sure that the glock isn't being freed while still in use.  This
is ensured by the previous patch because delete_work_func() owns a
reference while the work is queued or running.

With these changes, gfs2_queue_delete_work() no longer takes the glock
spin lock, so we can use it in iopen_go_callback() instead of
open-coding it there.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c  | 26 +++-----------------------
 fs/gfs2/glock.h  |  1 -
 fs/gfs2/glops.c  |  9 +--------
 fs/gfs2/incore.h |  3 +--
 4 files changed, 5 insertions(+), 34 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index fbfbf7a2feac..8d55616488aa 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -984,10 +984,6 @@ static void delete_work_func(struct work_struct *work)
 	struct inode *inode;
 	u64 no_addr = gl->gl_name.ln_number;
 
-	spin_lock(&gl->gl_lockref.lock);
-	clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
-	spin_unlock(&gl->gl_lockref.lock);
-
 	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
 		/*
 		 * If we can evict the inode, give the remote node trying to
@@ -2064,28 +2060,14 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 
 bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
 {
-	bool queued;
-
-	spin_lock(&gl->gl_lockref.lock);
-	queued = queue_delayed_work(gfs2_delete_workqueue,
-				    &gl->gl_delete, delay);
-	if (queued)
-		set_bit(GLF_PENDING_DELETE, &gl->gl_flags);
-	spin_unlock(&gl->gl_lockref.lock);
-	return queued;
+	return queue_delayed_work(gfs2_delete_workqueue,
+				  &gl->gl_delete, delay);
 }
 
 void gfs2_cancel_delete_work(struct gfs2_glock *gl)
 {
-	if (cancel_delayed_work(&gl->gl_delete)) {
-		clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
+	if (cancel_delayed_work(&gl->gl_delete))
 		gfs2_glock_put(gl);
-	}
-}
-
-bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
-{
-	return test_bit(GLF_PENDING_DELETE, &gl->gl_flags);
 }
 
 static void flush_delete_work(struct gfs2_glock *gl)
@@ -2307,8 +2289,6 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
 		*p++ = 'o';
 	if (test_bit(GLF_BLOCKING, gflags))
 		*p++ = 'b';
-	if (test_bit(GLF_PENDING_DELETE, gflags))
-		*p++ = 'P';
 	if (test_bit(GLF_FREEING, gflags))
 		*p++ = 'x';
 	if (test_bit(GLF_INSTANTIATE_NEEDED, gflags))
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index f37ac087e2c1..17b05d51977c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -270,7 +270,6 @@ extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
 extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
 extern bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay);
 extern void gfs2_cancel_delete_work(struct gfs2_glock *gl);
-extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl);
 extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
 extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
 extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index d78b61ecc1cd..68676bc78171 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -651,17 +651,11 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
 	    gl->gl_state == LM_ST_SHARED && ip) {
 		gl->gl_lockref.count++;
-		if (!queue_delayed_work(gfs2_delete_workqueue,
-					&gl->gl_delete, 0))
+		if (!gfs2_queue_delete_work(gl, 0))
 			gl->gl_lockref.count--;
 	}
 }
 
-static int iopen_go_demote_ok(const struct gfs2_glock *gl)
-{
-       return !gfs2_delete_work_queued(gl);
-}
-
 /**
  * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it
  * @gl: glock being freed
@@ -767,7 +761,6 @@ const struct gfs2_glock_operations gfs2_iopen_glops = {
 	.go_type = LM_TYPE_IOPEN,
 	.go_callback = iopen_go_callback,
 	.go_dump = inode_go_dump,
-	.go_demote_ok = iopen_go_demote_ok,
 	.go_flags = GLOF_LRU | GLOF_NONDISK,
 	.go_subclass = 1,
 };
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c26765080f28..cd886364b11d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -329,8 +329,7 @@ enum {
 	GLF_LRU				= 13,
 	GLF_OBJECT			= 14, /* Used only for tracing */
 	GLF_BLOCKING			= 15,
-	GLF_PENDING_DELETE		= 17,
-	GLF_FREEING			= 18, /* Wait for glock to be freed */
+	GLF_FREEING			= 16, /* Wait for glock to be freed */
 };
 
 struct gfs2_glock {
-- 
cgit v1.2.1


From 0247f4e959c01f6ce1fcc2091c571f8c0742a065 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 6 Dec 2022 16:04:22 +0100
Subject: gfs2: Move delete workqueue into super block

Move the global delete workqueue into struct gfs2_sbd so that we can
flush / drain it without interfering with other filesystems.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c      | 21 +++++++--------------
 fs/gfs2/glock.h      |  1 -
 fs/gfs2/incore.h     |  4 ++++
 fs/gfs2/ops_fstype.c | 10 +++++++++-
 fs/gfs2/super.c      |  2 ++
 5 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8d55616488aa..1565fdf32ac0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -67,7 +67,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
 
 static struct dentry *gfs2_root;
 static struct workqueue_struct *glock_workqueue;
-struct workqueue_struct *gfs2_delete_workqueue;
 static LIST_HEAD(lru_list);
 static atomic_t lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(lru_lock);
@@ -2060,7 +2059,9 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 
 bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
 {
-	return queue_delayed_work(gfs2_delete_workqueue,
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+	return queue_delayed_work(sdp->sd_delete_wq,
 				  &gl->gl_delete, delay);
 }
 
@@ -2073,8 +2074,10 @@ void gfs2_cancel_delete_work(struct gfs2_glock *gl)
 static void flush_delete_work(struct gfs2_glock *gl)
 {
 	if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
+		struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
 		if (cancel_delayed_work(&gl->gl_delete)) {
-			queue_delayed_work(gfs2_delete_workqueue,
+			queue_delayed_work(sdp->sd_delete_wq,
 					   &gl->gl_delete, 0);
 		}
 	}
@@ -2083,7 +2086,7 @@ static void flush_delete_work(struct gfs2_glock *gl)
 void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
 {
 	glock_hash_walk(flush_delete_work, sdp);
-	flush_workqueue(gfs2_delete_workqueue);
+	flush_workqueue(sdp->sd_delete_wq);
 }
 
 /**
@@ -2444,18 +2447,9 @@ int __init gfs2_glock_init(void)
 		rhashtable_destroy(&gl_hash_table);
 		return -ENOMEM;
 	}
-	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
-						WQ_MEM_RECLAIM | WQ_FREEZABLE,
-						0);
-	if (!gfs2_delete_workqueue) {
-		destroy_workqueue(glock_workqueue);
-		rhashtable_destroy(&gl_hash_table);
-		return -ENOMEM;
-	}
 
 	ret = register_shrinker(&glock_shrinker, "gfs2-glock");
 	if (ret) {
-		destroy_workqueue(gfs2_delete_workqueue);
 		destroy_workqueue(glock_workqueue);
 		rhashtable_destroy(&gl_hash_table);
 		return ret;
@@ -2472,7 +2466,6 @@ void gfs2_glock_exit(void)
 	unregister_shrinker(&glock_shrinker);
 	rhashtable_destroy(&gl_hash_table);
 	destroy_workqueue(glock_workqueue);
-	destroy_workqueue(gfs2_delete_workqueue);
 }
 
 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 17b05d51977c..b9da61dbe550 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -144,7 +144,6 @@ struct gfs2_glock_aspace {
 	struct address_space mapping;
 };
 
-extern struct workqueue_struct *gfs2_delete_workqueue;
 static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 {
 	struct gfs2_holder *gh;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index cd886364b11d..6e8a5f2d6b49 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -770,6 +770,10 @@ struct gfs2_sbd {
 
 	struct completion sd_journal_ready;
 
+	/* Workqueue stuff */
+
+	struct workqueue_struct *sd_delete_wq;
+
 	/* Daemon stuff */
 
 	struct task_struct *sd_logd_process;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c0cf1d2d0ef5..f13a940f99d3 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1197,9 +1197,15 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
 
 	snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
 
+	sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s",
+			WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname);
+	error = -ENOMEM;
+	if (!sdp->sd_delete_wq)
+		goto fail_free;
+
 	error = gfs2_sys_fs_add(sdp);
 	if (error)
-		goto fail_free;
+		goto fail_delete_wq;
 
 	gfs2_create_debugfs_file(sdp);
 
@@ -1309,6 +1315,8 @@ fail_lm:
 fail_debug:
 	gfs2_delete_debugfs_file(sdp);
 	gfs2_sys_fs_del(sdp);
+fail_delete_wq:
+	destroy_workqueue(sdp->sd_delete_wq);
 fail_free:
 	free_sbd(sdp);
 	sb->s_fs_info = NULL;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a857b99252ae..0b5cda464787 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -630,6 +630,8 @@ restart:
 	/*  Unmount the locking protocol  */
 	gfs2_lm_unmount(sdp);
 
+	destroy_workqueue(sdp->sd_delete_wq);
+
 	/*  At this point, we're through participating in the lockspace  */
 	gfs2_sys_fs_del(sdp);
 	free_sbd(sdp);
-- 
cgit v1.2.1


From f0e56edc2ec7a40f4e94590172f21317baafb196 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 21 Dec 2022 00:52:51 +0100
Subject: gfs2: Split the two kinds of glock "delete" work

Function delete_work_func() is used for two purposes:

 * to immediately try to evict the glock's inode, and

 * to verify after a little while that the inode has been deleted as
   expected, and didn't just get skipped.

These two operations are not separated very well, so introduce two new
glock flags to improve that.  Split gfs2_queue_delete_work() into
gfs2_queue_try_to_evict() and gfs2_queue_verify_evict().

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c  | 57 ++++++++++++++++++++++++++++++++++++++------------------
 fs/gfs2/glock.h  |  2 +-
 fs/gfs2/glops.c  |  2 +-
 fs/gfs2/incore.h |  2 ++
 fs/gfs2/rgrp.c   |  2 +-
 fs/gfs2/super.c  |  2 +-
 6 files changed, 45 insertions(+), 22 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1565fdf32ac0..b184375df9bd 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -975,6 +975,26 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
 	return evicted;
 }
 
+bool gfs2_queue_try_to_evict(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+	if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags))
+		return false;
+	return queue_delayed_work(sdp->sd_delete_wq,
+				  &gl->gl_delete, 0);
+}
+
+static bool gfs2_queue_verify_evict(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+	if (test_and_set_bit(GLF_VERIFY_EVICT, &gl->gl_flags))
+		return false;
+	return queue_delayed_work(sdp->sd_delete_wq,
+				  &gl->gl_delete, 5 * HZ);
+}
+
 static void delete_work_func(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
@@ -983,7 +1003,7 @@ static void delete_work_func(struct work_struct *work)
 	struct inode *inode;
 	u64 no_addr = gl->gl_name.ln_number;
 
-	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
+	if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) {
 		/*
 		 * If we can evict the inode, give the remote node trying to
 		 * delete the inode some time before verifying that the delete
@@ -1002,22 +1022,25 @@ static void delete_work_func(struct work_struct *work)
 		 * step entirely.
 		 */
 		if (gfs2_try_evict(gl)) {
-			if (gfs2_queue_delete_work(gl, 5 * HZ))
+			if (gfs2_queue_verify_evict(gl))
 				return;
 		}
 		goto out;
 	}
 
-	inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
-				    GFS2_BLKST_UNLINKED);
-	if (IS_ERR(inode)) {
-		if (PTR_ERR(inode) == -EAGAIN &&
-			(gfs2_queue_delete_work(gl, 5 * HZ)))
+	if (test_and_clear_bit(GLF_VERIFY_EVICT, &gl->gl_flags)) {
+		inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
+					    GFS2_BLKST_UNLINKED);
+		if (IS_ERR(inode)) {
+			if (PTR_ERR(inode) == -EAGAIN &&
+			    gfs2_queue_verify_evict(gl))
 				return;
-	} else {
-		d_prune_aliases(inode);
-		iput(inode);
+		} else {
+			d_prune_aliases(inode);
+			iput(inode);
+		}
 	}
+
 out:
 	gfs2_glock_put(gl);
 }
@@ -2057,16 +2080,10 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 	rhashtable_walk_exit(&iter);
 }
 
-bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
-{
-	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-
-	return queue_delayed_work(sdp->sd_delete_wq,
-				  &gl->gl_delete, delay);
-}
-
 void gfs2_cancel_delete_work(struct gfs2_glock *gl)
 {
+	clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags);
+	clear_bit(GLF_VERIFY_EVICT, &gl->gl_flags);
 	if (cancel_delayed_work(&gl->gl_delete))
 		gfs2_glock_put(gl);
 }
@@ -2298,6 +2315,10 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
 		*p++ = 'n';
 	if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags))
 		*p++ = 'N';
+	if (test_bit(GLF_TRY_TO_EVICT, gflags))
+		*p++ = 'e';
+	if (test_bit(GLF_VERIFY_EVICT, gflags))
+		*p++ = 'E';
 	*p = 0;
 	return buf;
 }
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b9da61dbe550..1f1ba92c15a8 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -267,7 +267,7 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
 
 extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
 extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-extern bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay);
+extern bool gfs2_queue_try_to_evict(struct gfs2_glock *gl);
 extern void gfs2_cancel_delete_work(struct gfs2_glock *gl);
 extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp);
 extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 68676bc78171..e4c585f16ddd 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -651,7 +651,7 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
 	    gl->gl_state == LM_ST_SHARED && ip) {
 		gl->gl_lockref.count++;
-		if (!gfs2_queue_delete_work(gl, 0))
+		if (!gfs2_queue_try_to_evict(gl))
 			gl->gl_lockref.count--;
 	}
 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6e8a5f2d6b49..9b380125eb78 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -330,6 +330,8 @@ enum {
 	GLF_OBJECT			= 14, /* Used only for tracing */
 	GLF_BLOCKING			= 15,
 	GLF_FREEING			= 16, /* Wait for glock to be freed */
+	GLF_TRY_TO_EVICT		= 17, /* iopen glocks only */
+	GLF_VERIFY_EVICT		= 18, /* iopen glocks only */
 };
 
 struct gfs2_glock {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f602fb844951..3b9b76e980ad 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1879,7 +1879,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 		 */
 		ip = gl->gl_object;
 
-		if (ip || !gfs2_queue_delete_work(gl, 0))
+		if (ip || !gfs2_queue_try_to_evict(gl))
 			gfs2_glock_put(gl);
 		else
 			found++;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0b5cda464787..7b0bfe41b5cc 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -954,7 +954,7 @@ static int gfs2_drop_inode(struct inode *inode)
 		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
 
 		gfs2_glock_hold(gl);
-		if (!gfs2_queue_delete_work(gl, 0))
+		if (!gfs2_queue_try_to_evict(gl))
 			gfs2_glock_queue_put(gl);
 		return 0;
 	}
-- 
cgit v1.2.1


From fd5f446f0b3d529e55cf2f81f3b994a7216808ca Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 24 Jan 2023 14:55:18 -0500
Subject: gfs2: check gl_object in rgrp glops

Function gfs2_clear_rgrpd() is called during unmount to free all rgrps
and their sub-objects. If the rgrp glock is held (e.g. in SH) it calls
gfs2_glock_cb() to unlock, then calls flush_delayed_work() to make
sure any glock work is finished. However, there is a race with other
cluster nodes who may request the rgrp glock in another mode (say, EX).

Function gfs2_clear_rgrpd() calls glock_clear_object() which sets gl_object
to NULL but that's done without holding the gl_lockref spin_lock.
While the lock is not held, another node's demote request can cause the
state machine to run again, and since the gl_lockref is released in
do_xmote, the second process's call to do_xmote can call go_inval
(rgrp_go_inval) after the gl_object has been cleared, which results in
a NULL pointer dereference of the rgrp glock's gl_object.

Other go_inval glops functions don't require the gl_object to exist, as
evidenced by function inode_go_inval(), which explicitly checks "if (ip)"
before referencing gl_object. This patch does the same thing
for rgrp glocks. Both the go_inval and go_sync ops are patched to check
the existence of gl_object (rgd) before trying to dereference it.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glops.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index e4c585f16ddd..007cd59d0e47 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -193,7 +193,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
 	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
 	int error;
 
-	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
+	if (!rgd || !test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
 		return 0;
 	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
 
@@ -222,9 +222,12 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
 	struct address_space *mapping = &sdp->sd_aspace;
 	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
 	const unsigned bsize = sdp->sd_sb.sb_bsize;
-	loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
-	loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
+	loff_t start, end;
 
+	if (!rgd)
+		return;
+	start = (rgd->rd_addr * bsize) & PAGE_MASK;
+	end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
 	gfs2_rgrp_brelse(rgd);
 	WARN_ON_ONCE(!(flags & DIO_METADATA));
 	truncate_inode_pages_range(mapping, start, end);
-- 
cgit v1.2.1


From 1c9001515e8adc0743c6ae0707dc6f3aac926d0e Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 6 Dec 2022 17:27:14 +0100
Subject: gfs2: Add SDF_DEACTIVATING super block flag

Add a new SDF_DEACTIVATING super block flag that is set when the
filesystem has started to deactivate. This will be used in the next
patch to stop and drain the delete work during unmount.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/incore.h     | 1 +
 fs/gfs2/ops_fstype.c | 1 +
 fs/gfs2/sys.c        | 2 ++
 3 files changed, 4 insertions(+)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 9b380125eb78..d35bb49d1d3f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -606,6 +606,7 @@ enum {
 	SDF_REMOTE_WITHDRAW	= 13, /* Performing remote recovery */
 	SDF_WITHDRAW_RECOVERY	= 14, /* Wait for journal recovery when we are
 					 withdrawing */
+	SDF_DEACTIVATING	= 15,
 };
 
 enum gfs2_freeze_state {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index f13a940f99d3..c9fef0678320 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1743,6 +1743,7 @@ static void gfs2_kill_sb(struct super_block *sb)
 	sdp->sd_root_dir = NULL;
 	sdp->sd_master_dir = NULL;
 	shrink_dcache_sb(sb);
+	set_bit(SDF_DEACTIVATING, &sdp->sd_flags);
 	kill_block_super(sb);
 }
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index d87ea98cf535..c40118ea4bbc 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -87,6 +87,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
 		     "Withdraw In Prog:         %d\n"
 		     "Remote Withdraw:          %d\n"
 		     "Withdraw Recovery:        %d\n"
+		     "Deactivating:             %d\n"
 		     "sd_log_error:             %d\n"
 		     "sd_log_flush_lock:        %d\n"
 		     "sd_log_num_revoke:        %u\n"
@@ -115,6 +116,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
 		     test_bit(SDF_WITHDRAW_IN_PROG, &f),
 		     test_bit(SDF_REMOTE_WITHDRAW, &f),
 		     test_bit(SDF_WITHDRAW_RECOVERY, &f),
+		     test_bit(SDF_DEACTIVATING, &f),
 		     sdp->sd_log_error,
 		     rwsem_is_locked(&sdp->sd_log_flush_lock),
 		     sdp->sd_log_num_revoke,
-- 
cgit v1.2.1


From 6c0246a96e08cd1b5036c34c52de99bd9dffbb48 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 6 Dec 2022 00:12:59 +0100
Subject: gfs2: Cease delete work during unmount

Add a check to delete_work_func() so that it quits when it finds that
the filesystem is deactivating.  This speeds up the delete workqueue
draining in gfs2_kill_sb().

In addition, make sure that iopen_go_callback() won't queue any new
delete work while the filesystem is deactivating.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 3 +++
 fs/gfs2/glops.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b184375df9bd..5adc7d85dbf3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1022,6 +1022,8 @@ static void delete_work_func(struct work_struct *work)
 		 * step entirely.
 		 */
 		if (gfs2_try_evict(gl)) {
+			if (test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+				goto out;
 			if (gfs2_queue_verify_evict(gl))
 				return;
 		}
@@ -1033,6 +1035,7 @@ static void delete_work_func(struct work_struct *work)
 					    GFS2_BLKST_UNLINKED);
 		if (IS_ERR(inode)) {
 			if (PTR_ERR(inode) == -EAGAIN &&
+			    !test_bit(SDF_DEACTIVATING, &sdp->sd_flags) &&
 			    gfs2_queue_verify_evict(gl))
 				return;
 		} else {
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 007cd59d0e47..ad14818a790a 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -648,7 +648,8 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 	struct gfs2_inode *ip = gl->gl_object;
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 
-	if (!remote || sb_rdonly(sdp->sd_vfs))
+	if (!remote || sb_rdonly(sdp->sd_vfs) ||
+	    test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
 		return;
 
 	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
-- 
cgit v1.2.1


From 6b388abc33998330c6fe55a712d61be888fd7b67 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 6 Dec 2022 17:27:14 +0100
Subject: gfs2: Flush delete work before shrinking inode cache

In gfs2_kill_sb(), flush the delete work queue after setting the
SDF_DEACTIVATING flag.  This ensures that no new inodes will be
instantiated anymore, and the inode cache will be empty after the
following kill_block_super() -> generic_shutdown_super() ->
evict_inodes() call.

With that, function gfs2_make_fs_ro() now calls gfs2_flush_delete_work()
after the workqueue has been destroyed.  Skip that by checking for the
presence of the SDF_DEACTIVATING flag.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/ops_fstype.c | 9 +++++++++
 fs/gfs2/super.c      | 6 +++---
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c9fef0678320..9db376950014 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1743,7 +1743,16 @@ static void gfs2_kill_sb(struct super_block *sb)
 	sdp->sd_root_dir = NULL;
 	sdp->sd_master_dir = NULL;
 	shrink_dcache_sb(sb);
+
+	/*
+	 * Flush and then drain the delete workqueue here (via
+	 * destroy_workqueue()) to ensure that any delete work that
+	 * may be running will also see the SDF_DEACTIVATING flag.
+	 */
 	set_bit(SDF_DEACTIVATING, &sdp->sd_flags);
+	gfs2_flush_delete_work(sdp);
+	destroy_workqueue(sdp->sd_delete_wq);
+
 	kill_block_super(sb);
 }
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 7b0bfe41b5cc..63512bb9d6f0 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -529,7 +529,9 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
 	int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 
-	gfs2_flush_delete_work(sdp);
+	if (!test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+		gfs2_flush_delete_work(sdp);
+
 	if (!log_write_allowed && current == sdp->sd_quotad_process)
 		fs_warn(sdp, "The quotad daemon is withdrawing.\n");
 	else if (sdp->sd_quotad_process)
@@ -630,8 +632,6 @@ restart:
 	/*  Unmount the locking protocol  */
 	gfs2_lm_unmount(sdp);
 
-	destroy_workqueue(sdp->sd_delete_wq);
-
 	/*  At this point, we're through participating in the lockspace  */
 	gfs2_sys_fs_del(sdp);
 	free_sbd(sdp);
-- 
cgit v1.2.1


From b88beb9a246f7506778f8680ee9627cd85262ba4 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 26 Jan 2023 20:23:40 +0100
Subject: gfs2: Evict inodes cooperatively

Add a gfs2_evict_inodes() helper that evicts inodes cooperatively across
the cluster.  This avoids running into timeouts during unmount
unnecessarily.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/incore.h     |  1 +
 fs/gfs2/ops_fstype.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/super.c      |  7 +++++++
 3 files changed, 59 insertions(+)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d35bb49d1d3f..79485329118b 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -607,6 +607,7 @@ enum {
 	SDF_WITHDRAW_RECOVERY	= 14, /* Wait for journal recovery when we are
 					 withdrawing */
 	SDF_DEACTIVATING	= 15,
+	SDF_EVICTING		= 16,
 };
 
 enum gfs2_freeze_state {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 9db376950014..6de901c3b89b 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1728,6 +1728,55 @@ static int gfs2_meta_init_fs_context(struct fs_context *fc)
 	return 0;
 }
 
+/**
+ * gfs2_evict_inodes - evict inodes cooperatively
+ * @sb: the superblock
+ *
+ * When evicting an inode with a zero link count, we are trying to upgrade the
+ * inode's iopen glock from SH to EX mode in order to determine if we can
+ * delete the inode.  The other nodes are supposed to evict the inode from
+ * their caches if they can, and to poke the inode's inode glock if they cannot
+ * do so.  Either behavior allows gfs2_upgrade_iopen_glock() to proceed
+ * quickly, but if the other nodes are not cooperating, the lock upgrading
+ * attempt will time out.  Since inodes are evicted sequentially, this can add
+ * up quickly.
+ *
+ * Function evict_inodes() tries to hold the s_inode_list_lock spinlock for a
+ * long time, which prevents other inodes from being evicted concurrently.
+ * This precludes the cooperative behavior we are looking for.  This special
+ * version of evict_inodes() avoids that.
+ *
+ * Modeled after drop_pagecache_sb().
+ */
+static void gfs2_evict_inodes(struct super_block *sb)
+{
+	struct inode *inode, *toput_inode = NULL;
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+
+	set_bit(SDF_EVICTING, &sdp->sd_flags);
+
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		spin_lock(&inode->i_lock);
+		/* Never pin an inode that is being set up or torn down. */
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
+		atomic_inc(&inode->i_count);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&sb->s_inode_list_lock);
+
+		iput(toput_inode);
+		toput_inode = inode;
+
+		cond_resched();
+		spin_lock(&sb->s_inode_list_lock);
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+	iput(toput_inode);
+}
+
 static void gfs2_kill_sb(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
@@ -1744,6 +1793,8 @@ static void gfs2_kill_sb(struct super_block *sb)
 	sdp->sd_master_dir = NULL;
 	shrink_dcache_sb(sb);
 
+	gfs2_evict_inodes(sb);
+
 	/*
 	 * Flush and then drain the delete workqueue here (via
 	 * destroy_workqueue()) to ensure that any delete work that
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 63512bb9d6f0..52c3502de58c 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -935,6 +935,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 static int gfs2_drop_inode(struct inode *inode)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
 
 	if (inode->i_nlink &&
 	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
@@ -959,6 +960,12 @@ static int gfs2_drop_inode(struct inode *inode)
 		return 0;
 	}
 
+	/*
+	 * No longer cache inodes when trying to evict them all.
+	 */
+	if (test_bit(SDF_EVICTING, &sdp->sd_flags))
+		return 1;
+
 	return generic_drop_inode(inode);
 }
 
-- 
cgit v1.2.1


From 445cb1277e10d7e19b631ef8a64aa3f055df377d Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Mon, 9 Jan 2023 15:42:15 -0500
Subject: Revert "GFS2: free disk inode which is deleted by remote node -V2"

This reverts commit 970343cd4904 ("GFS2: free disk inode which is
deleted by remote node -V2").

The original intent behind commit 970343cd4904 was to cull dentries when a
remote node requests to demote an iopen glock, which happens when the
remote node tries to delete the inode.  This is now handled by
gfs2_try_evict(), which is called via iopen_go_callback() ->
gfs2_queue_try_to_evict().

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/dentry.c | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 2e215e8c3c88..6fe9ca253b70 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -83,26 +83,8 @@ static int gfs2_dhash(const struct dentry *dentry, struct qstr *str)
 	return 0;
 }
 
-static int gfs2_dentry_delete(const struct dentry *dentry)
-{
-	struct gfs2_inode *ginode;
-
-	if (d_really_is_negative(dentry))
-		return 0;
-
-	ginode = GFS2_I(d_inode(dentry));
-	if (!gfs2_holder_initialized(&ginode->i_iopen_gh))
-		return 0;
-
-	if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags))
-		return 1;
-
-	return 0;
-}
-
 const struct dentry_operations gfs2_dops = {
 	.d_revalidate = gfs2_drevalidate,
 	.d_hash = gfs2_dhash,
-	.d_delete = gfs2_dentry_delete,
 };
 
-- 
cgit v1.2.1


From b66f723bb552ad59c2acb5d45ea45c890f84498b Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 31 Jan 2023 15:06:53 +0100
Subject: gfs2: Improve gfs2_make_fs_rw error handling

In gfs2_make_fs_rw(), make sure to call gfs2_consist() to report an
inconsistency and mark the filesystem as withdrawn when
gfs2_find_jhead() fails.

At the end of gfs2_make_fs_rw(), when we discover that the filesystem
has been withdrawn, make sure we report an error.  This also replaces
the gfs2_withdrawn() check after gfs2_find_jhead().

Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Cc: syzbot+f51cb4b9afbd87ec06f2@syzkaller.appspotmail.com
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/super.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 52c3502de58c..a83fa62106f0 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -138,8 +138,10 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 		return -EIO;
 
 	error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
-	if (error || gfs2_withdrawn(sdp))
+	if (error) {
+		gfs2_consist(sdp);
 		return error;
+	}
 
 	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
 		gfs2_consist(sdp);
@@ -151,7 +153,9 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 	gfs2_log_pointers_init(sdp, head.lh_blkno);
 
 	error = gfs2_quota_init(sdp);
-	if (!error && !gfs2_withdrawn(sdp))
+	if (!error && gfs2_withdrawn(sdp))
+		error = -EIO;
+	if (!error)
 		set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 	return error;
 }
-- 
cgit v1.2.1


From cbb60951ce18c9b6e91d2eb97deb41d8ff616622 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 1 Feb 2023 15:08:50 +0100
Subject: gfs2: jdata writepage fix

The ->writepage() and ->writepages() operations are supposed to write
entire pages.  However, on filesystems with a block size smaller than
PAGE_SIZE, __gfs2_jdata_writepage() only adds the first block to the
current transaction instead of adding the entire page.  Fix that.

Fixes: 18ec7d5c3f43 ("[GFS2] Make journaled data files identical to normal files on disk")
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/aops.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index e782b4f1d104..2f04c0ff7470 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -127,7 +127,6 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
 {
 	struct inode *inode = page->mapping->host;
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
 
 	if (PageChecked(page)) {
 		ClearPageChecked(page);
@@ -135,7 +134,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
 			create_empty_buffers(page, inode->i_sb->s_blocksize,
 					     BIT(BH_Dirty)|BIT(BH_Uptodate));
 		}
-		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
+		gfs2_page_add_databufs(ip, page, 0, PAGE_SIZE);
 	}
 	return gfs2_write_jdata_page(page, wbc);
 }
-- 
cgit v1.2.1


From c1b0c3cfcbad25d2c412863c27638c933f1d911b Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 1 Feb 2023 15:50:25 +0100
Subject: gfs2: Convert gfs2_page_add_databufs to folios

Convert gfs2_page_add_databufs() to folios and rename it to
gfs2_trans_add_databufs().

Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/aops.c | 8 ++++----
 fs/gfs2/aops.h | 4 ++--
 fs/gfs2/bmap.c | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs/gfs2')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 2f04c0ff7470..2748a82de42a 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -37,10 +37,10 @@
 #include "aops.h"
 
 
-void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
-			    unsigned int from, unsigned int len)
+void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
+			     unsigned int from, unsigned int len)
 {
-	struct buffer_head *head = page_buffers(page);
+	struct buffer_head *head = folio_buffers(folio);
 	unsigned int bsize = head->b_size;
 	struct buffer_head *bh;
 	unsigned int to = from + len;
@@ -134,7 +134,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
 			create_empty_buffers(page, inode->i_sb->s_blocksize,
 					     BIT(BH_Dirty)|BIT(BH_Uptodate));
 		}
-		gfs2_page_add_databufs(ip, page, 0, PAGE_SIZE);
+		gfs2_trans_add_databufs(ip, page_folio(page), 0, PAGE_SIZE);
 	}
 	return gfs2_write_jdata_page(page, wbc);
 }
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index ff9877a68780..09db1914425e 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -9,7 +9,7 @@
 #include "incore.h"
 
 extern void adjust_fs_space(struct inode *inode);
-extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
-				   unsigned int from, unsigned int len);
+extern void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
+				    unsigned int from, unsigned int len);
 
 #endif /* __AOPS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e191ecfb1fde..eedf6926c652 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -985,8 +985,8 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 
 	if (!gfs2_is_stuffed(ip))
-		gfs2_page_add_databufs(ip, &folio->page, offset_in_page(pos),
-				       copied);
+		gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos),
+					copied);
 
 	folio_unlock(folio);
 	folio_put(folio);
-- 
cgit v1.2.1