summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 2014-10-26 00:11:29 +0200
committer Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 2014-10-26 00:11:29 +0200
commit 30f4a463c0255441a9c406dc6a3056f3343ff3c6 (patch)
tree d75ed5b0222ae12bb246fd87bcb4e51cd97fa90e
parent 58b9405edfb85045a9609ea2ea10c4c6a22c6f2f (diff)
download ipset-30f4a463c0255441a9c406dc6a3056f3343ff3c6.tar.gz
Fix parallel resizing and listing of the same set
-rw-r--r-- kernel/include/linux/netfilter/ipset/ip_set.h 13
-rw-r--r-- kernel/net/netfilter/ipset/ip_set_core.c 32
-rw-r--r-- kernel/net/netfilter/ipset/ip_set_hash_gen.h 38
3 files changed, 62 insertions, 21 deletions
diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h b/kernel/include/linux/netfilter/ipset/ip_set.h
index 05642d2..e640c62 100644
--- a/kernel/include/linux/netfilter/ipset/ip_set.h
+++ b/kernel/include/linux/netfilter/ipset/ip_set.h
@@ -177,6 +177,9 @@ struct ip_set_type_variant {
/* List elements */
int (*list)(const struct ip_set *set, struct sk_buff *skb,
struct netlink_callback *cb);
+ /* Keep listing private when resizing runs parallel */
+ void (*uref)(struct ip_set *set, struct netlink_callback *cb,
+ bool start);
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
@@ -424,12 +427,12 @@ ip_set_init_counter(struct ip_set_counter *counter,
/* Netlink CB args */
enum {
- IPSET_CB_NET = 0,
- IPSET_CB_DUMP,
- IPSET_CB_INDEX,
- IPSET_CB_ARG0,
+ IPSET_CB_NET = 0, /* net namespace */
+ IPSET_CB_DUMP, /* dump single set/all sets */
+ IPSET_CB_INDEX, /* set index */
+ IPSET_CB_PRIVATE, /* set private data */
+ IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
- IPSET_CB_ARG2,
};
/* register and unregister set references */
diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c
index 8e9dfa7..ee3e09c 100644
--- a/kernel/net/netfilter/ipset/ip_set_core.c
+++ b/kernel/net/netfilter/ipset/ip_set_core.c
@@ -1188,13 +1188,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
static int
ip_set_dump_done(struct netlink_callback *cb)
{
- struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
-
if (cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n",
- ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
- __ip_set_put_byindex(inst,
- (ip_set_id_t) cb->args[IPSET_CB_INDEX]);
+ struct ip_set_net *inst =
+ (struct ip_set_net *)cb->args[IPSET_CB_NET];
+ ip_set_id_t index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
+ struct ip_set *set = ip_set(inst, index);
+
+ if (set->variant->uref && cb->args[IPSET_CB_PRIVATE])
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
+ __ip_set_put_byindex(inst, index);
}
return 0;
}
@@ -1225,12 +1228,6 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
nla_parse(cda, IPSET_ATTR_CMD_MAX,
attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
- /* cb->args[IPSET_CB_NET]: net namespace
- * [IPSET_CB_DUMP]: dump single set/all sets
- * [IPSET_CB_INDEX]: set index
- * [IPSET_CB_ARG0]: type specific
- */
-
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
@@ -1265,6 +1262,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
u32 dump_type, dump_flags;
int ret = 0;
+ bool uref = false;
if (!cb->args[IPSET_CB_DUMP]) {
ret = dump_init(cb, inst);
@@ -1338,6 +1336,10 @@ dump_last:
goto release_refcount;
if (dump_flags & IPSET_FLAG_LIST_HEADER)
goto next_set;
+ if (set->variant->uref) {
+ uref = true;
+ set->variant->uref(set, cb, true);
+ }
/* Fall through and add elements */
default:
rcu_read_lock_bh();
@@ -1354,6 +1356,10 @@ dump_last:
dump_type = DUMP_LAST;
cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
cb->args[IPSET_CB_INDEX] = 0;
+ if (uref) {
+ uref = false;
+ set->variant->uref(set, cb, false);
+ }
goto dump_last;
}
goto out;
@@ -1378,6 +1384,8 @@ out:
pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len);
dump_attrs(nlh);
}
+ if (uref)
+ set->variant->uref(set, cb, false);
return ret < 0 ? ret : skb->len;
}
diff --git a/kernel/net/netfilter/ipset/ip_set_hash_gen.h b/kernel/net/netfilter/ipset/ip_set_hash_gen.h
index 6fd7db7..2ffd2da 100644
--- a/kernel/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/kernel/net/netfilter/ipset/ip_set_hash_gen.h
@@ -75,6 +75,8 @@ struct hbucket {
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
+ atomic_t ref; /* References for resizing */
+ atomic_t uref; /* References for dumping */
u8 htable_bits; /* size of hash table == 2^htable_bits */
struct hbucket * __rcu bucket[0]; /* hashtable buckets */
};
@@ -184,6 +186,7 @@ htable_bits(u32 hashsize)
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
+#undef mtype_uref
#undef mtype_expire
#undef mtype_resize
#undef mtype_head
@@ -225,6 +228,7 @@ htable_bits(u32 hashsize)
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
@@ -562,6 +566,7 @@ retry:
spin_lock_bh(&set->lock);
orig = h->table;
+ atomic_inc(&orig->ref);
pr_debug("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
for (i = 0; i < jhash_size(orig->htable_bits); i++) {
@@ -607,6 +612,7 @@ retry:
if (ret < 0) {
spin_unlock_bh(&set->lock);
mtype_ahash_destroy(set, t, false);
+ atomic_dec(&orig->ref);
if (ret == -EAGAIN)
goto retry;
return ret;
@@ -634,7 +640,9 @@ retry:
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- mtype_ahash_destroy(set, orig, false);
+ /* If there's nobody else dumping the table, destroy it */
+ if (atomic_read(&orig->uref) == 0)
+ mtype_ahash_destroy(set, orig, false);
return 0;
@@ -1032,12 +1040,33 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* Make possible to run dumping parallel with resizing */
+static void
+mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+
+ if (start) {
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh_nfnl(h->table);
+ atomic_inc(&t->uref);
+ cb->args[IPSET_CB_PRIVATE] = (unsigned long) t;
+ rcu_read_unlock_bh();
+ } else {
+ t = (struct htable *) cb->args[IPSET_CB_PRIVATE];
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref))
+ /* Resizing didn't destroy the hash table */
+ mtype_ahash_destroy(set, t, false);
+ cb->args[IPSET_CB_PRIVATE] = 0;
+ }
+}
+
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct htype *h = set->data;
const struct htable *t;
struct nlattr *atd, *nested;
const struct hbucket *n;
@@ -1052,11 +1081,11 @@ mtype_list(const struct ip_set *set,
return -EMSGSIZE;
pr_debug("list hash set %s\n", set->name);
- t = rcu_dereference_bh_nfnl(h->table);
+ t = (const struct htable *) cb->args[IPSET_CB_PRIVATE];
for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
cb->args[IPSET_CB_ARG0]++) {
incomplete = skb_tail_pointer(skb);
- n = rcu_dereference_bh(hbucket(t, cb->args[IPSET_CB_ARG0]));
+ n = hbucket(t, cb->args[IPSET_CB_ARG0]);
pr_debug("cb->arg bucket: %lu, t %p n %p\n",
cb->args[IPSET_CB_ARG0], t, n);
if (n == NULL)
@@ -1126,6 +1155,7 @@ static const struct ip_set_type_variant mtype_variant = {
.flush = mtype_flush,
.head = mtype_head,
.list = mtype_list,
+ .uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
};