diff options
author | Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | 2014-10-26 00:11:29 +0200 |
---|---|---|
committer | Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | 2014-10-26 00:11:29 +0200 |
commit | 30f4a463c0255441a9c406dc6a3056f3343ff3c6 (patch) | |
tree | d75ed5b0222ae12bb246fd87bcb4e51cd97fa90e | |
parent | 58b9405edfb85045a9609ea2ea10c4c6a22c6f2f (diff) | |
download | ipset-30f4a463c0255441a9c406dc6a3056f3343ff3c6.tar.gz |
Fix parallel resizing and listing of the same set
-rw-r--r-- | kernel/include/linux/netfilter/ipset/ip_set.h | 13 | ||||
-rw-r--r-- | kernel/net/netfilter/ipset/ip_set_core.c | 32 | ||||
-rw-r--r-- | kernel/net/netfilter/ipset/ip_set_hash_gen.h | 38 |
3 files changed, 62 insertions, 21 deletions
diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h b/kernel/include/linux/netfilter/ipset/ip_set.h index 05642d2..e640c62 100644 --- a/kernel/include/linux/netfilter/ipset/ip_set.h +++ b/kernel/include/linux/netfilter/ipset/ip_set.h @@ -177,6 +177,9 @@ struct ip_set_type_variant { /* List elements */ int (*list)(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb); + /* Keep listing private when resizing runs parallel */ + void (*uref)(struct ip_set *set, struct netlink_callback *cb, + bool start); /* Return true if "b" set is the same as "a" * according to the create set parameters */ @@ -424,12 +427,12 @@ ip_set_init_counter(struct ip_set_counter *counter, /* Netlink CB args */ enum { - IPSET_CB_NET = 0, - IPSET_CB_DUMP, - IPSET_CB_INDEX, - IPSET_CB_ARG0, + IPSET_CB_NET = 0, /* net namespace */ + IPSET_CB_DUMP, /* dump single set/all sets */ + IPSET_CB_INDEX, /* set index */ + IPSET_CB_PRIVATE, /* set private data */ + IPSET_CB_ARG0, /* type specific */ IPSET_CB_ARG1, - IPSET_CB_ARG2, }; /* register and unregister set references */ diff --git a/kernel/net/netfilter/ipset/ip_set_core.c b/kernel/net/netfilter/ipset/ip_set_core.c index 8e9dfa7..ee3e09c 100644 --- a/kernel/net/netfilter/ipset/ip_set_core.c +++ b/kernel/net/netfilter/ipset/ip_set_core.c @@ -1188,13 +1188,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { - struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; - if (cb->args[IPSET_CB_ARG0]) { - pr_debug("release set %s\n", - ip_set(inst, cb->args[IPSET_CB_INDEX])->name); - __ip_set_put_byindex(inst, - (ip_set_id_t) cb->args[IPSET_CB_INDEX]); + struct ip_set_net *inst = + (struct ip_set_net *)cb->args[IPSET_CB_NET]; + ip_set_id_t index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; + struct ip_set *set = ip_set(inst, index); + + if (set->variant->uref && cb->args[IPSET_CB_PRIVATE]) + set->variant->uref(set, cb, false); + pr_debug("release set %s\n", set->name); + __ip_set_put_byindex(inst, index); } return 0; } @@ -1225,12 +1228,6 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); - /* cb->args[IPSET_CB_NET]: net namespace - * [IPSET_CB_DUMP]: dump single set/all sets - * [IPSET_CB_INDEX]: set index - * [IPSET_CB_ARG0]: type specific - */ - if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1265,6 +1262,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type, dump_flags; int ret = 0; + bool uref = false; if (!cb->args[IPSET_CB_DUMP]) { ret = dump_init(cb, inst); @@ -1338,6 +1336,10 @@ dump_last: goto release_refcount; if (dump_flags & IPSET_FLAG_LIST_HEADER) goto next_set; + if (set->variant->uref) { + uref = true; + set->variant->uref(set, cb, true); + } /* Fall through and add elements */ default: rcu_read_lock_bh(); @@ -1354,6 +1356,10 @@ dump_last: dump_type = DUMP_LAST; cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); cb->args[IPSET_CB_INDEX] = 0; + if (uref) { + uref = false; + set->variant->uref(set, cb, false); + } goto dump_last; } goto out; @@ -1378,6 +1384,8 @@ out: pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len); dump_attrs(nlh); } + if (uref) + set->variant->uref(set, cb, false); return ret < 0 ? ret : skb->len; } diff --git a/kernel/net/netfilter/ipset/ip_set_hash_gen.h b/kernel/net/netfilter/ipset/ip_set_hash_gen.h index 6fd7db7..2ffd2da 100644 --- a/kernel/net/netfilter/ipset/ip_set_hash_gen.h +++ b/kernel/net/netfilter/ipset/ip_set_hash_gen.h @@ -75,6 +75,8 @@ struct hbucket { /* The hash table: the table size stored here in order to make resizing easy */ struct htable { + atomic_t ref; /* References for resizing */ + atomic_t uref; /* References for dumping */ u8 htable_bits; /* size of hash table == 2^htable_bits */ struct hbucket * __rcu bucket[0]; /* hashtable buckets */ }; @@ -184,6 +186,7 @@ htable_bits(u32 hashsize) #undef mtype_del #undef mtype_test_cidrs #undef mtype_test +#undef mtype_uref #undef mtype_expire #undef mtype_resize #undef mtype_head @@ -225,6 +228,7 @@ htable_bits(u32 hashsize) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) #define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_uref IPSET_TOKEN(MTYPE, _uref) #define mtype_expire IPSET_TOKEN(MTYPE, _expire) #define mtype_resize IPSET_TOKEN(MTYPE, _resize) #define mtype_head IPSET_TOKEN(MTYPE, _head) @@ -562,6 +566,7 @@ retry: spin_lock_bh(&set->lock); orig = h->table; + atomic_inc(&orig->ref); pr_debug("attempt to resize set %s from %u to %u, t %p\n", set->name, orig->htable_bits, htable_bits, orig); for (i = 0; i < jhash_size(orig->htable_bits); i++) { @@ -607,6 +612,7 @@ retry: if (ret < 0) { spin_unlock_bh(&set->lock); mtype_ahash_destroy(set, t, false); + atomic_dec(&orig->ref); if (ret == -EAGAIN) goto retry; return ret; @@ -634,7 +640,9 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - mtype_ahash_destroy(set, orig, false); + /* If there's nobody else dumping the table, destroy it */ + if (atomic_read(&orig->uref) == 0) + mtype_ahash_destroy(set, orig, false); return 0; @@ -1032,12 +1040,33 @@ nla_put_failure: return -EMSGSIZE; } +/* Make possible to run dumping parallel with resizing */ +static void +mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) +{ + struct htype *h = set->data; + struct htable *t; + + if (start) { + rcu_read_lock_bh(); + t = rcu_dereference_bh_nfnl(h->table); + atomic_inc(&t->uref); + cb->args[IPSET_CB_PRIVATE] = (unsigned long) t; + rcu_read_unlock_bh(); + } else { + t = (struct htable *) cb->args[IPSET_CB_PRIVATE]; + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) + /* Resizing didn't destroy the hash table */ + mtype_ahash_destroy(set, t, false); + cb->args[IPSET_CB_PRIVATE] = 0; + } +} + /* Reply a LIST/SAVE request: dump the elements of the specified set */ static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { - const struct htype *h = set->data; const struct htable *t; struct nlattr *atd, *nested; const struct hbucket *n; @@ -1052,11 +1081,11 @@ mtype_list(const struct ip_set *set, return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); - t = rcu_dereference_bh_nfnl(h->table); + t = (const struct htable *) cb->args[IPSET_CB_PRIVATE]; for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); cb->args[IPSET_CB_ARG0]++) { incomplete = skb_tail_pointer(skb); - n = rcu_dereference_bh(hbucket(t, cb->args[IPSET_CB_ARG0])); + n = hbucket(t, cb->args[IPSET_CB_ARG0]); pr_debug("cb->arg bucket: %lu, t %p n %p\n", cb->args[IPSET_CB_ARG0], t, n); if (n == NULL) @@ -1126,6 +1155,7 @@ static const struct ip_set_type_variant mtype_variant = { .flush = mtype_flush, .head = mtype_head, .list = mtype_list, + .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, }; |