summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Anderson <sontek@gmail.com>2022-07-15 18:14:35 -0400
committerJohn Anderson <sontek@gmail.com>2022-07-15 18:14:35 -0400
commit0fd419f6b83931d6899ea19a6668a3bc2aded3c2 (patch)
tree92ab88bb3028502258fe46d29c4273a2688f6b82
parentfd7a6deaf8d92595ab1fc9682eb5bfc9b953b39c (diff)
downloadpymemcache-0fd419f6b83931d6899ea19a6668a3bc2aded3c2.tar.gz
add some compression benchmarks
-rw-r--r--.gitignore1
-rw-r--r--pymemcache/serde.py12
-rw-r--r--pymemcache/test/test_benchmark.py8
-rw-r--r--pymemcache/test/test_compression.py243
-rw-r--r--test-requirements.txt2
5 files changed, 252 insertions, 14 deletions
diff --git a/.gitignore b/.gitignore
index bb523fd..a3ff1ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,4 +48,3 @@ coverage.xml
#Docs
docs/_build
docs/apidoc/
-venv/
diff --git a/pymemcache/serde.py b/pymemcache/serde.py
index 2926403..6e77766 100644
--- a/pymemcache/serde.py
+++ b/pymemcache/serde.py
@@ -137,16 +137,8 @@ class CompressedSerde:
compress=zlib.compress,
decompress=zlib.decompress,
serde=pickle_serde,
- # Discovered scientifically by testing at what point the serialization
- # begins to improve, with a little padded on since compression adds
- # CPU overhead
- # >>> foo = 'foo'*4
- # >>> len(zlib.compress(foo.encode('utf-8'))), len(foo)
- # (13, 12)
- # >>> foo = 'foo'*5
- # >>> len(zlib.compress(foo.encode('utf-8'))), len(foo)
- # (13, 15)
- min_compress_len=30,
+ # Discovered via the `test_optimal_compression_length` test.
+ min_compress_len=400,
):
self._serde = serde
self._compress = compress
diff --git a/pymemcache/test/test_benchmark.py b/pymemcache/test/test_benchmark.py
index 55653bd..f123482 100644
--- a/pymemcache/test/test_benchmark.py
+++ b/pymemcache/test/test_benchmark.py
@@ -83,14 +83,16 @@ def benchmark(count, func, *args, **kwargs):
@pytest.mark.benchmark()
def test_bench_get(request, client, pairs, count):
- key, value = next(pairs)
+ key = "pymemcache_test:0"
+ value = pairs[key]
client.set(key, value)
benchmark(count, client.get, key)
@pytest.mark.benchmark()
def test_bench_set(request, client, pairs, count):
- key, value = next(pairs.items())
+ key = "pymemcache_test:0"
+ value = pairs[key]
benchmark(count, client.set, key, value)
@@ -113,4 +115,4 @@ def test_bench_delete(request, client, pairs, count):
@pytest.mark.benchmark()
def test_bench_delete_multi(request, client, pairs, count):
# deleting missing key takes the same work client-side as real keys
- benchmark(count, client.delete_multi, list(pairs))
+ benchmark(count, client.delete_multi, list(pairs.keys()))
diff --git a/pymemcache/test/test_compression.py b/pymemcache/test/test_compression.py
new file mode 100644
index 0000000..1fcc666
--- /dev/null
+++ b/pymemcache/test/test_compression.py
@@ -0,0 +1,243 @@
+from faker import Faker
+
+import pytest
+import random
+import string
+import time
+import zstd
+import zlib
+
+# Module-wide Faker instance configured with three locales.
+fake = Faker(['it_IT', 'en_US', 'ja_JP'])
+
+from pymemcache.client.base import Client
+from pymemcache.serde import (
+ CompressedSerde,
+ pickle_serde,
+)
+
+
+def get_random_string(length):
+    """
+    Build a random string that is ``length`` characters long.
+
+    Every character is drawn independently with ``random.choice`` from the
+    ASCII letters, punctuation and digits (concatenated in that order).
+    """
+    alphabet = string.ascii_letters + string.punctuation + string.digits
+    picked = [random.choice(alphabet) for _ in range(length)]
+    return ''.join(picked)
+
+
+class CustomObject:
+    """
+    Sample object with mixed attribute types, used for verifying
+    serialization.
+
+    All three attributes are generated at construction time: ``number``
+    from ``random.randint(0, 100)``, ``string`` from ``fake.text()`` and
+    ``object`` from ``fake.profile()``.
+    """
+
+    def __init__(self):
+        # The three draws are kept in their original order.
+        self.number = random.randint(0, 100)
+        self.string = fake.text()
+        self.object = fake.profile()
+
+
+class CustomObjectValue:
+    """
+    Minimal wrapper that stores a single payload on ``value``.
+    """
+
+    def __init__(self, value):
+        self.value = value
+
+
+def benchmark(count, func, *args, **kwargs):
+    """
+    Call ``func(*args, **kwargs)`` ``count`` times and print the elapsed time.
+
+    The total duration, in seconds, is printed as a bare number.
+
+    Returns the value produced by the last call, or ``None`` when ``count``
+    is zero or negative and ``func`` is therefore never called.
+    """
+    # Bind up front so an empty loop cannot raise UnboundLocalError on return.
+    result = None
+    # perf_counter() is monotonic and high resolution; time.time() can jump
+    # when the system clock is adjusted.
+    start = time.perf_counter()
+
+    for _ in range(count):
+        result = func(*args, **kwargs)
+
+    duration = time.perf_counter() - start
+    print(str(duration))
+
+    return result
+
+
+@pytest.fixture(scope="session")
+def names():
+    """Session-scoped fixture: a list of 15 values from ``fake.name()``."""
+    return [fake.name() for _ in range(15)]
+
+
+@pytest.fixture(scope="session")
+def paragraphs():
+    """Session-scoped fixture: a list of 15 values from ``fake.text()``."""
+    return [fake.text() for _ in range(15)]
+
+
+@pytest.fixture(scope="session")
+def objects():
+    """Session-scoped fixture: a list of 15 ``CustomObject`` instances."""
+    return [CustomObject() for _ in range(15)]
+
+
+# Always run compression for the benchmarks
+min_compress_len = 1
+
+# CompressedSerde with its default compress/decompress callables.
+default_serde = CompressedSerde(min_compress_len=min_compress_len)
+
+# zlib at compression level 9.
+zlib_serde = CompressedSerde(
+    compress=lambda value: zlib.compress(value, 9),
+    decompress=zlib.decompress,
+    min_compress_len=min_compress_len,
+)
+
+# zstd called with only the payload argument.
+zstd_serde = CompressedSerde(
+    compress=zstd.compress,
+    decompress=zstd.decompress,
+    min_compress_len=min_compress_len,
+)
+
+# `serializers` and `ids` are parallel lists: ids[i] labels serializers[i].
+# The leading None entry is the "no serde" baseline.
+serializers = [None, default_serde, zlib_serde, zstd_serde]
+ids = ["none", "zlib ", "zlib9", "zstd "]
+
+@pytest.mark.benchmark()
+@pytest.mark.parametrize("serde", serializers, ids=ids)
+def test_bench_compress_set_strings(count, host, port, serde, names):
+    """Benchmark storing every entry of ``names`` under ``name_<index>``."""
+    client = Client((host, port), serde=serde, encoding='utf-8')
+
+    def store_all():
+        for index, value in enumerate(names):
+            client.set(f"name_{index}", value)
+
+    benchmark(count, store_all)
+
+
+@pytest.mark.benchmark()
+@pytest.mark.parametrize("serde", serializers, ids=ids)
+def test_bench_compress_get_strings(count, host, port, serde, names):
+    """Benchmark reading back the ``name_<index>`` keys."""
+    client = Client((host, port), serde=serde, encoding='utf-8')
+
+    # Populate the keys once, outside the timed section.
+    for index, value in enumerate(names):
+        client.set(f"name_{index}", value)
+
+    def fetch_all():
+        for index in range(len(names)):
+            client.get(f"name_{index}")
+
+    benchmark(count, fetch_all)
+
+
+@pytest.mark.benchmark()
+@pytest.mark.parametrize("serde", serializers, ids=ids)
+def test_bench_compress_set_large_strings(count, host, port, serde, paragraphs):
+    """Benchmark storing every entry of ``paragraphs``."""
+    client = Client((host, port), serde=serde, encoding='utf-8')
+
+    # NOTE(review): keys here use the `paragraph_` prefix, while
+    # test_bench_compress_get_large_strings uses `paragraphs_`; the two
+    # benchmarks therefore do not share keys. Confirm whether that is intended.
+    def store_all():
+        for index, text in enumerate(paragraphs):
+            client.set(f"paragraph_{index}", text)
+
+    benchmark(count, store_all)
+
+
+@pytest.mark.benchmark()
+@pytest.mark.parametrize("serde", serializers, ids=ids)
+def test_bench_compress_get_large_strings(count, host, port, serde, paragraphs):
+    """Benchmark reading back the ``paragraphs_<index>`` keys."""
+    client = Client((host, port), serde=serde, encoding='utf-8')
+
+    # Populate the keys once, outside the timed section.
+    for index, text in enumerate(paragraphs):
+        client.set(f"paragraphs_{index}", text)
+
+    def fetch_all():
+        for index in range(len(paragraphs)):
+            client.get(f"paragraphs_{index}")
+
+    benchmark(count, fetch_all)
+
+
+@pytest.mark.benchmark()
+@pytest.mark.parametrize("serde", serializers, ids=ids)
+def test_bench_compress_set_objects(count, host, port, serde, objects):
+    """Benchmark storing every entry of ``objects`` under ``objects_<index>``."""
+    client = Client((host, port), serde=serde, encoding='utf-8')
+
+    def store_all():
+        for index, item in enumerate(objects):
+            client.set(f"objects_{index}", item)
+
+    benchmark(count, store_all)
+
+
+@pytest.mark.benchmark()
+@pytest.mark.parametrize("serde", serializers, ids=ids)
+def test_bench_compress_get_objects(count, host, port, serde, objects):
+    """Benchmark reading back the ``objects_<index>`` keys."""
+    client = Client((host, port), serde=serde, encoding='utf-8')
+
+    # Populate the keys once, outside the timed section.
+    for index, item in enumerate(objects):
+        client.set(f"objects_{index}", item)
+
+    def fetch_all():
+        for index in range(len(objects)):
+            client.get(f"objects_{index}")
+
+    benchmark(count, fetch_all)
+
+
+@pytest.mark.benchmark()
+def test_optimal_compression_length():
+    """
+    Print serialized sizes for random strings of length 5 through 1999.
+
+    For each length, every compressing serde serializes the string and the
+    input length (``start``) is printed next to the output length (``end``).
+    """
+    # Skip the leading None entry and pair each serde with its label.
+    labelled = list(zip(ids[1:], serializers[1:]))
+
+    for length in range(5, 2000):
+        input_data = get_random_string(length)
+        start = len(input_data)
+
+        for name, serializer in labelled:
+            value, _ = serializer.serialize("foo", input_data)
+            end = len(value)
+            print(f"serializer={name}\t start={start}\t end={end}")
+
+
+@pytest.mark.benchmark()
+def test_optimal_compression_length_objects():
+    """
+    Print serialized sizes for objects wrapping random strings.
+
+    For string lengths 5 through 1999, the string is wrapped in a
+    ``CustomObjectValue``. ``start`` is the length of the plain
+    ``pickle_serde`` output; ``end`` is the length each compressing serde
+    produces for the same object.
+    """
+    # Skip the leading None entry and pair each serde with its label.
+    labelled = list(zip(ids[1:], serializers[1:]))
+
+    for length in range(5, 2000):
+        obj = CustomObjectValue(get_random_string(length))
+        pickled, _ = pickle_serde.serialize("foo", obj)
+        start = len(pickled)
+
+        for name, serializer in labelled:
+            value, _ = serializer.serialize("foo", obj)
+            end = len(value)
+            print(f"serializer={name}\t start={start}\t end={end}")
diff --git a/test-requirements.txt b/test-requirements.txt
index ac08524..a707c65 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1,5 +1,7 @@
+Faker==13.15.0
pytest==7.1.1
pytest-cov==3.0.0
gevent==21.12.0; "PyPy" not in platform_python_implementation
pylibmc==1.6.1; sys.platform != 'win32'
python-memcached==1.59
+zstd==1.5.2.5