diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | pymemcache/serde.py | 12 | ||||
-rw-r--r-- | pymemcache/test/test_benchmark.py | 8 | ||||
-rw-r--r-- | pymemcache/test/test_compression.py | 243 | ||||
-rw-r--r-- | test-requirements.txt | 2 |
5 files changed, 252 insertions, 14 deletions
@@ -48,4 +48,3 @@ coverage.xml #Docs docs/_build docs/apidoc/ -venv/ diff --git a/pymemcache/serde.py b/pymemcache/serde.py index 2926403..6e77766 100644 --- a/pymemcache/serde.py +++ b/pymemcache/serde.py @@ -137,16 +137,8 @@ class CompressedSerde: compress=zlib.compress, decompress=zlib.decompress, serde=pickle_serde, - # Discovered scientifically by testing at what point the serialization - # begins to improve, with a little padded on since compression adds - # CPU overhead - # >>> foo = 'foo'*4 - # >>> len(zlib.compress(foo.encode('utf-8'))), len(foo) - # (13, 12) - # >>> foo = 'foo'*5 - # >>> len(zlib.compress(foo.encode('utf-8'))), len(foo) - # (13, 15) - min_compress_len=30, + # Discovered via the `test_optimal_compression_length` test. + min_compress_len=400, ): self._serde = serde self._compress = compress diff --git a/pymemcache/test/test_benchmark.py b/pymemcache/test/test_benchmark.py index 55653bd..f123482 100644 --- a/pymemcache/test/test_benchmark.py +++ b/pymemcache/test/test_benchmark.py @@ -83,14 +83,16 @@ def benchmark(count, func, *args, **kwargs): @pytest.mark.benchmark() def test_bench_get(request, client, pairs, count): - key, value = next(pairs) + key = "pymemcache_test:0" + value = pairs[key] client.set(key, value) benchmark(count, client.get, key) @pytest.mark.benchmark() def test_bench_set(request, client, pairs, count): - key, value = next(pairs.items()) + key = "pymemcache_test:0" + value = pairs[key] benchmark(count, client.set, key, value) @@ -113,4 +115,4 @@ def test_bench_delete(request, client, pairs, count): @pytest.mark.benchmark() def test_bench_delete_multi(request, client, pairs, count): # deleting missing key takes the same work client-side as real keys - benchmark(count, client.delete_multi, list(pairs)) + benchmark(count, client.delete_multi, list(pairs.keys())) diff --git a/pymemcache/test/test_compression.py b/pymemcache/test/test_compression.py new file mode 100644 index 0000000..1fcc666 --- /dev/null +++ b/pymemcache/test/test_compression.py @@ -0,0 +1,243 @@ +from faker import Faker + +import pytest +import random +import string +import time +import zstd +import zlib + +fake = Faker( + ['it_IT', 'en_US', 'ja_JP'] +) + +from pymemcache.client.base import Client +from pymemcache.serde import ( + CompressedSerde, + pickle_serde, +) + + +def get_random_string(length): + letters = string.ascii_letters + chars = string.punctuation + digits = string.digits + total = letters + chars + digits + result_str = ''.join(random.choice(total) for i in range(length)) + return result_str + + +class CustomObject(): + """ + Custom class for verifying serialization + """ + def __init__(self): + self.number = random.randint(0, 100) + self.string = fake.text() + self.object = fake.profile() + + +class CustomObjectValue(): + def __init__(self, value): + self.value = value + + +def benchmark(count, func, *args, **kwargs): + start = time.time() + + for _ in range(count): + result = func(*args, **kwargs) + + duration = time.time() - start + print(str(duration)) + + return result + + +@pytest.fixture(scope="session") +def names(): + names = [] + for _ in range(15): + names.append(fake.name()) + + return names + + +@pytest.fixture(scope="session") +def paragraphs(): + paragraphs = [] + for _ in range(15): + paragraphs.append(fake.text()) + + return paragraphs + + +@pytest.fixture(scope="session") +def objects(): + objects = [] + for _ in range(15): + objects.append(CustomObject()) + + return objects + + +# Always run compression for the benchmarks +min_compress_len = 1 + +default_serde = CompressedSerde( + min_compress_len=min_compress_len +) + +zlib_serde = CompressedSerde( + compress=lambda value: zlib.compress(value, 9), + decompress=lambda value: zlib.decompress(value), + min_compress_len=min_compress_len +) + +zstd_serde = CompressedSerde( + compress=lambda value: zstd.compress(value), + decompress=lambda value: zstd.decompress(value), + min_compress_len=min_compress_len +) + +serializers = [ + None, + default_serde, + zlib_serde, + zstd_serde, +] +ids = ["none", "zlib ", "zlib9", "zstd "] + +@pytest.mark.benchmark() +@pytest.mark.parametrize("serde", serializers, ids=ids) +def test_bench_compress_set_strings(count, host, port, serde, names): + client = Client( + (host, port), + serde=serde, + encoding='utf-8' + ) + def test(): + for index, name in enumerate(names): + key = f"name_{index}" + client.set(key, name) + + benchmark(count, test) + + +@pytest.mark.benchmark() +@pytest.mark.parametrize("serde", serializers, ids=ids) +def test_bench_compress_get_strings(count, host, port, serde, names): + client = Client( + (host, port), + serde=serde, + encoding='utf-8' + ) + for index, name in enumerate(names): + key = f"name_{index}" + client.set(key, name) + + def test(): + for index, _ in enumerate(names): + key = f"name_{index}" + client.get(key) + + benchmark(count, test) + + +@pytest.mark.benchmark() +@pytest.mark.parametrize("serde", serializers, ids=ids) +def test_bench_compress_set_large_strings(count, host, port, serde, paragraphs): + client = Client( + (host, port), + serde=serde, + encoding='utf-8' + ) + def test(): + for index, p in enumerate(paragraphs): + key = f"paragraph_{index}" + client.set(key, p) + + benchmark(count, test) + + +@pytest.mark.benchmark() +@pytest.mark.parametrize("serde", serializers, ids=ids) +def test_bench_compress_get_large_strings(count, host, port, serde, paragraphs): + client = Client( + (host, port), + serde=serde, + encoding='utf-8' + ) + for index, p in enumerate(paragraphs): + key = f"paragraphs_{index}" + client.set(key, p) + + def test(): + for index, _ in enumerate(paragraphs): + key = f"paragraphs_{index}" + client.get(key) + + benchmark(count, test) + + +@pytest.mark.benchmark() +@pytest.mark.parametrize("serde", serializers, ids=ids) +def test_bench_compress_set_objects(count, host, port, serde, objects): + client = Client( + (host, port), + serde=serde, + encoding='utf-8' + ) + def test(): + for index, o in enumerate(objects): + key = f"objects_{index}" + client.set(key, o) + + benchmark(count, test) + + +@pytest.mark.benchmark() +@pytest.mark.parametrize("serde", serializers, ids=ids) +def test_bench_compress_get_objects(count, host, port, serde, objects): + client = Client( + (host, port), + serde=serde, + encoding='utf-8' + ) + for index, o in enumerate(objects): + key = f"objects_{index}" + client.set(key, o) + + def test(): + for index, _ in enumerate(objects): + key = f"objects_{index}" + client.get(key) + + benchmark(count, test) + + +@pytest.mark.benchmark() +def test_optimal_compression_length(): + for l in range(5, 2000): + input_data = get_random_string(l) + start = len(input_data) + + for index, serializer in enumerate(serializers[1:]): + name = ids[index+1] + value, _ = serializer.serialize("foo", input_data) + end = len(value) + print(f"serializer={name}\t start={start}\t end={end}") + + +@pytest.mark.benchmark() +def test_optimal_compression_length_objects(): + for l in range(5, 2000): + input_data = get_random_string(l) + obj = CustomObjectValue(input_data) + start = len(pickle_serde.serialize("foo", obj)[0]) + + for index, serializer in enumerate(serializers[1:]): + name = ids[index+1] + value, _ = serializer.serialize("foo", obj) + end = len(value) + print(f"serializer={name}\t start={start}\t end={end}") diff --git a/test-requirements.txt b/test-requirements.txt index ac08524..a707c65 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,5 +1,7 @@ +Faker==13.15.0 pytest==7.1.1 pytest-cov==3.0.0 gevent==21.12.0; "PyPy" not in platform_python_implementation pylibmc==1.6.1; sys.platform != 'win32' python-memcached==1.59 +zstd==1.5.2.5 |