Skip to content

Commit

Permalink
fix: update compression settings and change library (#22992)
Browse files Browse the repository at this point in the history
  • Loading branch information
aspicer authored Jun 17, 2024
1 parent 1690aaf commit 29b6654
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 10 deletions.
15 changes: 11 additions & 4 deletions posthog/caching/test/test_tolerant_zlib_compressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ class TestTolerantZlibCompressor(TestCase):
short_uncompressed_bytes = b"hello world"
# needs to be long enough to trigger compression
uncompressed_bytes = ("hello world hello world hello world hello world hello world" * 100).encode("utf-8")
compressed_bytes = b"x\x9c\xed\xcb\xb1\t\x00 \x0c\x00\xc1U2\x9cB\x8a@\xc0\xc6\xf5\x9d!\x95\xcdu_\xfc\xe5\xae\xea\xb8}jE\xcez\xb8\xa3(\x8a\xa2(\x8a\xa2(\x8a\xa2(\x8a\xa2(\x8a\xa2\xe8\x1f\xfa\x00\xaf\xed\xb6)"
zlib_compressed_bytes = b"x\x9c\xed\xcb\xb1\t\x00 \x0c\x00\xc1U2\x9cB\x8a@\xc0\xc6\xf5\x9d!\x95\xcdu_\xfc\xe5\xae\xea\xb8}jE\xcez\xb8\xa3(\x8a\xa2(\x8a\xa2(\x8a\xa2(\x8a\xa2(\x8a\xa2\xe8\x1f\xfa\x00\xaf\xed\xb6)"
zstd_compressed_bytes = b'(\xb5/\xfd`\x0c\x16\xbd\x02\x00`hello world \x80\xc7\xa8\xe0\xf77\xf0\x951\x12x\x81\xc1\xff\xbf\x7fDD\x94\x88\x880""JD\x84\x18\x11\x11%"D\x8c\x88\x88\x12!"FDD\t\x11\x11#""\x89\x88\x88\x11\x11\xa1DD\xc4\x88\x08Q""bD\x88(\x11\x11ed\xaa\xf9]\xa6'

@parameterized.expand(
[
Expand All @@ -25,7 +26,7 @@ class TestTolerantZlibCompressor(TestCase):
"test_when_enabled_can_compress",
True,
uncompressed_bytes,
compressed_bytes,
zstd_compressed_bytes,
),
(
"test_when_enabled_does_not_compress_small_values",
Expand All @@ -51,13 +52,19 @@ def test_the_zlib_compressor_compression(self, _, setting: bool, input: bytes, o
(
"test_when_enabled_can_decompress",
True,
compressed_bytes,
zlib_compressed_bytes,
uncompressed_bytes,
),
(
"test_when_enabled_can_decompress_zstd",
True,
zstd_compressed_bytes,
uncompressed_bytes,
),
(
"test_when_disabled_can_still_decompress",
False,
compressed_bytes,
zlib_compressed_bytes,
uncompressed_bytes,
),
]
Expand Down
23 changes: 19 additions & 4 deletions posthog/caching/tolerant_zlib_compressor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import zlib
import zstd

from django_redis.compressors.base import BaseCompressor

Expand All @@ -19,6 +20,13 @@
""",
)

USING_ZLIB_VALUE_COUNTER = Counter(
"posthog_redis_using_zlib_value_counter",
"""
A counter to track cache keys that are still being decompressed with (deprecated) zlib
""",
)


class TolerantZlibCompressor(BaseCompressor):
"""
Expand All @@ -30,17 +38,24 @@ class TolerantZlibCompressor(BaseCompressor):
"""

# we don't want to compress all values, e.g. feature flag cache in decide is already small
min_length = 1024
preset = 6
min_length = 512
zstd_preset = 0
zstd_threads = 1
zlib_preset = 6

def compress(self, value: bytes) -> bytes:
if settings.USE_REDIS_COMPRESSION and len(value) > self.min_length:
return zlib.compress(value, self.preset)
return zstd.compress(value, self.zstd_preset, self.zstd_threads)
return value

def decompress(self, value: bytes) -> bytes:
try:
return zlib.decompress(value)
try:
return zstd.decompress(value)
except zstd.Error:
r = zlib.decompress(value) # Phasing out zlib, it is 10x slower and compresses worse
USING_ZLIB_VALUE_COUNTER.inc()
return r
except zlib.error:
if settings.USE_REDIS_COMPRESSION:
COULD_NOT_DECOMPRESS_VALUE_COUNTER.inc()
Expand Down
2 changes: 1 addition & 1 deletion posthog/settings/data_stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def _parse_kafka_hosts(hosts_string: str) -> list[str]:
# Controls whether the TolerantZlibCompressor is used for Redis compression when writing to Redis.
# The TolerantZlibCompressor is a drop-in replacement for the standard Django ZlibCompressor that
# can cope with reading both compressed and uncompressed values at the same time.
USE_REDIS_COMPRESSION = get_from_env("USE_REDIS_COMPRESSION", False, type_cast=str_to_bool)
USE_REDIS_COMPRESSION = get_from_env("USE_REDIS_COMPRESSION", True, type_cast=str_to_bool)

# AWS ElastiCache supports "reader" endpoints.
# See "Finding a Redis (Cluster Mode Disabled) Cluster's Endpoints (Console)"
Expand Down
3 changes: 2 additions & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,5 @@ openai==1.10.0
tiktoken==0.6.0
nh3==0.2.14
hogql-parser==1.0.14
zxcvbn==4.4.28
zxcvbn==4.4.28
zstd==1.5.5.1
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -689,5 +689,7 @@ xmlsec==1.3.13
# via python3-saml
yarl==1.7.2
# via aiohttp
zstd==1.5.5.1
# via -r requirements.in
zxcvbn==4.4.28
# via -r requirements.in

0 comments on commit 29b6654

Please sign in to comment.