refactor: Postgres (+ Redshift) batch exports now async #18501

Closed
wants to merge 42 commits
Changes from all commits
Commits
42 commits
b63aeda
fix: Checkout master before checking for hogql changes
tomasfarias Nov 7, 2023
0fc1685
fix: But go back to branch after done
tomasfarias Nov 7, 2023
26f71a5
fix: Instead just diff with origin/master
tomasfarias Nov 7, 2023
5be9d81
fix: Install libantlr, what's the worst that can happen?
tomasfarias Nov 7, 2023
15a5026
fix: Install antlr the hogql-way
tomasfarias Nov 7, 2023
421891c
fix: Let's just checkout everything
tomasfarias Nov 7, 2023
2a04396
feat(batch-exports): Add backfill model and service support
tomasfarias Oct 11, 2023
d68a93a
test(batch-exports-backfills): Add Workflow test
tomasfarias Oct 13, 2023
ba05a2f
chore(batch-exports-backfill): Bump migration
tomasfarias Oct 17, 2023
1160cc5
feat(batch-exports): Add RedshiftBatchExportWorkflow
tomasfarias Oct 18, 2023
8aa9e51
feat(batch-exports): Add Redshift to BatchExport destinations
tomasfarias Oct 18, 2023
9770a8f
feat(batch-exports): Support properties_data_type Redshift plugin par…
tomasfarias Oct 18, 2023
916620e
refactor(batch-exports): Insert rows instead of using COPY
tomasfarias Oct 19, 2023
c31b28a
fix: Remove unused migration
tomasfarias Nov 1, 2023
1c4fdc4
chore: Require aiokafka
tomasfarias Nov 2, 2023
291ecb9
feat: Implement new structlog batch exports logger
tomasfarias Nov 2, 2023
91af5c2
refactor: Use new structlog logger everywhere
tomasfarias Nov 2, 2023
edd3de8
test: Add tests, fix things
tomasfarias Nov 2, 2023
784fd18
fix: Remove old tests
tomasfarias Nov 2, 2023
b4a472f
chore: Change typing of return logger
tomasfarias Nov 2, 2023
13f4cab
chore: Bump structlog
tomasfarias Nov 2, 2023
470a277
chore: Extend docstrings
tomasfarias Nov 2, 2023
33e6d85
fix: Don't use async logging as it's unsupported by temporal runtime
tomasfarias Nov 2, 2023
f6f7380
test: Add logger tests
tomasfarias Nov 3, 2023
31ca0ec
fix: Mix pytestmark lists
tomasfarias Nov 7, 2023
17be12b
fix: Remove unused imports
tomasfarias Nov 7, 2023
1574357
fix: Cleanup pytest warnings
tomasfarias Nov 7, 2023
9ea3b82
fix: Create and drop dataset for bigquery tests
tomasfarias Nov 7, 2023
6695832
fix: Typing issue?
tomasfarias Nov 7, 2023
44b3db7
fix: Let's just checkout everything
tomasfarias Nov 7, 2023
5c03610
fix: Use global event loop in tests
tomasfarias Nov 7, 2023
5f8d5ba
fix: Blow-up cache
tomasfarias Nov 7, 2023
2ed879c
fix: Truncate only if table exists
tomasfarias Nov 7, 2023
6c24472
revert-me: Skip all postgres tests
tomasfarias Nov 7, 2023
89db2f3
fix: Connect to kafka in localhost
tomasfarias Nov 8, 2023
288a97c
fix: Lazily connect to Kafka
tomasfarias Nov 8, 2023
de5e10b
refactor: Postgres batch exports now async
tomasfarias Nov 9, 2023
aaa29a2
refactor: And redshift too
tomasfarias Nov 9, 2023
dc1afc9
fix: Let's use from psycopg import sql instead
tomasfarias Nov 9, 2023
ccc0808
fix: Typing issues
tomasfarias Nov 9, 2023
194bce8
test: Update Redshift tests
tomasfarias Nov 9, 2023
1d569ba
fix: Typing issues
tomasfarias Nov 9, 2023
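
The commits "refactor: Postgres batch exports now async" and "refactor(batch-exports): Insert rows instead of using COPY" summarize the core change: the export activities talk to Postgres through psycopg's asynchronous API and insert rows directly rather than relying on COPY. The snippet below is an editor-added sketch of that general psycopg 3 async-insert pattern, not code from this PR; the DSN, schema, table, and column names are illustrative assumptions.

import asyncio

import psycopg
from psycopg import sql


async def insert_rows(dsn: str, schema: str, table: str, rows: list[tuple[str, str]]) -> None:
    """Insert rows with an async psycopg connection instead of COPY."""
    async with await psycopg.AsyncConnection.connect(dsn) as connection:
        async with connection.cursor() as cursor:
            query = sql.SQL("INSERT INTO {}.{} (event, properties) VALUES (%s, %s)").format(
                sql.Identifier(schema), sql.Identifier(table)
            )
            # executemany issues one INSERT per row; batching and retries are out of scope here.
            await cursor.executemany(query, rows)


# Hypothetical DSN and data, for illustration only.
asyncio.run(insert_rows("postgresql://localhost/exports", "exports", "events", [("pageview", "{}")]))
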
2 changes: 1 addition & 1 deletion .github/actions/run-backend-tests/action.yml
@@ -83,7 +83,7 @@ runs:
- uses: syphar/restore-virtualenv@v1
id: cache-backend-tests
with:
custom_cache_key_element: v1
custom_cache_key_element: v2

- uses: syphar/restore-pip-download-cache@v1
if: steps.cache-backend-tests.outputs.cache-hit != 'true'
4 changes: 2 additions & 2 deletions .github/workflows/ci-backend.yml
@@ -109,7 +109,7 @@ jobs:
- uses: syphar/restore-virtualenv@v1
id: cache-backend-tests
with:
custom_cache_key_element: v1-
custom_cache_key_element: v2-

- uses: syphar/restore-pip-download-cache@v1
if: steps.cache-backend-tests.outputs.cache-hit != 'true'
@@ -331,7 +331,7 @@ jobs:
- uses: syphar/restore-virtualenv@v1
id: cache-backend-tests
with:
custom_cache_key_element: v1-
custom_cache_key_element: v2-

- uses: syphar/restore-pip-download-cache@v1
if: steps.cache-backend-tests.outputs.cache-hit != 'true'
72 changes: 71 additions & 1 deletion posthog/temporal/tests/batch_exports/conftest.py
@@ -1,5 +1,7 @@
import psycopg
import pytest
import pytest_asyncio
from psycopg import sql


@pytest.fixture
@@ -39,4 +41,72 @@ async def truncate_events(clickhouse_client):
This is useful if during the test setup we insert a lot of events we wish to clean-up.
"""
yield
await clickhouse_client.execute_query("TRUNCATE TABLE `sharded_events`")
await clickhouse_client.execute_query("TRUNCATE TABLE IF EXISTS `sharded_events`")


@pytest_asyncio.fixture
async def setup_postgres_test_db(postgres_config):
"""Fixture to manage a database for Redshift export testing.

Managing a test database involves the following steps:
1. Creating a test database.
2. Initializing a connection to that database.
3. Creating a test schema.
4. Yielding the connection to be used in tests.
5. After tests, drop the test schema and any tables in it.
6. Drop the test database.
"""
connection = await psycopg.AsyncConnection.connect(
user=postgres_config["user"],
password=postgres_config["password"],
host=postgres_config["host"],
port=postgres_config["port"],
)
await connection.set_autocommit(True)

async with connection.cursor() as cursor:
await cursor.execute(
sql.SQL("SELECT 1 FROM pg_database WHERE datname = %s"),
(postgres_config["database"],),
)

if await cursor.fetchone() is None:
await cursor.execute(sql.SQL("CREATE DATABASE {}").format(sql.Identifier(postgres_config["database"])))

await connection.close()

# We need a new connection to connect to the database we just created.
connection = await psycopg.AsyncConnection.connect(
user=postgres_config["user"],
password=postgres_config["password"],
host=postgres_config["host"],
port=postgres_config["port"],
dbname=postgres_config["database"],
)
await connection.set_autocommit(True)

async with connection.cursor() as cursor:
await cursor.execute(
sql.SQL("CREATE SCHEMA IF NOT EXISTS {}").format(sql.Identifier(postgres_config["schema"]))
)

yield

async with connection.cursor() as cursor:
await cursor.execute(sql.SQL("DROP SCHEMA {} CASCADE").format(sql.Identifier(postgres_config["schema"])))

await connection.close()

# We need a new connection to drop the database, as we cannot drop the current database.
connection = await psycopg.AsyncConnection.connect(
user=postgres_config["user"],
password=postgres_config["password"],
host=postgres_config["host"],
port=postgres_config["port"],
)
await connection.set_autocommit(True)

async with connection.cursor() as cursor:
await cursor.execute(sql.SQL("DROP DATABASE {}").format(sql.Identifier(postgres_config["database"])))

await connection.close()
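
Not part of the diff: the docstring above lists the create-database, create-schema, and drop steps the fixture walks through. As a rough usage sketch, a test could depend on setup_postgres_test_db alongside the postgres_config fixture and connect to the database it prepares; the test body below is an editor-added assumption, not taken from this PR.

import psycopg
import pytest

pytestmark = [pytest.mark.asyncio]


async def test_connects_to_prepared_database(postgres_config, setup_postgres_test_db):
    """Open a connection to the database created by the fixture and run a trivial query."""
    connection = await psycopg.AsyncConnection.connect(
        user=postgres_config["user"],
        password=postgres_config["password"],
        host=postgres_config["host"],
        port=postgres_config["port"],
        dbname=postgres_config["database"],
    )
    async with connection.cursor() as cursor:
        await cursor.execute("SELECT 1")
        assert await cursor.fetchone() == (1,)
    await connection.close()
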
116 changes: 1 addition & 115 deletions posthog/temporal/tests/batch_exports/test_batch_exports.py
@@ -1,38 +1,25 @@
import csv
import dataclasses
import datetime as dt
import io
import json
import logging
import operator
import random
import string
import uuid
from random import randint
from unittest.mock import patch

import pytest
from freezegun import freeze_time
from temporalio import activity, workflow

from posthog.clickhouse.log_entries import (
KAFKA_LOG_ENTRIES,
)
from posthog.temporal.tests.utils.datetimes import (
to_isoformat,
)
from posthog.temporal.tests.utils.events import generate_test_events_in_clickhouse
from posthog.temporal.workflows.batch_exports import (
BatchExportTemporaryFile,
KafkaLoggingHandler,
get_batch_exports_logger,
get_data_interval,
get_results_iterator,
get_rows_count,
json_dumps_bytes,
)

pytestmark = [pytest.mark.django_db, pytest.mark.asyncio]
pytestmark = [pytest.mark.asyncio, pytest.mark.django_db]


async def test_get_rows_count(clickhouse_client):
@@ -540,104 +527,3 @@ def test_batch_export_temporary_file_write_records_to_tsv(records):
assert be_file.bytes_since_last_reset == 0
assert be_file.records_total == len(records)
assert be_file.records_since_last_reset == 0


def test_kafka_logging_handler_produces_to_kafka(caplog):
"""Test a mocked call to Kafka produce from the KafkaLoggingHandler."""
logger_name = "test-logger"
logger = logging.getLogger(logger_name)
handler = KafkaLoggingHandler(topic=KAFKA_LOG_ENTRIES)
handler.setLevel(logging.DEBUG)
logger.addHandler(handler)

team_id = random.randint(1, 10000)
batch_export_id = str(uuid.uuid4())
run_id = str(uuid.uuid4())
timestamp = "2023-09-21 00:01:01.000001"

expected_tuples = []
expected_kafka_produce_calls_kwargs = []

with patch("posthog.kafka_client.client._KafkaProducer.produce") as produce:
with caplog.at_level(logging.DEBUG):
with freeze_time(timestamp):
for level in (10, 20, 30, 40, 50):
random_message = "".join(random.choice(string.ascii_letters) for _ in range(30))

logger.log(
level,
random_message,
extra={
"team_id": team_id,
"batch_export_id": batch_export_id,
"workflow_run_id": run_id,
},
)

expected_tuples.append(
(
logger_name,
level,
random_message,
)
)
data = {
"message": random_message,
"team_id": team_id,
"log_source": "batch_exports",
"log_source_id": batch_export_id,
"instance_id": run_id,
"timestamp": timestamp,
"level": logging.getLevelName(level),
}
expected_kafka_produce_calls_kwargs.append({"topic": KAFKA_LOG_ENTRIES, "data": data, "key": None})

assert caplog.record_tuples == expected_tuples

kafka_produce_calls_kwargs = [call.kwargs for call in produce.call_args_list]
assert kafka_produce_calls_kwargs == expected_kafka_produce_calls_kwargs


@dataclasses.dataclass
class TestInputs:
team_id: int
data_interval_end: str | None = None
interval: str = "hour"
batch_export_id: str = ""


@dataclasses.dataclass
class TestInfo:
workflow_id: str
run_id: str
workflow_run_id: str
attempt: int


@pytest.mark.parametrize("context", [activity.__name__, workflow.__name__])
def test_batch_export_logger_adapter(context, caplog):
"""Test BatchExportLoggerAdapter sets the appropiate context variables."""
team_id = random.randint(1, 10000)
inputs = TestInputs(team_id=team_id)
logger = get_batch_exports_logger(inputs=inputs)

batch_export_id = str(uuid.uuid4())
run_id = str(uuid.uuid4())
attempt = random.randint(1, 10)
info = TestInfo(
workflow_id=f"{batch_export_id}-{dt.datetime.utcnow().isoformat()}",
run_id=run_id,
workflow_run_id=run_id,
attempt=attempt,
)

with patch("posthog.kafka_client.client._KafkaProducer.produce"):
with patch(context + ".info", return_value=info):
for level in (10, 20, 30, 40, 50):
logger.log(level, "test")

records = caplog.get_records("call")
assert all(record.team_id == team_id for record in records)
assert all(record.batch_export_id == batch_export_id for record in records)
assert all(record.workflow_run_id == run_id for record in records)
assert all(record.attempt == attempt for record in records)
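
The tests removed above covered the old KafkaLoggingHandler and get_batch_exports_logger pair, which the commits "feat: Implement new structlog batch exports logger" and "refactor: Use new structlog logger everywhere" replace with a structlog-based logger. The new logger's implementation is not shown in this section; the snippet below is only an editor-added sketch of the generic structlog contextvars pattern such a logger typically builds on, not PostHog's actual configuration.

import structlog

# Merge bound context variables (team_id, batch_export_id, ...) into every log event.
structlog.configure(
    processors=[
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.JSONRenderer(),
    ]
)

# Hypothetical identifiers, for illustration only.
structlog.contextvars.bind_contextvars(team_id=1, batch_export_id="example-id", workflow_run_id="example-run")
logger = structlog.get_logger()
logger.info("exporting batch", destination="Postgres")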