Skip to content

Commit

Permalink
Merge pull request #255 from TogetherCrew/feat/218-violation-detection
Browse files (browse the repository at this point in the history)
fix: violation detection fix extraction query!
  • Loading branch information
amindadgar authored Aug 8, 2024
2 parents 72b168f + 790f1ad commit 1d47276
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 12 deletions.
7 changes: 6 additions & 1 deletion dags/violation_detection_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def process_platforms(platform: dict[str, Any]):
recompute = platform["recompute"]
discord_users = platform["selected_discord_users"]

logging.info(f"Processing PLATFORM_ID: {platform_id}!")

# EXTRACT

# TODO: Get resource_identifier from platform analyzer config
Expand All @@ -72,6 +74,7 @@ def process_platforms(platform: dict[str, Any]):

# Load
if len(transformed_data) != 0:
logging.info(f"Loading {len(transformed_data)} documents into db!")
loader = LoadPlatformLabeledData()
loader.load(platform_id=platform, transformed_data=transformed_data)

Expand All @@ -84,7 +87,9 @@ def process_platforms(platform: dict[str, Any]):
reporter = SendReportDiscordUser(platform_id=platform_id)
reporter.send(discord_user_id=discord_id, message=report)
else:
logging.warning("No documents were transformed!")
logging.warning(
f"PLATFORM_ID: {platform_id}, No documents were transformed!"
)

platforms = get_violation_modules()
process_platforms.expand(platform=platforms)
2 changes: 1 addition & 1 deletion dags/violation_detection_helpers/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def extract(
{
**date_query,
"text": {"$ne": None},
"source_id": {"$in": resources},
self.resource_name: {"$in": resources},
}
)
return cursor, override_recompute
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ def test_extract_all_resources(self):
},
]
self.client[self.platform_id]["rawmemberactivities"].insert_many(sample_data)
extract_data = ExtractPlatformRawData(self.platform_id, "channel_id")
extract_data = ExtractPlatformRawData(self.platform_id, "category_id")

cursor, override_recompute = extract_data.extract(
from_date=datetime(2020, 1, 1),
to_date=None,
resources=["8888", "8880"],
resources=["34567", "34569"],
recompute=False,
)
results = list(cursor)
Expand Down Expand Up @@ -104,12 +104,12 @@ def test_extract_some_resources(self):
},
]
self.client[self.platform_id]["rawmemberactivities"].insert_many(sample_data)
extract_data = ExtractPlatformRawData(self.platform_id, "channel_id")
extract_data = ExtractPlatformRawData(self.platform_id, "category_id")

cursor, override_recompute = extract_data.extract(
from_date=datetime(2020, 1, 1),
to_date=None,
resources=["8888"],
resources=["34567"],
recompute=False,
)
results = list(cursor)
Expand Down Expand Up @@ -155,12 +155,12 @@ def test_extract_no_data_date_filtered(self):
},
]
self.client[self.platform_id]["rawmemberactivities"].insert_many(sample_data)
extract_data = ExtractPlatformRawData(self.platform_id, "channel_id")
extract_data = ExtractPlatformRawData(self.platform_id, "category_id")

cursor, override_recompute = extract_data.extract(
from_date=datetime(2023, 1, 1),
to_date=None,
resources=["8888"],
resources=["34567"],
recompute=False,
)
results = list(cursor)
Expand Down Expand Up @@ -205,12 +205,12 @@ def test_extract_some_with_to_date(self):
},
]
self.client[self.platform_id]["rawmemberactivities"].insert_many(sample_data)
extract_data = ExtractPlatformRawData(self.platform_id, "channel_id")
extract_data = ExtractPlatformRawData(self.platform_id, "category_id")

cursor, override_recompute = extract_data.extract(
from_date=datetime(2022, 1, 1),
to_date=datetime(2023, 2, 1),
resources=["8888"],
resources=["34567"],
recompute=False,
)
results = list(cursor)
Expand Down Expand Up @@ -264,12 +264,12 @@ def test_extract_no_override(self):
},
]
self.client[self.platform_id]["rawmemberactivities"].insert_many(sample_data)
extract_data = ExtractPlatformRawData(self.platform_id, "channel_id")
extract_data = ExtractPlatformRawData(self.platform_id, "category_id")

cursor, override_recompute = extract_data.extract(
from_date=datetime(2020, 1, 1),
to_date=None,
resources=["8888", "8880"],
resources=["34567", "34569"],
recompute=False,
)
results = list(cursor)
Expand Down

0 comments on commit 1d47276

Please sign in to comment.