Skip to content

Commit

Permalink
feat: Added word max limit!
Browse files Browse the repository at this point in the history
  • Loading branch information
amindadgar committed Dec 5, 2024
1 parent bc758f7 commit ba4c527
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ def fetch_raw_messages(
guild_id: str,
selected_channels: list[str],
from_date: datetime,
**kwargs,
) -> list[dict]:
"""
fetch rawinfo messages from mongodb database
Expand All @@ -20,6 +21,10 @@ def fetch_raw_messages(
from_date : datetime
get the raw data from a specific date
default is None, meaning get all the messages
kwargs : dict
min_word_limit : int
the minimum number of characters a message's content must exceed
default is 8 characters
Returns
--------
Expand All @@ -29,6 +34,8 @@ def fetch_raw_messages(
client = MongoSingleton.get_instance().get_client()
user_ids = get_real_users(guild_id)

limit_max_words = kwargs.get("min_word_limit", 8)

cursor = (
client[guild_id]["rawinfos"]
.find(
Expand All @@ -38,6 +45,7 @@ def fetch_raw_messages(
"createdDate": {"$gte": from_date},
"isGeneratedByWebhook": False,
"channelId": {"$in": selected_channels},
"$where": f"this.content.length > {limit_max_words}",
}
)
.sort("createdDate", 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,177 @@ def test_fetch_raw_messages_fetch_from_date(self):

# Check if the fetched messages are equal to the expected messages
self.assertCountEqual(messages, expected_messages)

def test_fetch_raw_messages_fetch_limited_characters(self):
    """
    Short messages are filtered out when the default length limit applies.

    Two messages are inserted: "AA" and a sentence longer than 8 characters.
    `fetch_raw_messages` is called without `min_word_limit`, so only the
    longer message is expected back.
    """
    client = MongoSingleton.get_instance().client

    guild_id = "1234"
    channels = ["111111", "22222"]
    users_id = ["user1", "user2", "user3", "user4", "user5"]
    self.setup_db(
        channels=channels,
        guild_id=guild_id,
    )

    for user in users_id:
        client[guild_id]["guildmembers"].insert_one(
            {
                "discordId": user,
                "username": f"username_{user}",
                "roles": None,
                "joinedAt": datetime(2023, 1, 1),
                "avatar": None,
                # user3 is the only bot account in this fixture
                "isBot": user == "user3",
                "discriminator": "0",
                "permissions": None,
                "deletedAt": None,
                "globalName": None,
                "nickname": None,
            }
        )

    # Dropping any previous data
    client[guild_id].drop_collection("rawinfos")

    # One short and one long message, both created on the same date
    samples = [
        (users_id[0], "AA", channels[0]),
        (users_id[1], "A sample text with more than 8 characters!", channels[1]),
    ]
    raw_data = [
        {
            "type": 0,
            "author": author,
            "content": content,
            "user_mentions": [],
            "role_mentions": [],
            "reactions": [],
            "replied_user": None,
            "createdDate": datetime(2023, 10, 1),
            "messageId": str(np.random.randint(1000000, 9999999)),
            "channelId": channel,
            "channelName": f"general {channel}",
            "threadId": None,
            "threadName": None,
            "isGeneratedByWebhook": False,
        }
        for author, content, channel in samples
    ]

    client[guild_id]["rawinfos"].insert_many(raw_data)

    # `from_date` is a required argument; use a date before both messages
    messages = fetch_raw_messages(
        guild_id,
        selected_channels=channels,
        from_date=datetime(2023, 9, 1),
    )
    # Only the long message should pass the default length filter
    self.assertEqual(len(messages), 1)

def test_fetch_raw_messages_fetch_limited_characters_custom_limit(self):
    """
    A custom `min_word_limit` changes which messages are returned.

    Two messages are inserted: "AA" and a sentence longer than 8 characters.
    `fetch_raw_messages` is called with `min_word_limit=1`, so both messages
    are expected back.

    The method name carries a `_custom_limit` suffix so that it does not
    collide with another test of the same base name in this class; with
    identical names only the last definition would be collected.
    """
    client = MongoSingleton.get_instance().client

    guild_id = "1234"
    channels = ["111111", "22222"]
    users_id = ["user1", "user2", "user3", "user4", "user5"]
    self.setup_db(
        channels=channels,
        guild_id=guild_id,
    )

    for user in users_id:
        client[guild_id]["guildmembers"].insert_one(
            {
                "discordId": user,
                "username": f"username_{user}",
                "roles": None,
                "joinedAt": datetime(2023, 1, 1),
                "avatar": None,
                # user3 is the only bot account in this fixture
                "isBot": user == "user3",
                "discriminator": "0",
                "permissions": None,
                "deletedAt": None,
                "globalName": None,
                "nickname": None,
            }
        )

    # Dropping any previous data
    client[guild_id].drop_collection("rawinfos")

    # One short and one long message, both created on the same date
    samples = [
        (users_id[0], "AA", channels[0]),
        (users_id[1], "A sample text with more than 8 characters!", channels[1]),
    ]
    raw_data = [
        {
            "type": 0,
            "author": author,
            "content": content,
            "user_mentions": [],
            "role_mentions": [],
            "reactions": [],
            "replied_user": None,
            "createdDate": datetime(2023, 10, 1),
            "messageId": str(np.random.randint(1000000, 9999999)),
            "channelId": channel,
            "channelName": f"general {channel}",
            "threadId": None,
            "threadName": None,
            "isGeneratedByWebhook": False,
        }
        for author, content, channel in samples
    ]

    client[guild_id]["rawinfos"].insert_many(raw_data)

    # `from_date` is a required argument; use a date before both messages
    messages = fetch_raw_messages(
        guild_id,
        selected_channels=channels,
        from_date=datetime(2023, 9, 1),
        min_word_limit=1,
    )
    # Both messages are longer than 1 character, so both should be returned
    self.assertEqual(len(messages), 2)

0 comments on commit ba4c527

Please sign in to comment.