Skip to content

Commit

Permalink
feat: Adding discourse message link to documents!
Browse files (browse the repository at this point in the history)
  • Loading branch information
amindadgar committed Nov 27, 2024
1 parent 7c1f32a commit b598cef
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 5 deletions.
2 changes: 2 additions & 0 deletions dags/hivemind_etl_helpers/src/db/discourse/fetch_raw_posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,10 @@ def fetch_raw_posts(
author.username AS author_username,
author.name AS author_name,
t.title AS topic,
t.id AS topic_id,
p.id AS postId,
$forum_endpoint AS forum_endpoint,
p.postNumber as post_number,
p.raw AS raw,
p.createdAt AS createdAt,
p.updatedAt AS updatedAt,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,18 @@ def transform_raw_to_documents(
doc: Document

if not exclude_metadata:
forum_endpoint = post["forum_endpoint"]
topic_id = post["topic_id"]
post_number = post["post_number"]

link = f"https://{forum_endpoint}/t/{topic_id}/{post_number}"

doc = Document(
text=post["raw"],
metadata={
"author_name": post["author_name"],
"author_username": post["author_username"],
"forum_endpoint": post["forum_endpoint"],
"forum_endpoint": forum_endpoint,
"createdAt": post["createdAt"],
"updatedAt": post["updatedAt"],
"postId": post["postId"],
Expand All @@ -49,6 +55,7 @@ def transform_raw_to_documents(
"liker_names": post["liker_names"],
"replier_usernames": post["replier_usernames"],
"replier_names": post["replier_names"],
"link": link,
},
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def test_fetch_some_data_without_from_date(self):
p.topicId = 1,
p.id = 100,
p.createdAt = '2022-01-01T00:00:00.000Z',
p.updatedAt = '2022-01-01T01:00:00.000Z'
p.updatedAt = '2022-01-01T01:00:00.000Z',
p.postNumber: 1.0
WITH p
CREATE (a:DiscourseUser) -[:POSTED]->(p)
SET
Expand Down Expand Up @@ -83,7 +84,8 @@ def test_fetch_some_data_without_from_date(self):
p.topicId = 2,
p.id = 101,
p.createdAt = '2022-01-01T00:01:00.000Z',
p.updatedAt = '2022-01-01T01:01:00.000Z'
p.updatedAt = '2022-01-01T01:01:00.000Z',
p.postNumber: 2.0
WITH p
CREATE (a:DiscourseUser) -[:POSTED]->(p)
SET
Expand Down Expand Up @@ -119,6 +121,7 @@ def test_fetch_some_data_without_from_date(self):
if data["author_username"] == "user#1":
self.assertEqual(data["author_name"], "user1")
self.assertEqual(data["topic"], "topic#1")
self.assertEqual(data["topic_id"], 1)
self.assertEqual(data["createdAt"], "2022-01-01T00:00:00.000Z")
self.assertEqual(data["updatedAt"], "2022-01-01T01:00:00.000Z")
self.assertEqual(data["authorTrustLevel"], 4)
Expand All @@ -130,9 +133,11 @@ def test_fetch_some_data_without_from_date(self):
self.assertEqual(data["replier_usernames"], ["user#2"])
self.assertEqual(data["replier_names"], ["user2"])
self.assertEqual(data["forum_endpoint"], "wwwdwadeswdpoi123")
self.assertEqual(data["post_number"], 1.0)
elif data["author_username"] == "user#2":
self.assertEqual(data["author_name"], "user2")
self.assertEqual(data["topic"], "topic#2")
self.assertEqual(data["topic_id"], 2)
self.assertEqual(data["createdAt"], "2022-01-01T00:01:00.000Z")
self.assertEqual(data["updatedAt"], "2022-01-01T01:01:00.000Z")
self.assertEqual(data["raw"], "texttexttext of post 2")
Expand All @@ -144,6 +149,7 @@ def test_fetch_some_data_without_from_date(self):
self.assertEqual(data["replier_usernames"], [])
self.assertEqual(data["replier_names"], [])
self.assertEqual(data["forum_endpoint"], "wwwdwadeswdpoi123")
self.assertEqual(data["post_number"], 2.0)
else:
raise IndexError("It shouldn't get here!")

Expand All @@ -166,7 +172,8 @@ def test_fetch_some_data_with_from_date(self):
p.topicId = 1,
p.id = 100,
p.createdAt = '2022-01-01T00:00:00.000Z',
p.updatedAt = '2022-01-01T01:00:00.000Z'
p.updatedAt = '2022-01-01T01:00:00.000Z',
p.postNumber: 1.0
WITH p
CREATE (a:DiscourseUser) -[:POSTED]->(p)
SET
Expand Down Expand Up @@ -194,7 +201,8 @@ def test_fetch_some_data_with_from_date(self):
p.topicId = 2,
p.id = 101,
p.createdAt = '2022-05-01T00:01:00.000Z',
p.updatedAt = '2022-05-01T01:01:00.000Z'
p.updatedAt = '2022-05-01T01:01:00.000Z',
p.postNumber: 2.0
WITH p
CREATE (a:DiscourseUser) -[:POSTED]->(p)
SET
Expand Down Expand Up @@ -230,6 +238,7 @@ def test_fetch_some_data_with_from_date(self):
if data["author_username"] == "user#2":
self.assertEqual(data["author_name"], "user2")
self.assertEqual(data["topic"], "topic#2")
self.assertEqual(data["post_number"], 2.0)
self.assertEqual(data["createdAt"], "2022-05-01T00:01:00.000Z")
self.assertEqual(data["updatedAt"], "2022-05-01T01:01:00.000Z")
self.assertEqual(data["raw"], "texttexttext of post 2")
Expand Down

0 comments on commit b598cef

Please sign in to comment.