From 9cce94327513a47a4581a8606e305754dcca8d17 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Mon, 27 May 2024 12:36:41 +0330 Subject: [PATCH 1/3] feat: get the org id from the platform. The repo ids on the ETL side are ignored for now. --- dags/hivemind_etl_helpers/src/utils/modules/github.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dags/hivemind_etl_helpers/src/utils/modules/github.py b/dags/hivemind_etl_helpers/src/utils/modules/github.py index 5bed6935..3dc2f642 100644 --- a/dags/hivemind_etl_helpers/src/utils/modules/github.py +++ b/dags/hivemind_etl_helpers/src/utils/modules/github.py @@ -39,14 +39,19 @@ def get_learning_platforms(self): if platform["name"] != self.platform_name: continue + platform_id = platform["platform"] + organization_id = self.get_platform_metadata( + platform_id=platform_id, + metadata_name="installationId", + ) modules_options = platform["metadata"] platforms_data.append( { "community_id": str(community), - "organization_ids": modules_options.get("organizationId", []), + "organization_ids": [organization_id], "repo_ids": modules_options.get("repoIds", []), - "from_date": modules_options["fromDate"], + # "from_date": modules_options["fromDate"], } ) From 9a70f86d6012309f392b68cfd1b723f9bf2a1d9d Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Mon, 27 May 2024 15:42:16 +0330 Subject: [PATCH 2/3] fix: test cases! 
for now ignoring all metadata of github (commented them out) --- .../test_github_get_communities_org.py | 219 ++++++++++++++---- 1 file changed, 170 insertions(+), 49 deletions(-) diff --git a/dags/hivemind_etl_helpers/tests/integration/test_github_get_communities_org.py b/dags/hivemind_etl_helpers/tests/integration/test_github_get_communities_org.py index c7bb94ee..f12e041b 100644 --- a/dags/hivemind_etl_helpers/tests/integration/test_github_get_communities_org.py +++ b/dags/hivemind_etl_helpers/tests/integration/test_github_get_communities_org.py @@ -10,6 +10,7 @@ class TestQueryGitHubModulesDB(unittest.TestCase): def setUp(self): client = MongoSingleton.get_instance().client client["Core"].drop_collection("modules") + client["Core"].drop_collection("platforms") self.modules_github = ModulesGitHub() self.client = client @@ -22,18 +23,37 @@ def test_get_github_communities_data_single_modules(self): """ single github platform for one community """ + platform_id = ObjectId("6579c364f1120850414e0dc6") + community_id = ObjectId("6579c364f1120850414e0dc5") + + self.client["Core"]["platforms"].insert_one( + { + "_id": platform_id, + "name": "github", + "metadata": { + "installationId": "345678", + "account": {}, + }, + "community": community_id, + "disconnectedAt": None, + "connectedAt": datetime.now(), + "createdAt": datetime.now(), + "updatedAt": datetime.now(), + } + ) + self.client["Core"]["modules"].insert_one( { "name": "hivemind", - "community": ObjectId("6579c364f1120850414e0dc5"), + "community": community_id, "options": { "platforms": [ { - "platform": ObjectId("6579c364f1120850414e0dc6"), + "platform": platform_id, "name": "github", "metadata": { - "fromDate": datetime(2024, 1, 1), - "organizationId": ["1234"], + # "fromDate": datetime(2024, 1, 1), + # "organizationId": ["1234"], "repoIds": ["111", "234"], }, } @@ -52,9 +72,9 @@ def test_get_github_communities_data_single_modules(self): result[0], { "community_id": "6579c364f1120850414e0dc5", - 
"organization_ids": ["1234"], + "organization_ids": ["345678"], "repo_ids": ["111", "234"], - "from_date": datetime(2024, 1, 1), + # "from_date": datetime(2024, 1, 1), }, ) @@ -62,28 +82,59 @@ def test_get_github_communities_data_multiple_platforms(self): """ two github platform for one community """ + platform_id = ObjectId("6579c364f1120850414e0dc6") + platform_id2 = ObjectId("6579c364f1120850414e0dc7") + community_id = ObjectId("6579c364f1120850414e0dc5") + + self.client["Core"]["platforms"].insert_one( + { + "_id": platform_id, + "name": "github", + "metadata": { + "installationId": "11111", + "account": {}, + }, + "community": community_id, + "disconnectedAt": None, + "connectedAt": datetime.now(), + "createdAt": datetime.now(), + "updatedAt": datetime.now(), + } + ) + self.client["Core"]["platforms"].insert_one( + { + "_id": platform_id2, + "name": "github", + "metadata": { + "installationId": "222222", + "account": {}, + }, + "community": community_id, + "disconnectedAt": None, + "connectedAt": datetime.now(), + "createdAt": datetime.now(), + "updatedAt": datetime.now(), + } + ) self.client["Core"]["modules"].insert_one( { "name": "hivemind", - "community": ObjectId("1009c364f1120850414e0dc5"), + "community": community_id, "options": { "platforms": [ { - "platform": ObjectId("6579c364f1120850414e0dc6"), + "platform": platform_id, "name": "github", "metadata": { - "fromDate": datetime(2024, 1, 1), - "organizationId": ["1234"], - "repoIds": ["111", "234"], + # "repoIds": ["111", "234"], }, }, { - "platform": ObjectId("6579c364f1120850414e0dc7"), + "platform": platform_id2, "name": "github", "metadata": { - "fromDate": datetime(2024, 2, 2), - "organizationId": ["4321"], - "repoIds": ["2132", "8888"], + # "fromDate": datetime(2024, 2, 2), + # "repoIds": ["2132", "8888"], }, }, ] @@ -96,22 +147,24 @@ def test_get_github_communities_data_multiple_platforms(self): self.assertIsInstance(result, list) self.assertEqual(len(result), 2) + print(result[0]) + 
self.assertEqual( result[0], { - "community_id": "1009c364f1120850414e0dc5", - "organization_ids": ["1234"], - "repo_ids": ["111", "234"], - "from_date": datetime(2024, 1, 1), + "community_id": str(community_id), + "organization_ids": ["11111"], + "repo_ids": [], + # "from_date": datetime(2024, 1, 1), }, ) self.assertEqual( result[1], { - "community_id": "1009c364f1120850414e0dc5", - "organization_ids": ["4321"], - "repo_ids": ["2132", "8888"], - "from_date": datetime(2024, 2, 2), + "community_id": str(community_id), + "organization_ids": ["222222"], + "repo_ids": [], + # "from_date": datetime(2024, 2, 2), }, ) @@ -121,27 +174,94 @@ def test_get_github_communities_data_multiple_platforms_multiple_communities( """ two github platform for two separate communities """ + platform_id = ObjectId("6579c364f1120850414e0dc6") + platform_id2 = ObjectId("6579c364f1120850414e0dc7") + platform_id3 = ObjectId("6579c364f1120850414e0dc8") + platform_id4 = ObjectId("6579c364f1120850414e0dc9") + community_id = ObjectId("6579c364f1120850414e0dc5") + community_id2 = ObjectId("2009c364f1120850414e0dc5") + + self.client["Core"]["platforms"].insert_one( + { + "_id": platform_id, + "name": "github", + "metadata": { + "installationId": "11111", + "account": {}, + }, + "community": community_id, + "disconnectedAt": None, + "connectedAt": datetime.now(), + "createdAt": datetime.now(), + "updatedAt": datetime.now(), + } + ) + self.client["Core"]["platforms"].insert_one( + { + "_id": platform_id2, + "name": "github", + "metadata": { + "installationId": "222222", + "account": {}, + }, + "community": community_id, + "disconnectedAt": None, + "connectedAt": datetime.now(), + "createdAt": datetime.now(), + "updatedAt": datetime.now(), + } + ) + self.client["Core"]["platforms"].insert_one( + { + "_id": platform_id3, + "name": "github", + "metadata": { + "installationId": "333333", + "account": {}, + }, + "community": community_id2, + "disconnectedAt": None, + "connectedAt": datetime.now(), + 
"createdAt": datetime.now(), + "updatedAt": datetime.now(), + } + ) + self.client["Core"]["platforms"].insert_one( + { + "_id": platform_id4, + "name": "discord", + "metadata": { + "learning": {}, + "answering": {}, + }, + "community": community_id2, + "disconnectedAt": None, + "connectedAt": datetime.now(), + "createdAt": datetime.now(), + "updatedAt": datetime.now(), + } + ) + self.client["Core"]["modules"].insert_many( [ { "name": "hivemind", - "community": ObjectId("1009c364f1120850414e0dc5"), + "community": community_id, "options": { "platforms": [ { - "platform": ObjectId("6579c364f1120850414e0dc6"), + "platform": platform_id, "name": "github", "metadata": { - "fromDate": datetime(2024, 1, 1), - "organizationId": ["1234"], + # "fromDate": datetime(2024, 1, 1), }, }, { - "platform": ObjectId("6579c364f1120850414e0dc7"), + "platform": platform_id2, "name": "github", "metadata": { - "fromDate": datetime(2024, 2, 2), - "repoIds": ["1111"], + # "fromDate": datetime(2024, 2, 2), + "repoIds": ["AAAAA"], }, }, ] @@ -149,22 +269,22 @@ def test_get_github_communities_data_multiple_platforms_multiple_communities( }, { "name": "hivemind", - "community": ObjectId("2009c364f1120850414e0dc5"), + "community": community_id2, "options": { "platforms": [ { - "platform": ObjectId("6579c364f1120850414e0db5"), + "platform": platform_id3, "name": "github", "metadata": { - "fromDate": datetime(2024, 3, 1), - "organizationId": ["111111"], + # "fromDate": datetime(2024, 3, 1), + # "organizationId": ["111111"], }, }, { - "platform": ObjectId("6579c364f1120850414e0dc7"), + "platform": platform_id4, "name": "discord", "metadata": { - "fromDate": datetime(2024, 3, 1), + # "fromDate": datetime(2024, 3, 1), "selectedChannels": ["666", "777"], }, }, @@ -180,34 +300,35 @@ def test_get_github_communities_data_multiple_platforms_multiple_communities( self.assertEqual(len(results), 3) for res in results: - if res["organization_ids"] == ["1234"]: + print(res, "|||", str(community_id)) + if 
res["organization_ids"] == ["11111"]: self.assertEqual( res, { - "community_id": "1009c364f1120850414e0dc5", - "organization_ids": ["1234"], + "community_id": str(community_id), + "organization_ids": ["11111"], "repo_ids": [], - "from_date": datetime(2024, 1, 1), + # "from_date": datetime(2024, 1, 1), }, ) - elif res["organization_ids"] == []: + elif res["organization_ids"] == ["222222"]: self.assertEqual( res, { - "community_id": "1009c364f1120850414e0dc5", - "organization_ids": [], - "repo_ids": ["1111"], - "from_date": datetime(2024, 2, 2), + "community_id": str(community_id), + "organization_ids": ["222222"], + "repo_ids": ["AAAAA"], + # "from_date": datetime(2024, 2, 2), }, ) - elif res["organization_ids"] == ["111111"]: + elif res["organization_ids"] == ["333333"]: self.assertEqual( res, { - "community_id": "2009c364f1120850414e0dc5", - "organization_ids": ["111111"], + "community_id": str(community_id2), + "organization_ids": ["333333"], "repo_ids": [], - "from_date": datetime(2024, 3, 1), + # "from_date": datetime(2024, 3, 1), }, ) else: From 6b9fd546d90d4f9971d59d18c4ffccf7d7332386 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Mon, 27 May 2024 15:48:38 +0330 Subject: [PATCH 3/3] fix: add from_date as None so that nothing breaks. 
--- dags/hivemind_etl_helpers/src/utils/modules/github.py | 4 +++- .../tests/integration/test_github_get_communities_org.py | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dags/hivemind_etl_helpers/src/utils/modules/github.py b/dags/hivemind_etl_helpers/src/utils/modules/github.py index 3dc2f642..5634fbea 100644 --- a/dags/hivemind_etl_helpers/src/utils/modules/github.py +++ b/dags/hivemind_etl_helpers/src/utils/modules/github.py @@ -23,7 +23,8 @@ def get_learning_platforms(self): "community_id": "community1", "organization_ids": ["1111", "2222"], "repo_ids": ["132", "45232"], - "from_date": datetime(2024, 1, 1) + # "from_date": datetime(2024, 1, 1) + "from_date": None }] ``` """ @@ -52,6 +53,7 @@ def get_learning_platforms(self): "organization_ids": [organization_id], "repo_ids": modules_options.get("repoIds", []), # "from_date": modules_options["fromDate"], + "from_date": None, } ) diff --git a/dags/hivemind_etl_helpers/tests/integration/test_github_get_communities_org.py b/dags/hivemind_etl_helpers/tests/integration/test_github_get_communities_org.py index f12e041b..8709dc68 100644 --- a/dags/hivemind_etl_helpers/tests/integration/test_github_get_communities_org.py +++ b/dags/hivemind_etl_helpers/tests/integration/test_github_get_communities_org.py @@ -75,6 +75,7 @@ def test_get_github_communities_data_single_modules(self): "organization_ids": ["345678"], "repo_ids": ["111", "234"], # "from_date": datetime(2024, 1, 1), + "from_date": None, }, ) @@ -156,6 +157,7 @@ def test_get_github_communities_data_multiple_platforms(self): "organization_ids": ["11111"], "repo_ids": [], # "from_date": datetime(2024, 1, 1), + "from_date": None, }, ) self.assertEqual( @@ -165,6 +167,7 @@ def test_get_github_communities_data_multiple_platforms(self): "organization_ids": ["222222"], "repo_ids": [], # "from_date": datetime(2024, 2, 2), + "from_date": None, }, ) @@ -309,6 +312,7 @@ def test_get_github_communities_data_multiple_platforms_multiple_communities( 
"organization_ids": ["11111"], "repo_ids": [], # "from_date": datetime(2024, 1, 1), + "from_date": None, }, ) elif res["organization_ids"] == ["222222"]: @@ -319,6 +323,7 @@ def test_get_github_communities_data_multiple_platforms_multiple_communities( "organization_ids": ["222222"], "repo_ids": ["AAAAA"], # "from_date": datetime(2024, 2, 2), + "from_date": None, }, ) elif res["organization_ids"] == ["333333"]: @@ -329,6 +334,7 @@ def test_get_github_communities_data_multiple_platforms_multiple_communities( "organization_ids": ["333333"], "repo_ids": [], # "from_date": datetime(2024, 3, 1), + "from_date": None, }, ) else: