From d34195b41de838d44ffea7dc7f741cd54d41fc6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 17 Aug 2023 16:06:35 +0200 Subject: [PATCH] [behance] fix and update 'user' extractor (#4417) --- gallery_dl/extractor/behance.py | 294 +++++++++++++++++++++++++------- 1 file changed, 235 insertions(+), 59 deletions(-) diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 79404c3581..64a91633e3 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -26,14 +26,36 @@ def items(self): def galleries(self): """Return all relevant gallery URLs""" - @staticmethod - def _update(data): + def _request_graphql(self, endpoint, variables): + url = self.root + "/v3/graphql" + headers = { + "Origin" : self.root, + "Referer": self.root + "/", + "X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d", + "X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==", + "X-Requested-With": "XMLHttpRequest", + } + cookies = { + "bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d", + "gk_suid": "62735605", + "ilo0" : "true", + } + data = { + "query" : GRAPHQL_QUERIES[endpoint], + "variables": variables, + } + + return self.request(url, method="POST", headers=headers, + cookies=cookies, json=data).json()["data"] + + def _update(self, data): # compress data to simple lists if data["fields"] and isinstance(data["fields"][0], dict): data["fields"] = [ field.get("name") or field.get("label") for field in data["fields"] ] + data["owners"] = [ owner.get("display_name") or owner.get("displayName") for owner in data["owners"] @@ -193,7 +215,7 @@ class BehanceUserExtractor(BehanceExtractor): categorytransfer = True pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$" test = ("https://www.behance.net/alexstrohl", { - "count": ">= 8", + "count": ">= 11", "pattern": BehanceGalleryExtractor.pattern, }) @@ -202,17 +224,20 @@ def __init__(self, match): self.user = match.group(1) def galleries(self): - url = "{}/{}/projects".format(self.root, self.user) - params = {"offset": 0} - headers = {"X-Requested-With": "XMLHttpRequest"} + endpoint = "GetProfileProjects" + variables = { + "username": self.user, + "after" : "MAo=", + } while True: - data = self.request(url, params=params, headers=headers).json() - work = data["profile"]["activeSection"]["work"] - yield from work["projects"] - if not work["hasMore"]: + data = self._request_graphql(endpoint, variables) + items = data["user"]["profileProjects"] + yield from items["nodes"] + + if not items["pageInfo"]["hasNextPage"]: return - params["offset"] += len(work["projects"]) + variables["after"] = items["pageInfo"]["endCursor"] class BehanceCollectionExtractor(BehanceExtractor): @@ -221,7 +246,7 @@ class BehanceCollectionExtractor(BehanceExtractor): categorytransfer = True pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)" test = ("https://www.behance.net/collection/71340149/inspiration", { - "count": ">= 145", + "count": ">= 150", "pattern": BehanceGalleryExtractor.pattern, }) @@ -230,21 +255,186 @@ def __init__(self, match): self.collection_id = match.group(1) def galleries(self): - url = self.root + "/v3/graphql" - headers = { - "Origin" : self.root, - "Referer": self.root + "/collection/" + self.collection_id, - "X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d", - "X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==", - "X-Requested-With": "XMLHttpRequest", + endpoint = "GetMoodboardItemsAndRecommendations" + variables = { + "afterItem": "MAo=", + "firstItem": 40, + "id" : int(self.collection_id), + "shouldGetItems" : True, + "shouldGetMoodboardFields": False, + "shouldGetRecommendations": False, } - cookies = { - "bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d", - "gk_suid": "66981391", - "ilo0" : "true", + + while True: + data = self._request_graphql(endpoint, variables) + items = data["moodboard"]["items"] + + for node in items["nodes"]: + yield node["entity"] + + if not items["pageInfo"]["hasNextPage"]: + return + variables["afterItem"] = items["pageInfo"]["endCursor"] + + +GRAPHQL_QUERIES = { + "GetProfileProjects": """\ +query GetProfileProjects($username: String, $after: String) { + user(username: $username) { + profileProjects(first: 12, after: $after) { + pageInfo { + endCursor + hasNextPage + } + nodes { + __typename + adminFlags { + mature_lock + privacy_lock + dmca_lock + flagged_lock + privacy_violation_lock + trademark_lock + spam_lock + eu_ip_lock } + colors { + r + g + b + } + covers { + size_202 { + url + } + size_404 { + url + } + size_808 { + url + } + } + features { + url + name + featuredOn + ribbon { + image + image2x + image3x + } + } + fields { + id + label + slug + url + } + hasMatureContent + id + isFeatured + isHiddenFromWorkTab + isMatureReviewSubmitted + isOwner + isFounder + isPinnedToSubscriptionOverview + isPrivate + linkedAssets { + ...sourceLinkFields + } + linkedAssetsCount + sourceFiles { + ...sourceFileFields + } + matureAccess + modifiedOn + name + owners { + ...OwnerFields + images { + size_50 { + url + } + } + } + premium + publishedOn + stats { + appreciations { + all + } + views { + all + } + comments { + all + } + } + slug + tools { + id + title + category + categoryLabel + categoryId + approved + url + backgroundColor + } + url + } + } + } +} + +fragment sourceFileFields on SourceFile { + __typename + sourceFileId + projectId + userId + title + assetId + renditionUrl + mimeType + size + category + licenseType + unitAmount + currency + tier + hidden + extension + hasUserPurchased +} + +fragment sourceLinkFields on LinkedAsset { + __typename + name + premium + url + category + licenseType +} + +fragment OwnerFields on User { + displayName + hasPremiumAccess + id + isFollowing + isProfileOwner + location + locationUrl + url + username + availabilityInfo { + availabilityTimeline + isAvailableFullTime + isAvailableFreelance + } +} +""", - query = """ + "GetMoodboardItemsAndRecommendations": """\ query GetMoodboardItemsAndRecommendations( $id: Int! $firstItem: Int! @@ -289,13 +479,7 @@ def galleries(self): url isOwner owners { - id - displayName - url - firstName - location - locationUrl - isFollowing + ...OwnerFields images { size_50 { url @@ -320,6 +504,7 @@ def galleries(self): } fragment projectFields on Project { + __typename id isOwner publishedOn @@ -348,13 +533,7 @@ def galleries(self): b } owners { - url - displayName - id - location - locationUrl - isProfileOwner - isFollowing + ...OwnerFields images { size_50 { url @@ -488,26 +667,23 @@ def galleries(self): } } } -""" - variables = { - "afterItem": "MAo=", - "firstItem": 40, - "id" : int(self.collection_id), - "shouldGetItems" : True, - "shouldGetMoodboardFields": False, - "shouldGetRecommendations": False, - } - data = {"query": query, "variables": variables} - while True: - items = self.request( - url, method="POST", headers=headers, - cookies=cookies, json=data, - ).json()["data"]["moodboard"]["items"] - - for node in items["nodes"]: - yield node["entity"] +fragment OwnerFields on User { + displayName + hasPremiumAccess + id + isFollowing + isProfileOwner + location + locationUrl + url + username + availabilityInfo { + availabilityTimeline + isAvailableFullTime + isAvailableFreelance + } +} +""", - if not items["pageInfo"]["hasNextPage"]: - return - variables["afterItem"] = items["pageInfo"]["endCursor"] +}