Skip to content

Commit

Permalink
[behance] fix and update 'user' extractor (#4417)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Aug 17, 2023
1 parent 4d3cf70 commit d34195b
Showing 1 changed file with 235 additions and 59 deletions.
294 changes: 235 additions & 59 deletions gallery_dl/extractor/behance.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,36 @@ def items(self):
def galleries(self):
"""Return all relevant gallery URLs"""

@staticmethod
def _update(data):
def _request_graphql(self, endpoint, variables):
    """Send one GraphQL query to Behance and return its "data" payload.

    endpoint:  key into GRAPHQL_QUERIES selecting the query text
    variables: dict sent along as the GraphQL "variables" object

    Returns the value stored under "data" in the decoded JSON response.
    """
    # the same token value is sent both as a header and as a cookie
    bcp_token = "4c34489d-914c-46cd-b44c-dfd0e661136d"

    request_headers = {
        "Origin": self.root,
        "Referer": self.root + "/",
        "X-BCP": bcp_token,
        "X-NewRelic-ID": "VgUFVldbGwsFU1BRDwUBVw==",
        "X-Requested-With": "XMLHttpRequest",
    }
    request_cookies = {
        "bcp": bcp_token,
        "gk_suid": "62735605",
        "ilo0": "true",
    }
    payload = {
        "query": GRAPHQL_QUERIES[endpoint],
        "variables": variables,
    }

    response = self.request(
        self.root + "/v3/graphql",
        method="POST",
        headers=request_headers,
        cookies=request_cookies,
        json=payload,
    )
    return response.json()["data"]

def _update(self, data):
# compress data to simple lists
if data["fields"] and isinstance(data["fields"][0], dict):
data["fields"] = [
field.get("name") or field.get("label")
for field in data["fields"]
]

data["owners"] = [
owner.get("display_name") or owner.get("displayName")
for owner in data["owners"]
Expand Down Expand Up @@ -193,7 +215,7 @@ class BehanceUserExtractor(BehanceExtractor):
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
test = ("https://www.behance.net/alexstrohl", {
"count": ">= 8",
"count": ">= 11",
"pattern": BehanceGalleryExtractor.pattern,
})

Expand All @@ -202,17 +224,20 @@ def __init__(self, match):
self.user = match.group(1)

def galleries(self):
    """Yield every project node listed on the user's profile.

    Pages through the "GetProfileProjects" GraphQL query via
    self._request_graphql(), feeding each response's "endCursor"
    back in as the "after" variable until "hasNextPage" is false.
    """
    endpoint = "GetProfileProjects"
    variables = {
        "username": self.user,
        # "MAo=" is base64 for "0\n"; presumably the first-page cursor
        "after": "MAo=",
    }

    while True:
        data = self._request_graphql(endpoint, variables)
        items = data["user"]["profileProjects"]
        yield from items["nodes"]

        page_info = items["pageInfo"]
        if not page_info["hasNextPage"]:
            return
        # continue after the last node of the page just yielded
        variables["after"] = page_info["endCursor"]


class BehanceCollectionExtractor(BehanceExtractor):
Expand All @@ -221,7 +246,7 @@ class BehanceCollectionExtractor(BehanceExtractor):
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
test = ("https://www.behance.net/collection/71340149/inspiration", {
"count": ">= 145",
"count": ">= 150",
"pattern": BehanceGalleryExtractor.pattern,
})

Expand All @@ -230,21 +255,186 @@ def __init__(self, match):
self.collection_id = match.group(1)

def galleries(self):
    """Yield the "entity" object of every item in the collection.

    Pages through the "GetMoodboardItemsAndRecommendations" GraphQL
    query via self._request_graphql(), feeding each response's
    "endCursor" back in as "afterItem" until "hasNextPage" is false.
    """
    endpoint = "GetMoodboardItemsAndRecommendations"
    variables = {
        # "MAo=" is base64 for "0\n"; presumably the first-page cursor
        "afterItem": "MAo=",
        "firstItem": 40,
        "id": int(self.collection_id),
        "shouldGetItems": True,
        "shouldGetMoodboardFields": False,
        "shouldGetRecommendations": False,
    }

    while True:
        data = self._request_graphql(endpoint, variables)
        items = data["moodboard"]["items"]

        for node in items["nodes"]:
            yield node["entity"]

        page_info = items["pageInfo"]
        if not page_info["hasNextPage"]:
            return
        # continue after the last item of the page just yielded
        variables["afterItem"] = page_info["endCursor"]


GRAPHQL_QUERIES = {
"GetProfileProjects": """\
query GetProfileProjects($username: String, $after: String) {
user(username: $username) {
profileProjects(first: 12, after: $after) {
pageInfo {
endCursor
hasNextPage
}
nodes {
__typename
adminFlags {
mature_lock
privacy_lock
dmca_lock
flagged_lock
privacy_violation_lock
trademark_lock
spam_lock
eu_ip_lock
}
colors {
r
g
b
}
covers {
size_202 {
url
}
size_404 {
url
}
size_808 {
url
}
}
features {
url
name
featuredOn
ribbon {
image
image2x
image3x
}
}
fields {
id
label
slug
url
}
hasMatureContent
id
isFeatured
isHiddenFromWorkTab
isMatureReviewSubmitted
isOwner
isFounder
isPinnedToSubscriptionOverview
isPrivate
linkedAssets {
...sourceLinkFields
}
linkedAssetsCount
sourceFiles {
...sourceFileFields
}
matureAccess
modifiedOn
name
owners {
...OwnerFields
images {
size_50 {
url
}
}
}
premium
publishedOn
stats {
appreciations {
all
}
views {
all
}
comments {
all
}
}
slug
tools {
id
title
category
categoryLabel
categoryId
approved
url
backgroundColor
}
url
}
}
}
}
fragment sourceFileFields on SourceFile {
__typename
sourceFileId
projectId
userId
title
assetId
renditionUrl
mimeType
size
category
licenseType
unitAmount
currency
tier
hidden
extension
hasUserPurchased
}
fragment sourceLinkFields on LinkedAsset {
__typename
name
premium
url
category
licenseType
}
fragment OwnerFields on User {
displayName
hasPremiumAccess
id
isFollowing
isProfileOwner
location
locationUrl
url
username
availabilityInfo {
availabilityTimeline
isAvailableFullTime
isAvailableFreelance
}
}
""",

query = """
"GetMoodboardItemsAndRecommendations": """\
query GetMoodboardItemsAndRecommendations(
$id: Int!
$firstItem: Int!
Expand Down Expand Up @@ -289,13 +479,7 @@ def galleries(self):
url
isOwner
owners {
id
displayName
url
firstName
location
locationUrl
isFollowing
...OwnerFields
images {
size_50 {
url
Expand All @@ -320,6 +504,7 @@ def galleries(self):
}
fragment projectFields on Project {
__typename
id
isOwner
publishedOn
Expand Down Expand Up @@ -348,13 +533,7 @@ def galleries(self):
b
}
owners {
url
displayName
id
location
locationUrl
isProfileOwner
isFollowing
...OwnerFields
images {
size_50 {
url
Expand Down Expand Up @@ -488,26 +667,23 @@ def galleries(self):
}
}
}
"""
variables = {
"afterItem": "MAo=",
"firstItem": 40,
"id" : int(self.collection_id),
"shouldGetItems" : True,
"shouldGetMoodboardFields": False,
"shouldGetRecommendations": False,
}
data = {"query": query, "variables": variables}
while True:
items = self.request(
url, method="POST", headers=headers,
cookies=cookies, json=data,
).json()["data"]["moodboard"]["items"]

for node in items["nodes"]:
yield node["entity"]
fragment OwnerFields on User {
displayName
hasPremiumAccess
id
isFollowing
isProfileOwner
location
locationUrl
url
username
availabilityInfo {
availabilityTimeline
isAvailableFullTime
isAvailableFreelance
}
}
""",

if not items["pageInfo"]["hasNextPage"]:
return
variables["afterItem"] = items["pageInfo"]["endCursor"]
}

0 comments on commit d34195b

Please sign in to comment.