Skip to content

Commit

Permalink
send Referer headers by default
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Sep 18, 2023
1 parent cb4798f commit 3ecb512
Show file tree
Hide file tree
Showing 33 changed files with 36 additions and 72 deletions.
18 changes: 17 additions & 1 deletion docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,21 @@ Description
browser would use HTTP/2.


extractor.*.referer
-------------------
Type
* ``bool``
* ``string``
Default
``true``
Description
Send `Referer <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referer>`__
headers with all outgoing HTTP requests.

If this is a ``string``, send it as Referer
instead of the extractor's ``root`` URL.


extractor.*.headers
-------------------
Type
Expand All @@ -576,7 +591,8 @@ Default
"User-Agent" : "<extractor.*.user-agent>",
"Accept" : "*/*",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate"
"Accept-Encoding": "gzip, deflate",
"Referer" : "<extractor.*.referer>"
}
Description
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/500px.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ class _500pxExtractor(Extractor):
root = "https://500px.com"
cookies_domain = ".500px.com"

def _init(self):
self.session.headers["Referer"] = self.root + "/"

def items(self):
data = self.metadata()

Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/8chan.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,6 @@ def __init__(self, match):
_8chanExtractor.__init__(self, match)
_, self.board, self.page = match.groups()

def _init(self):
self.session.headers["Referer"] = self.root + "/"

def items(self):
page = text.parse_int(self.page, 1)
url = "{}/{}/{}.json".format(self.root, self.board, page)
Expand Down
2 changes: 0 additions & 2 deletions gallery_dl/extractor/artstation.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ def _pagination(self, url, params=None, json=None):
headers = {
"Accept" : "application/json, text/plain, */*",
"Origin" : self.root,
"Referer": self.root + "/",
}

if json:
Expand Down Expand Up @@ -147,7 +146,6 @@ def _init_csrf_token(self):
headers = {
"Accept" : "*/*",
"Origin" : self.root,
"Referer": self.root + "/",
}
return self.request(
url, method="POST", headers=headers, json={},
Expand Down
5 changes: 2 additions & 3 deletions gallery_dl/extractor/behance.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@ def galleries(self):
def _request_graphql(self, endpoint, variables):
url = self.root + "/v3/graphql"
headers = {
"Origin" : self.root,
"Referer": self.root + "/",
"X-BCP" : self._bcp,
"Origin": self.root,
"X-BCP" : self._bcp,
"X-Requested-With": "XMLHttpRequest",
}
data = {
Expand Down
3 changes: 1 addition & 2 deletions gallery_dl/extractor/bunkr.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def fetch_album(self, album_id):
cdn = None
files = []
append = files.append
headers = {"Referer": self.root + "/"}

pos = page.index('class="grid-images')
for url in text.extract_iter(page, '<a href="', '"', pos):
Expand All @@ -63,7 +62,7 @@ def fetch_album(self, album_id):
else:
domain = domain.replace("cdn", "media-files", 1)
url = urlunsplit((scheme, domain, path, query, fragment))
append({"file": url, "_http_headers": headers})
append({"file": url})

return files, {
"album_id" : self.album_id,
Expand Down
7 changes: 7 additions & 0 deletions gallery_dl/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,13 @@ def _init_session(self):
else:
headers["Accept-Encoding"] = "gzip, deflate"

custom_referer = self.config("referer", True)
if custom_referer:
if isinstance(custom_referer, str):
headers["Referer"] = custom_referer
elif self.root:
headers["Referer"] = self.root + "/"

custom_headers = self.config("headers")
if custom_headers:
headers.update(custom_headers)
Expand Down
4 changes: 1 addition & 3 deletions gallery_dl/extractor/deviantart.py
Original file line number Diff line number Diff line change
Expand Up @@ -1422,11 +1422,9 @@ def user_watching(self, user, offset=None):

def _call(self, endpoint, params):
url = "https://www.deviantart.com/_napi" + endpoint
headers = {"Referer": "https://www.deviantart.com/"}
params["csrf_token"] = self.csrf_token or self._fetch_csrf_token()

response = self.request(
url, params=params, headers=headers, fatal=None)
response = self.request(url, params=params, fatal=None)

if response.status_code == 404:
raise exception.StopExtraction(
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/exhentai.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def initialize(self):

if self.version != "ex":
self.cookies.set("nw", "1", domain=self.cookies_domain)
self.session.headers["Referer"] = self.root + "/"
self.original = self.config("original", True)

limits = self.config("limits", False)
Expand Down
5 changes: 1 addition & 4 deletions gallery_dl/extractor/fantia.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ class FantiaExtractor(Extractor):
def _init(self):
self.headers = {
"Accept" : "application/json, text/plain, */*",
"Referer": self.root,
"X-Requested-With": "XMLHttpRequest",
}
self._empty_plan = {
Expand Down Expand Up @@ -65,11 +64,9 @@ def posts(self):

def _pagination(self, url):
params = {"page": 1}
headers = self.headers.copy()
del headers["X-Requested-With"]

while True:
page = self.request(url, params=params, headers=headers).text
page = self.request(url, params=params).text
self._csrf_token(page)

post_id = None
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/foolfuuka.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ def __init__(self, match):
if self.category == "b4k":
self.remote = self._remote_direct

def _init(self):
self.session.headers["Referer"] = self.root + "/"

def items(self):
yield Message.Directory, self.metadata()
for post in self.posts():
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/hiperdex.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ class HiperdexBase():
category = "hiperdex"
root = "https://hiperdex.com"

def _init(self):
self.session.headers["Referer"] = self.root + "/"

@memcache(keyarg=1)
def manga_data(self, manga, page=None):
if not page:
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/hotleak.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ class HotleakExtractor(Extractor):
archive_fmt = "{type}_{creator}_{id}"
root = "https://hotleak.vip"

def _init(self):
self.session.headers["Referer"] = self.root + "/"

def items(self):
for post in self.posts():
yield Message.Directory, post
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/imagefap.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ class ImagefapExtractor(Extractor):
archive_fmt = "{gallery_id}_{image_id}"
request_interval = (2.0, 4.0)

def _init(self):
self.session.headers["Referer"] = self.root + "/"

def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)

Expand Down
6 changes: 1 addition & 5 deletions gallery_dl/extractor/imgur.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,7 @@ def _pagination_v2(self, endpoint, params=None, key=None):
params["client_id"] = self.client_id
params["page"] = 0
params["sort"] = "newest"

headers = {
"Referer": "https://imgur.com/",
"Origin": "https://imgur.com",
}
headers = {"Origin": "https://imgur.com"}

while True:
data = self._call(endpoint, params, headers)["data"]
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/itaku.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ def __init__(self, extractor):
self.root = extractor.root + "/api"
self.headers = {
"Accept": "application/json, text/plain, */*",
"Referer": extractor.root + "/",
}

def galleries_images(self, username, section=None):
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/kemonoparty.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def __init__(self, match):
Extractor.__init__(self, match)

def _init(self):
self.session.headers["Referer"] = self.root + "/"
self._prepare_ddosguard_cookies()
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/mangafox.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ def __init__(self, match):
self.urlbase = self.root + base
ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")

def _init(self):
self.session.headers["Referer"] = self.root + "/"

def metadata(self, page):
manga, pos = text.extract(page, "<title>", "</title>")
count, pos = text.extract(
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/mangakakalot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ def __init__(self, match):
self.path = match.group(1)
ChapterExtractor.__init__(self, match, self.root + self.path)

def _init(self):
self.session.headers['Referer'] = self.root + "/"

def metadata(self, page):
_ , pos = text.extract(page, '<span itemprop="title">', '<')
manga , pos = text.extract(page, '<span itemprop="title">', '<', pos)
Expand Down
2 changes: 0 additions & 2 deletions gallery_dl/extractor/manganelo.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ def __init__(self, match):
super().__init__(match, "https://" + domain + path)

def _init(self):
self.session.headers['Referer'] = self.root + "/"

if self._match_chapter is None:
ManganeloBase._match_chapter = re.compile(
r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/naverwebtoon.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ def items(self):
url = self.root + "/api/article/list"
headers = {
"Accept": "application/json, text/plain, */*",
"Referer": self.root + "/",
}
params = {
"titleId": self.title_id,
Expand Down
2 changes: 0 additions & 2 deletions gallery_dl/extractor/newgrounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ def _extract_media_data(self, extr, url):
headers = {
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
"Referer": self.root,
}
sources = self.request(url, headers=headers).json()["sources"]

Expand Down Expand Up @@ -478,7 +477,6 @@ def _pagination(self, path, params):
headers = {
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
"Referer": self.root,
}
params["inner"] = "1"
params["page"] = 1
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/nijie.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def initialize(self):

BaseExtractor.initialize(self)

self.session.headers["Referer"] = self.root + "/"
self.user_name = None
if self.category == "horne":
self._extract_data = self._extract_data_horne
Expand Down
6 changes: 3 additions & 3 deletions gallery_dl/extractor/nozomi.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ class NozomiExtractor(Extractor):
filename_fmt = "{postid} {dataid}.{extension}"
archive_fmt = "{dataid}"

def items(self):
def _init(self):
self.session.headers["Origin"] = self.root

def items(self):
data = self.metadata()
self.session.headers["Origin"] = self.root
self.session.headers["Referer"] = self.root + "/"

for post_id in map(str, self.posts()):
url = "https://j.nozomi.la/post/{}/{}/{}.json".format(
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/patreon.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ def posts(self):

def _pagination(self, url):
headers = {
"Referer" : self.root + "/",
"Content-Type": "application/vnd.api+json",
}

Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/pinterest.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,6 @@ def __init__(self, extractor):
"Accept" : "application/json, text/javascript, "
"*/*, q=0.01",
"Accept-Language" : "en-US,en;q=0.5",
"Referer" : self.root + "/",
"X-Requested-With" : "XMLHttpRequest",
"X-APP-VERSION" : "0c4af40",
"X-CSRFToken" : csrf_token,
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/pornpics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@ def __init__(self, match):
super().__init__(match)
self.item = match.group(1)

def _init(self):
self.session.headers["Referer"] = self.root + "/"

def items(self):
for gallery in self.galleries():
gallery["_extractor"] = PornpicsGalleryExtractor
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/reactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def __init__(self, match):
self.category = netloc.rpartition(".")[0]

def _init(self):
self.session.headers["Referer"] = self.root
self.gif = self.config("gif", False)

def items(self):
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/redgifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ class RedgifsAPI():
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
"Referer" : extractor.root + "/",
"authorization" : None,
"content-type" : "application/json",
"x-customheader": extractor.root + "/",
Expand Down
1 change: 0 additions & 1 deletion gallery_dl/extractor/sankaku.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ def __init__(self, extractor):
self.extractor = extractor
self.headers = {
"Accept" : "application/vnd.sankaku.api+json;v=2",
"Referer" : extractor.root + "/",
"Platform": "web-app",
"Origin" : extractor.root,
}
Expand Down
7 changes: 3 additions & 4 deletions gallery_dl/extractor/skeb.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def metadata(self):
"""Return additional metadata"""

def _pagination(self, url, params):
headers = {"Referer": self.root, "Authorization": "Bearer null"}
headers = {"Authorization": "Bearer null"}
params["offset"] = 0

while True:
Expand All @@ -69,7 +69,7 @@ def _pagination(self, url, params):
def _get_post_data(self, user_name, post_num):
url = "{}/api/users/{}/works/{}".format(
self.root, user_name, post_num)
headers = {"Referer": self.root, "Authorization": "Bearer null"}
headers = {"Authorization": "Bearer null"}
resp = self.request(url, headers=headers).json()
creator = resp["creator"]
post = {
Expand Down Expand Up @@ -190,7 +190,6 @@ def posts(self):
}
headers = {
"Origin": self.root,
"Referer": self.root + "/",
"x-algolia-api-key": "9a4ce7d609e71bf29e977925e4c6740c",
"x-algolia-application-id": "HB1JT3KRE9",
}
Expand Down Expand Up @@ -243,7 +242,7 @@ def users(self):
url = "{}/api/users/{}/following_creators".format(
self.root, self.user_name)
params = {"sort": "date", "offset": 0, "limit": 90}
headers = {"Referer": self.root, "Authorization": "Bearer null"}
headers = {"Authorization": "Bearer null"}

while True:
data = self.request(url, params=params, headers=headers).json()
Expand Down
3 changes: 0 additions & 3 deletions gallery_dl/extractor/vipergirls.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ class VipergirlsExtractor(Extractor):
cookies_domain = ".vipergirls.to"
cookies_names = ("vg_userid", "vg_password")

def _init(self):
self.session.headers["Referer"] = self.root + "/"

def items(self):
self.login()

Expand Down
Loading

0 comments on commit 3ecb512

Please sign in to comment.