Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

download all images from user #236

Open
compwron opened this issue Mar 27, 2022 · 3 comments
Open

download all images from user #236

compwron opened this issue Mar 27, 2022 · 3 comments
Labels

Comments

@compwron
Copy link

Here is some messy code which uses this library to download all images for a particular user.

# first run:
#   pip install pixivpy-async
#   pip install requests
# The token will eventually expire -- to get a new one follow the doc at
# https://gist.github.com/ZipFile/c9ebedb224406f4f11845ab700124362
# NOTE: to see NSFW art, log into your account and edit "Viewing restriction"
# at https://www.pixiv.net/setting_user.php

TOKEN=""  # pixiv refresh token (see link above)
ITER_LIMIT = 10  # max consecutive empty-page retries before giving up
ILLUSTRATIONS_PAGE = 30  # API page size: illustrations returned per request

from queue import Empty
from pixivpy_async import *
import asyncio
from os.path import exists

def calc_next_url(current_user_id, current_offset):
    """Build the app-api URL for one offset page of a user's illustrations."""
    base = "https://app-api.pixiv.net/v1/user/illusts"
    params = {
        "user_id": current_user_id,
        "filter": "for_ios",
        "type": "illust",
        "offset": current_offset,
    }
    query = "&".join(f"{key}={value}" for key, value in params.items())
    return f"{base}?{query}"

async def download(aapi, illust):
    """Download every original-resolution image of one illustration.

    ``aapi`` is an authenticated AppPixivAPI instance; ``illust`` is one
    entry of a ``user_illusts`` response.  Single-page posts carry their URL
    in ``meta_single_page``; multi-page posts list one URL per page in
    ``meta_pages``.
    """
    from os.path import splitext  # local import keeps this snippet self-contained

    create_date = illust["create_date"][:10].replace("-", "_")
    illust_id = illust["id"]  # renamed: `id` shadows the builtin
    artist = f"{illust['user']['id']} {illust['user']['name']} {illust['user']['account']}"
    if illust.get("meta_single_page"):
        url = illust["meta_single_page"]["original_image_url"]
        # BUG FIX: keep the URL's file extension (.png/.jpg) in the saved
        # name -- the original saved extension-less files like "2022_03_27_01".
        await aapi.download(url, name=f"{create_date}_01{splitext(url)[1]}")
        print(f"downloaded {artist} post {illust_id} image 1")
    elif illust.get("meta_pages"):
        for index, page in enumerate(illust["meta_pages"], start=1):
            url = page["image_urls"]["original"]
            await aapi.download(url, name=f"{create_date}_{index:02d}{splitext(url)[1]}")
            print(f"downloaded {artist} post {illust_id} image {index}")
    else:
        # Neither metadata key is present (e.g. ugoira); the original message
        # "already downloaded" was wrong -- nothing here checks the disk.
        print(f"{illust_id} has no downloadable image metadata")

async def gettem(aapi, artist_id, current_offset, iter=0):
    """Fetch one offset page of *artist_id*'s illustrations and download it.

    An empty ``illusts`` list is treated as rate limiting and retried
    recursively, up to ITER_LIMIT attempts.  (``iter`` shadows the builtin;
    the name is kept to preserve the keyword interface.)
    """
    print("Next page...")
    next_url = calc_next_url(artist_id, current_offset)
    print(next_url)
    await asyncio.sleep(30)  # try to not get rate limited?
    next_qs = aapi.parse_qs(next_url)
    print(next_qs)
    json_result = await aapi.user_illusts(**next_qs)
    print("next url?", json_result.next_url, json_result["next_url"])
    if not json_result["illusts"]:
        print(f"Rate limited? Sleeping... iter: {iter} of limit {ITER_LIMIT}")
        await asyncio.sleep(10)
        if iter > ITER_LIMIT:
            raise Exception(f"nothing in illusts: {json_result}")
        # BUG FIX: the original called gettem(...) without awaiting it, so the
        # retry coroutine was created but never executed.  Await it, then
        # return so we don't fall through and iterate the empty result.
        await gettem(aapi, artist_id, current_offset - ILLUSTRATIONS_PAGE, iter + 1)
        return
    for illust in json_result["illusts"]:
        await download(aapi, illust)

async def main():
    """Log in with the refresh token and download every illustration of one artist."""
    artist_id = 151689  # hard-coded target artist (edit as needed)
    current_offset = ILLUSTRATIONS_PAGE  # pages are 30 items long
    async with PixivClient() as client:
        aapi = AppPixivAPI(client=client)
        await aapi.login(refresh_token=TOKEN)
        # First page: fetched without an offset.
        json_result = await aapi.user_illusts(artist_id)
        for illust in json_result["illusts"]:
            await download(aapi, illust)
        print("next url?", json_result.next_url, json_result["next_url"])
        print(json_result["next_url"])
        # Remaining pages: advance the offset until gettem raises
        # ("continue until errorsplode", per the original author).
        while True:
            print("still true")
            await gettem(aapi, artist_id, current_offset)
            current_offset += ILLUSTRATIONS_PAGE


# Guard the entry point so importing this module does not start a crawl.
if __name__ == "__main__":
    asyncio.run(main())
@Xdynix
Copy link
Collaborator

Xdynix commented Mar 28, 2022

Not sure of the purpose of this thread, but here is mine. It utilizes tqdm to create a nice-looking progress bar.

Code
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import requests
from tqdm import tqdm

from pixivpy3 import AppPixivAPI

USER_ID = '15919563'  # pixiv user whose illustrations will be downloaded
DOWNLOAD_DIR = Path(r'SOME-WHERE')  # destination root; images land in DOWNLOAD_DIR / USER_ID
REFRESH_TOKEN_FILE = Path(r'SOME-WHERE\refresh-token.txt')  # plain-text file holding the refresh token


def auth_pixiv_api(api: AppPixivAPI, refresh_token_file: Path):
    """Authenticate *api* from the token file, then persist the rotated token."""
    token = refresh_token_file.read_text().strip()
    api.auth(refresh_token=token)
    # Pixiv rotates the refresh token on auth; write the current one back
    # (trailing newline matches the original print()-based write).
    refresh_token_file.write_text(f'{api.refresh_token}\n')


def download(url: str, file: Path, headers=None, force=False):
    """Stream *url* to *file*, showing a tqdm progress bar.

    Skips the download when *file* already exists unless *force* is true.
    *headers* is forwarded to requests (used for pixiv's Referer check).
    """
    if file.exists() and not force:
        return

    with requests.get(url, headers=headers, stream=True) as response:
        response.raise_for_status()
        with tqdm(
                total=int(response.headers.get('Content-Length', 0)),
                desc=f'Download: {file.name}',
                unit='B', unit_scale=True, unit_divisor=1024,
                leave=False,
        ) as progress:
            # BUG FIX: parents=True creates the whole directory chain; the
            # original mkdir failed when DOWNLOAD_DIR itself did not exist.
            file.parent.mkdir(parents=True, exist_ok=True)
            with file.open('wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if not chunk:  # skip keep-alive chunks
                        continue
                    f.write(chunk)
                    progress.update(len(chunk))
                    progress.update(len(chunk))


def main():
    """Crawl every illustration of USER_ID and fan downloads out to a thread pool."""
    api = AppPixivAPI()
    auth_pixiv_api(api, REFRESH_TOKEN_FILE)

    with ThreadPoolExecutor(
            max_workers=5,
            # Share tqdm's lock with the workers so concurrent bars render cleanly.
            initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),),
    ) as executor:
        qs = {'user_id': USER_ID}
        root = DOWNLOAD_DIR / USER_ID
        # NOTE(review): loop assumes parse_qs(next_url) is falsy on the last
        # page (next_url absent) -- confirm against pixivpy behavior.
        while qs:
            json_result = api.user_illusts(**qs)
            qs = api.parse_qs(json_result.next_url)
            for illust in json_result.illusts:
                if illust.type == 'ugoira':
                    img_urls = []  # Skip ugoira
                elif illust.page_count == 1:
                    img_urls = [illust.meta_single_page.original_image_url]
                else:
                    img_urls = [
                        page.image_urls.original
                        for page in illust.meta_pages
                    ]
                for url in img_urls:
                    executor.submit(
                        download,
                        url,
                        root / os.path.basename(url),
                        # Referer header is required by pixiv's image CDN.
                        headers={'Referer': 'https://app-api.pixiv.net/'},
                        force=True,  # always re-download, overwriting existing files
                    )


if __name__ == '__main__':
    main()

I used to have a complex crawler that could even convert ugoira to GIF, but I don't use it anymore, so I no longer maintain it.

@eggplants
Copy link
Contributor

@compwron Did you want a method to "download all images from user", or did you want to know how to implement it with pixivpy?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

4 participants