Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

download all images from user #236

Open
compwron opened this issue Mar 27, 2022 · 3 comments
Open

download all images from user #236

compwron opened this issue Mar 27, 2022 · 3 comments
Labels

Comments

@compwron
Copy link

Here is some messy code which uses this library to download all images for a particular user.

# first run:
#   pip install pixivpy-async
#   pip install requests
# The token will eventually expire -- to get a new one follow the doc at
# https://gist.github.com/ZipFile/c9ebedb224406f4f11845ab700124362
# NOTE: to see NSFW art, log into your account and edit "Viewing restriction"
# at https://www.pixiv.net/setting_user.php

TOKEN=""  # pixiv refresh token (see link above)
ITER_LIMIT = 10  # max consecutive empty-page retries before giving up
ILLUSTRATIONS_PAGE = 30  # API page size: illustrations returned per request

from queue import Empty
from pixivpy_async import *
import asyncio
from os.path import exists

def calc_next_url(current_user_id, current_offset):
    """Build the app-api URL for one offset page of a user's illustrations."""
    base = "https://app-api.pixiv.net/v1/user/illusts"
    params = {
        "user_id": current_user_id,
        "filter": "for_ios",
        "type": "illust",
        "offset": current_offset,
    }
    query = "&".join(f"{key}={value}" for key, value in params.items())
    return f"{base}?{query}"

async def download(aapi, illust):
    """Download every original-resolution image of one illustration.

    ``aapi`` is an authenticated AppPixivAPI instance; ``illust`` is one
    entry of a ``user_illusts`` response.  Single-page posts carry their URL
    in ``meta_single_page``; multi-page posts list one URL per page in
    ``meta_pages``.
    """
    from os.path import splitext  # local import keeps this snippet self-contained

    create_date = illust["create_date"][:10].replace("-", "_")
    illust_id = illust["id"]  # renamed: `id` shadows the builtin
    artist = f"{illust['user']['id']} {illust['user']['name']} {illust['user']['account']}"
    if illust.get("meta_single_page"):
        url = illust["meta_single_page"]["original_image_url"]
        # BUG FIX: keep the URL's file extension (.png/.jpg) in the saved
        # name -- the original saved extension-less files like "2022_03_27_01".
        await aapi.download(url, name=f"{create_date}_01{splitext(url)[1]}")
        print(f"downloaded {artist} post {illust_id} image 1")
    elif illust.get("meta_pages"):
        for index, page in enumerate(illust["meta_pages"], start=1):
            url = page["image_urls"]["original"]
            await aapi.download(url, name=f"{create_date}_{index:02d}{splitext(url)[1]}")
            print(f"downloaded {artist} post {illust_id} image {index}")
    else:
        # Neither metadata key is present (e.g. ugoira); the original message
        # "already downloaded" was wrong -- nothing here checks the disk.
        print(f"{illust_id} has no downloadable image metadata")

async def gettem(aapi, artist_id, current_offset, iter=0):
    """Fetch one offset page of *artist_id*'s illustrations and download it.

    An empty ``illusts`` list is treated as rate limiting and retried
    recursively, up to ITER_LIMIT attempts.  (``iter`` shadows the builtin;
    the name is kept to preserve the keyword interface.)
    """
    print("Next page...")
    next_url = calc_next_url(artist_id, current_offset)
    print(next_url)
    await asyncio.sleep(30)  # try to not get rate limited?
    next_qs = aapi.parse_qs(next_url)
    print(next_qs)
    json_result = await aapi.user_illusts(**next_qs)
    print("next url?", json_result.next_url, json_result["next_url"])
    if not json_result["illusts"]:
        print(f"Rate limited? Sleeping... iter: {iter} of limit {ITER_LIMIT}")
        await asyncio.sleep(10)
        if iter > ITER_LIMIT:
            raise Exception(f"nothing in illusts: {json_result}")
        # BUG FIX: the original called gettem(...) without awaiting it, so the
        # retry coroutine was created but never executed.  Await it, then
        # return so we don't fall through and iterate the empty result.
        await gettem(aapi, artist_id, current_offset - ILLUSTRATIONS_PAGE, iter + 1)
        return
    for illust in json_result["illusts"]:
        await download(aapi, illust)

async def main():
    """Log in with the refresh token and download every illustration of one artist."""
    artist_id = 151689  # hard-coded target artist (edit as needed)
    current_offset = ILLUSTRATIONS_PAGE  # pages are 30 items long
    async with PixivClient() as client:
        aapi = AppPixivAPI(client=client)
        await aapi.login(refresh_token=TOKEN)
        # First page: fetched without an offset.
        json_result = await aapi.user_illusts(artist_id)
        for illust in json_result["illusts"]:
            await download(aapi, illust)
        print("next url?", json_result.next_url, json_result["next_url"])
        print(json_result["next_url"])
        # Remaining pages: advance the offset until gettem raises
        # ("continue until errorsplode", per the original author).
        while True:
            print("still true")
            await gettem(aapi, artist_id, current_offset)
            current_offset += ILLUSTRATIONS_PAGE


# Guard the entry point so importing this module does not start a crawl.
if __name__ == "__main__":
    asyncio.run(main())
@Xdynix
Copy link
Collaborator

Xdynix commented Mar 28, 2022

Not sure of the purpose of this thread, but here is mine. It utilizes tqdm to create a nice-looking progress bar.

Code
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import requests
from tqdm import tqdm

from pixivpy3 import AppPixivAPI

USER_ID = '15919563'  # pixiv user whose illustrations will be downloaded
DOWNLOAD_DIR = Path(r'SOME-WHERE')  # destination root; images land in DOWNLOAD_DIR / USER_ID
REFRESH_TOKEN_FILE = Path(r'SOME-WHERE\refresh-token.txt')  # plain-text file holding the refresh token


def auth_pixiv_api(api: AppPixivAPI, refresh_token_file: Path):
    """Authenticate *api* from the token file, then persist the rotated token."""
    token = refresh_token_file.read_text().strip()
    api.auth(refresh_token=token)
    # Pixiv rotates the refresh token on auth; write the current one back
    # (trailing newline matches the original print()-based write).
    refresh_token_file.write_text(f'{api.refresh_token}\n')


def download(url: str, file: Path, headers=None, force=False):
    """Stream *url* to *file*, showing a tqdm progress bar.

    Skips the download when *file* already exists unless *force* is true.
    *headers* is forwarded to requests (used for pixiv's Referer check).
    """
    if file.exists() and not force:
        return

    with requests.get(url, headers=headers, stream=True) as response:
        response.raise_for_status()
        with tqdm(
                total=int(response.headers.get('Content-Length', 0)),
                desc=f'Download: {file.name}',
                unit='B', unit_scale=True, unit_divisor=1024,
                leave=False,
        ) as progress:
            # BUG FIX: parents=True creates the whole directory chain; the
            # original mkdir failed when DOWNLOAD_DIR itself did not exist.
            file.parent.mkdir(parents=True, exist_ok=True)
            with file.open('wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if not chunk:  # skip keep-alive chunks
                        continue
                    f.write(chunk)
                    progress.update(len(chunk))
                    progress.update(len(chunk))


def main():
    """Crawl every illustration of USER_ID and fan downloads out to a thread pool."""
    api = AppPixivAPI()
    auth_pixiv_api(api, REFRESH_TOKEN_FILE)

    with ThreadPoolExecutor(
            max_workers=5,
            # Share tqdm's lock with the workers so concurrent bars render cleanly.
            initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),),
    ) as executor:
        qs = {'user_id': USER_ID}
        root = DOWNLOAD_DIR / USER_ID
        # NOTE(review): loop assumes parse_qs(next_url) is falsy on the last
        # page (next_url absent) -- confirm against pixivpy behavior.
        while qs:
            json_result = api.user_illusts(**qs)
            qs = api.parse_qs(json_result.next_url)
            for illust in json_result.illusts:
                if illust.type == 'ugoira':
                    img_urls = []  # Skip ugoira
                elif illust.page_count == 1:
                    img_urls = [illust.meta_single_page.original_image_url]
                else:
                    img_urls = [
                        page.image_urls.original
                        for page in illust.meta_pages
                    ]
                for url in img_urls:
                    executor.submit(
                        download,
                        url,
                        root / os.path.basename(url),
                        # Referer header is required by pixiv's image CDN.
                        headers={'Referer': 'https://app-api.pixiv.net/'},
                        force=True,  # always re-download, overwriting existing files
                    )


if __name__ == '__main__':
    main()

I used to have a complex crawler that could even convert ugoira to GIF, but I don't use it anymore, so I no longer maintain it.

@eggplants
Copy link
Contributor

@compwron Did you want a method to "download all images from user", or did you want to know how to implement it with pixivpy?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

4 participants