
github-release script: fetch all releases of a repository #171

Open · wants to merge 8 commits into master

Changes from 2 commits
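Previously the script fetched only the first page returned by the GitHub releases API, so repositories with many releases were mirrored incompletely. This PR requests per_page=100 and follows the Link header to collect every page. A minimal sketch of the pattern, assuming the requests library and a placeholder "owner/name" repository (fetch_all_releases is illustrative, not a function from this script):

    import requests

    def fetch_all_releases(repo: str) -> list:
        headers = {"Accept": "application/vnd.github+json"}
        releases = []
        url = f"https://api.github.com/repos/{repo}/releases?per_page=100"
        while url:
            r = requests.get(url, headers=headers, timeout=(7, 10))
            r.raise_for_status()
            releases.extend(r.json())
            # requests parses the Link header into r.links; the "next"
            # entry disappears on the last page, which ends the loop.
            url = r.links.get("next", {}).get("url")
        return releases

Paging multiplies the number of API calls, so setting GITHUB_TOKEN (which github_get in this script attaches as an Authorization header) is advisable to avoid rate limiting.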
122 changes: 75 additions & 47 deletions github-release.py
@@ -21,20 +21,21 @@
TIMEOUT_OPTION = (7, 10)
total_size = 0

def sizeof_fmt(num, suffix='iB'):
for unit in ['','K','M','G','T','P','E','Z']:

def sizeof_fmt(num, suffix="iB"):
for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
if abs(num) < 1024.0:
return "%3.2f%s%s" % (num, unit, suffix)
num /= 1024.0
return "%.2f%s%s" % (num, 'Y', suffix)
return "%.2f%s%s" % (num, "Y", suffix)


# wrap around requests.get to use token if available
def github_get(*args, **kwargs):
headers = kwargs['headers'] if 'headers' in kwargs else {}
if 'GITHUB_TOKEN' in os.environ:
headers['Authorization'] = 'token {}'.format(
os.environ['GITHUB_TOKEN'])
kwargs['headers'] = headers
headers = kwargs["headers"] if "headers" in kwargs else {}
if "GITHUB_TOKEN" in os.environ:
headers["Authorization"] = "token {}".format(os.environ["GITHUB_TOKEN"])
kwargs["headers"] = headers
return requests.get(*args, **kwargs)


@@ -44,7 +45,12 @@ def do_download(remote_url: str, dst_file: Path, remote_ts: float, remote_size:
r.raise_for_status()
tmp_dst_file = None
try:
with tempfile.NamedTemporaryFile(prefix="." + dst_file.name + ".", suffix=".tmp", dir=dst_file.parent, delete=False) as f:
with tempfile.NamedTemporaryFile(
prefix="." + dst_file.name + ".",
suffix=".tmp",
dir=dst_file.parent,
delete=False,
) as f:
tmp_dst_file = Path(f.name)
for chunk in r.iter_content(chunk_size=1024**2):
if chunk: # filter out keep-alive new chunks
@@ -53,7 +59,9 @@ def do_download(remote_url: str, dst_file: Path, remote_ts: float, remote_size:
# check for downloaded size
downloaded_size = tmp_dst_file.stat().st_size
if remote_size != -1 and downloaded_size != remote_size:
raise Exception(f'File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes')
raise Exception(
f"File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes"
)
os.utime(tmp_dst_file, (remote_ts, remote_ts))
tmp_dst_file.chmod(0o644)
tmp_dst_file.replace(dst_file)
@@ -71,8 +79,7 @@ def downloading_worker(q):

url, dst_file, working_dir, updated, remote_size = item

print("downloading", url, "to",
dst_file.relative_to(working_dir), flush=True)
print("downloading", url, "to", dst_file.relative_to(working_dir), flush=True)
try:
do_download(url, dst_file, updated, remote_size)
except Exception:
@@ -86,30 +93,35 @@ def downloading_worker(q):
def create_workers(n):
task_queue = queue.Queue()
for i in range(n):
t = threading.Thread(target=downloading_worker, args=(task_queue, ))
t = threading.Thread(target=downloading_worker, args=(task_queue,))
t.start()
return task_queue


def ensure_safe_name(filename):
filename = filename.replace('\0', ' ')
if filename == '.':
return ' .'
elif filename == '..':
return '. .'
filename = filename.replace("\0", " ")
if filename == ".":
return " ."
elif filename == "..":
return ". ."
else:
return filename.replace('/', '\\').replace('\\', '_')
return filename.replace("/", "\\").replace("\\", "_")


def main():
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--base-url", default=BASE_URL)
parser.add_argument("--working-dir", default=WORKING_DIR)
parser.add_argument("--workers", default=1, type=int,
help='number of concurrent downloading jobs')
parser.add_argument("--fast-skip", action='store_true',
help='do not verify size and timestamp of existing files')
parser.add_argument(
"--workers", default=1, type=int, help="number of concurrent downloading jobs"
)
parser.add_argument(
"--fast-skip",
action="store_true",
help="do not verify size and timestamp of existing files",
)
parser.add_argument("--config", default=CONFIG)
args = parser.parse_args()

@@ -124,14 +136,15 @@ def main():
with open(args.config, "r") as f:
REPOS = json.load(f)

def download(release, release_dir, tarball = False):
def download(release, release_dir, tarball=False):
global total_size

if tarball:
url = release['tarball_url']
url = release["tarball_url"]
updated = datetime.strptime(
release['published_at'], '%Y-%m-%dT%H:%M:%SZ').timestamp()
dst_file = release_dir / 'repo-snapshot.tar.gz'
release["published_at"], "%Y-%m-%dT%H:%M:%SZ"
).timestamp()
dst_file = release_dir / "repo-snapshot.tar.gz"
remote_filelist.append(dst_file.relative_to(working_dir))

if dst_file.is_file():
@@ -141,30 +154,34 @@ def download(release, release_dir, tarball = False):
# tarball has no size information, use -1 to skip size check
task_queue.put((url, dst_file, working_dir, updated, -1))

for asset in release['assets']:
url = asset['browser_download_url']
for asset in release["assets"]:
url = asset["browser_download_url"]
updated = datetime.strptime(
asset['updated_at'], '%Y-%m-%dT%H:%M:%SZ').timestamp()
dst_file = release_dir / ensure_safe_name(asset['name'])
asset["updated_at"], "%Y-%m-%dT%H:%M:%SZ"
).timestamp()
dst_file = release_dir / ensure_safe_name(asset["name"])
remote_filelist.append(dst_file.relative_to(working_dir))
remote_size = asset['size']
remote_size = asset["size"]
total_size += remote_size

if dst_file.is_file():
if args.fast_skip:
print("fast skipping", dst_file.relative_to(
working_dir), flush=True)
print(
"fast skipping", dst_file.relative_to(working_dir), flush=True
)
continue
else:
stat = dst_file.stat()
local_filesize = stat.st_size
local_mtime = stat.st_mtime
# print(f"{local_filesize} vs {asset['size']}")
# print(f"{local_mtime} vs {updated}")
if local_mtime > updated or \
remote_size == local_filesize and local_mtime == updated:
print("skipping", dst_file.relative_to(
working_dir), flush=True)
if (
local_mtime > updated
or remote_size == local_filesize
and local_mtime == updated
):
print("skipping", dst_file.relative_to(working_dir), flush=True)
continue
else:
dst_file.parent.mkdir(parents=True, exist_ok=True)
@@ -182,10 +199,10 @@ def link_latest(name, repo_dir):
pass

for cfg in REPOS:
flat = False # build a folder for each release
versions = 1 # keep only one release
tarball = False # do not download the tarball
prerelease = False # filter out pre-releases
flat = False # build a folder for each release
versions = 1 # keep only one release
tarball = False # do not download the tarball
prerelease = False # filter out pre-releases
if isinstance(cfg, str):
repo = cfg
else:
@@ -203,17 +220,27 @@ def link_latest(name, repo_dir):
print(f"syncing {repo} to {repo_dir}")

try:
r = github_get(f"{args.base_url}{repo}/releases")
headers = {"Accept": "application/vnd.github+json"}
r = github_get(
f"{args.base_url}{repo}/releases?per_page=100", headers=headers
)
r.raise_for_status()
releases = r.json()

# follow rel="next" until the last page; requests parses the Link
# header into r.links, and .get() avoids a KeyError when no Link
# header is present (i.e. everything fit on one page)
next_url = r.links.get("next", {}).get("url")
while next_url:
    r = github_get(next_url, headers=headers)
    r.raise_for_status()
    releases.extend(r.json())
    next_url = r.links.get("next", {}).get("url")
except:
traceback.print_exc()
break

n_downloaded = 0
for release in releases:
if not release['draft'] and (prerelease or not release['prerelease']):
name = ensure_safe_name(release['name'] or release['tag_name'])
if not release["draft"] and (prerelease or not release["prerelease"]):
name = ensure_safe_name(release["name"] or release["tag_name"])
if len(name) == 0:
print("Error: Unnamed release")
continue
@@ -238,7 +265,7 @@ def link_latest(name, repo_dir):

if cleaning:
local_filelist = []
for local_file in working_dir.glob('**/*'):
for local_file in working_dir.glob("**/*"):
if local_file.is_file():
local_filelist.append(local_file.relative_to(working_dir))

Expand All @@ -247,7 +274,7 @@ def link_latest(name, repo_dir):
old_file = working_dir / old_file
old_file.unlink()

for local_dir in working_dir.glob('*/*/*'):
for local_dir in working_dir.glob("*/*/*"):
if local_dir.is_dir():
try:
# remove empty dirs only
@@ -257,6 +284,7 @@ def link_latest(name, repo_dir):

print("Total size is", sizeof_fmt(total_size, suffix=""))


if __name__ == "__main__":
main()
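A quick sanity check of the sanitization logic: the behavior of ensure_safe_name above can be traced by hand (illustrative asserts, not part of the diff):

    assert ensure_safe_name(".") == " ."
    assert ensure_safe_name("..") == ". ."
    assert ensure_safe_name("a/b") == "a_b"              # "/" -> "\" -> "_"
    assert ensure_safe_name("../../etc") == ".._.._etc"  # traversal neutralized

Mapping both path separators to "_" keeps a hostile asset name from escaping its release directory.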

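The download path in the diff (hidden temporary file in the destination directory, then Path.replace) is the usual atomic-write pattern. A condensed sketch with hypothetical names, assuming a streaming requests response object:

    import os
    import tempfile
    from pathlib import Path

    def atomic_download(resp, dst_file: Path, mtime: float):
        # stream into a temp file on the same filesystem as dst_file
        with tempfile.NamedTemporaryFile(dir=dst_file.parent, delete=False) as f:
            tmp = Path(f.name)
            for chunk in resp.iter_content(chunk_size=1024**2):
                if chunk:
                    f.write(chunk)
        os.utime(tmp, (mtime, mtime))  # stamp with the remote timestamp
        tmp.chmod(0o644)
        tmp.replace(dst_file)          # atomic rename: no partial file is ever visible

Path.replace is only atomic because source and destination share a filesystem, which is why the script creates the temp file in dst_file.parent rather than the system temp dir.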