-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GithubMiner ported from radon-h2020/radon-iac-miner
- Loading branch information
stefanodallapalma
authored and
stefanodallapalma
committed
Sep 17, 2020
1 parent
02f49c9
commit 7fc1396
Showing
11 changed files
with
605 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,3 +127,6 @@ dmypy.json | |
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# PyCharm | ||
.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
include requirements.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,70 @@ | ||
# radon-repositories-collector | ||
A tool that queries the GitHub GraphQL API to collect repository metadata. | ||
|
||
|
||
## How to install | ||
|
||
A PyPI package will be available soon! | ||
In the meantime, install it from source code with: | ||
|
||
``` | ||
pip install -r requirements.txt | ||
pip install . | ||
``` | ||
|
||
## Command-line usage | ||
|
||
``` | ||
usage: radon-repositories-collector [-h] [-v] [--from DATE_FROM] | ||
[--to DATE_TO] [--pushed-after DATE_PUSH] | ||
[--min-issues MIN_ISSUES] | ||
[--min-releases MIN_RELEASES] | ||
[--min-stars MIN_STARS] | ||
[--min-watchers MIN_WATCHERS] [--verbose] | ||
dest | ||
A Python library to collect repositories metadata from GitHub. | ||
positional arguments: | ||
dest destination folder for report | ||
optional arguments: | ||
-h, --help show this help message and exit | ||
-v, --version show program's version number and exit | ||
--from DATE_FROM collect repositories created since this date (default: | ||
2014-01-01 00:00:00) | ||
--to DATE_TO collect repositories created up to this date (default: | ||
2014-01-01 00:00:00) | ||
--pushed-after DATE_PUSH | ||
collect only repositories pushed after this date | ||
(default: 2019-01-01 00:00:00) | ||
--min-issues MIN_ISSUES | ||
collect repositories with at least <min-issues> issues | ||
(default: 0) | ||
--min-releases MIN_RELEASES | ||
collect repositories with at least <min-releases> | ||
releases (default: 0) | ||
--min-stars MIN_STARS | ||
collect repositories with at least <min-stars> stars | ||
(default: 0) | ||
--min-watchers MIN_WATCHERS | ||
collect repositories with at least <min-watchers> | ||
watchers (default: 0) | ||
--verbose show log (default: False) | ||
``` | ||
|
||
|
||
**Important!** The tool requires a personal access token to access the GraphQL APIs. See how to get one [here](https://github.com/settings/tokens). | ||
Once generated, paste the token in the input field when asked. For example: | ||
|
||
``` | ||
radon-repositories-collector . --from 2020-01-01 --to 2020-01-02 | ||
Github access token: <paste your token here> | ||
``` | ||
|
||
To skip this prompt, set ```GITHUB_ACCESS_TOKEN=<paste your token here>``` as an environment variable (for example in a `.env` file). | ||
|
||
|
||
### Output | ||
Running the tool from command-line generates an HTML report accessible at *\<dest\>/report.html*. |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
import argparse | ||
import copy | ||
import io | ||
import json | ||
import os | ||
|
||
from datetime import datetime | ||
from dotenv import load_dotenv | ||
from getpass import getpass | ||
|
||
from collector.github import GithubRepositoriesCollector | ||
from collector.report import create_report | ||
|
||
# Load the tool configuration once, at import time. Only the 'version' key is
# read in this module (by the --version option of the parser).
# NOTE(review): the path is relative to the current working directory, so the
# import fails when the tool is launched from elsewhere - confirm this is intended.
with open('config.json', 'r') as in_stream:
    configuration = json.load(in_stream)
|
||
def date(x: str) -> datetime:
    """
    Parse a command-line date written as YYYY-MM-DD.

    :param x: the date string to parse
    :return: the datetime parsed from x
    :raise argparse.ArgumentTypeError: if x is not a well-formed YYYY-MM-DD date
    """
    try:
        return datetime.strptime(x, '%Y-%m-%d')
    except (TypeError, ValueError):
        # ValueError: malformed string; TypeError: non-string input.
        # "from None" keeps the parsing traceback out of the user-facing error.
        raise argparse.ArgumentTypeError('Date format must be: YYYY-MM-DD') from None
|
||
|
||
def unsigned_int(x: str) -> int:
    """
    Convert a command-line value to an integer greater than or equal to zero.

    :param x: the string to convert
    :return: int(x)
    :raise argparse.ArgumentTypeError: if x is not an integer or is negative
    """
    try:
        value = int(x)
    except (TypeError, ValueError):
        # Honour the documented contract: report bad input as an
        # ArgumentTypeError instead of leaking a bare ValueError.
        raise argparse.ArgumentTypeError(f'Invalid integer: {x!r}') from None

    if value < 0:
        raise argparse.ArgumentTypeError('Minimum bound is 0')

    return value
|
||
|
||
def valid_path(x: str) -> str:
    """
    Check that the path points to an existing directory.

    :param x: a path
    :return: the path, unchanged, if it is an existing directory
    :raise argparse.ArgumentTypeError: if x is not an existing directory
    """
    if not os.path.isdir(x):
        raise argparse.ArgumentTypeError('Insert a valid path')

    return x
|
||
|
||
def get_parser() -> argparse.ArgumentParser:
    """
    Build the command-line parser of radon-repositories-collector.

    The parser takes one positional argument (the destination folder of the
    report) plus optional filters on creation date, last push date and minimum
    number of issues, releases, stars and watchers.

    :return: the configured argparse.ArgumentParser
    """
    description = 'A Python library to collect repositories metadata from GitHub.'

    parser = argparse.ArgumentParser(prog='radon-repositories-collector', description=description)
    parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + configuration.get('version', '0.0'))

    # Positional argument: no option string, so its name comes from dest
    parser.add_argument(action='store',
                        dest='dest',
                        type=valid_path,
                        help='destination folder for report')

    parser.add_argument('--from',
                        action='store',
                        dest='date_from',
                        type=date,
                        default=datetime.strptime('2014-01-01', '%Y-%m-%d'),
                        help='collect repositories created since this date (default: %(default)s)')

    # NOTE(review): --to has the same default as --from, so the default
    # creation window spans a single instant - confirm this is intended.
    parser.add_argument('--to',
                        action='store',
                        dest='date_to',
                        type=date,
                        default=datetime.strptime('2014-01-01', '%Y-%m-%d'),
                        help='collect repositories created up to this date (default: %(default)s)')

    parser.add_argument('--pushed-after',
                        action='store',
                        dest='date_push',
                        type=date,
                        default=datetime.strptime('2019-01-01', '%Y-%m-%d'),
                        help='collect only repositories pushed after this date (default: %(default)s)')

    parser.add_argument('--min-issues',
                        action='store',
                        dest='min_issues',
                        type=unsigned_int,
                        default=0,
                        help='collect repositories with at least <min-issues> issues (default: %(default)s)')

    parser.add_argument('--min-releases',
                        action='store',
                        dest='min_releases',
                        type=unsigned_int,
                        default=0,
                        help='collect repositories with at least <min-releases> releases (default: %(default)s)')

    parser.add_argument('--min-stars',
                        action='store',
                        dest='min_stars',
                        type=unsigned_int,
                        default=0,
                        help='collect repositories with at least <min-stars> stars (default: %(default)s)')

    parser.add_argument('--min-watchers',
                        action='store',
                        dest='min_watchers',
                        type=unsigned_int,
                        default=0,
                        help='collect repositories with at least <min-watchers> watchers (default: %(default)s)')

    parser.add_argument('--verbose',
                        action='store_true',
                        dest='verbose',
                        default=False,
                        help='show log (default: %(default)s)')

    return parser
|
||
|
||
|
||
def main():
    """
    Command-line entry point.

    Parses the arguments, obtains a GitHub access token (from the
    GITHUB_ACCESS_TOKEN environment variable, or interactively when it is not
    set), collects the repositories matching the filters and writes an HTML
    report to <dest>/report.html.

    :raise SystemExit: always, with exit code 0, once the report is written
    """
    args = get_parser().parse_args()

    # Let a local .env file (if any) contribute environment variables
    load_dotenv()

    token = os.getenv('GITHUB_ACCESS_TOKEN')
    if not token:
        # getpass keeps the token from being echoed on the terminal
        token = getpass('Github access token:')

    github = GithubRepositoriesCollector(
        access_token=token,
        date_from=args.date_from,
        date_to=args.date_to,
        pushed_after=args.date_push,
        min_stars=args.min_stars,
        min_releases=args.min_releases,
        min_watchers=args.min_watchers,
        min_issues=args.min_issues
    )

    repositories = list()
    for repository in github.collect_repositories():

        if args.verbose:
            print(f'Collecting {repository["url"]} ... ', end='', flush=True)

        # Save repository to collection.
        # NOTE(review): the deep copy presumably guards against the collector
        # reusing the yielded object - confirm against collect_repositories().
        repositories.append(copy.deepcopy(repository))

        if args.verbose:
            print('DONE')

    # Generate html report
    html = create_report(repositories)
    filename = os.path.join(args.dest, 'report.html')

    with io.open(filename, "w", encoding="utf-8") as f:
        f.write(html)

    if args.verbose:
        print(f'Report created at {filename}')

    # The exit() builtin is injected by the site module and may be missing
    # (e.g. python -S, frozen apps); raising SystemExit is always available.
    raise SystemExit(0)
Oops, something went wrong.