Skip to content

Commit

Permalink
GithubMiner ported from radon-h2020/radon-iac-miner
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanodallapalma authored and stefanodallapalma committed Sep 17, 2020
1 parent 02f49c9 commit 7fc1396
Show file tree
Hide file tree
Showing 11 changed files with 605 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,6 @@ dmypy.json

# Pyre type checker
.pyre/

# PyCharm
.idea/
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include requirements.txt
68 changes: 68 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,70 @@
# radon-repositories-collector
A tool that queries the GitHub GraphQL API to collect repository metadata.


## How to install

A PyPI package will be available soon!
In the meantime, install it from source code with:

```
pip install -r requirements.txt
pip install .
```

## Command-line usage

```
usage: radon-repositories-collector [-h] [-v] [--from DATE_FROM]
[--to DATE_TO] [--pushed-after DATE_PUSH]
[--min-issues MIN_ISSUES]
[--min-releases MIN_RELEASES]
[--min-stars MIN_STARS]
[--min-watchers MIN_WATCHERS] [--verbose]
dest
A Python library to collect repositories metadata from GitHub.
positional arguments:
dest destination folder for report
optional arguments:
-h, --help show this help message and exit
-v, --version show program's version number and exit
--from DATE_FROM collect repositories created since this date (default:
2014-01-01 00:00:00)
--to DATE_TO collect repositories created up to this date (default:
2014-01-01 00:00:00)
--pushed-after DATE_PUSH
collect only repositories pushed after this date
(default: 2019-01-01 00:00:00)
--min-issues MIN_ISSUES
collect repositories with at least <min-issues> issues
(default: 0)
--min-releases MIN_RELEASES
collect repositories with at least <min-releases>
releases (default: 0)
--min-stars MIN_STARS
collect repositories with at least <min-stars> stars
(default: 0)
--min-watchers MIN_WATCHERS
collect repositories with at least <min-watchers>
watchers (default: 0)
--verbose show log (default: False)
```


**Important!** The tool requires a personal access token to access the GraphQL APIs. See how to get one [here](https://github.com/settings/tokens).
Once generated, paste the token in the input field when asked. For example:

```
radon-repositories-collector . --from 2020-01-01 --to 2020-01-02
Github access token: <paste your token here>
```

To skip the token prompt, add ```GITHUB_ACCESS_TOKEN=<paste your token here>``` to the environment variables.


### Output
Running the tool from the command line generates an HTML report accessible at *\<dest\>/report.html*.
Empty file added collector/__init__.py
Empty file.
168 changes: 168 additions & 0 deletions collector/command_line.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import argparse
import copy
import io
import json
import os
import sys

from datetime import datetime
from getpass import getpass

from dotenv import load_dotenv

from collector.github import GithubRepositoriesCollector
from collector.report import create_report

# Load the tool configuration from config.json, resolved against the current
# working directory. Only the 'version' key is read in this module, and
# get_parser() already defaults it when absent, so a missing file falls back
# to an empty configuration instead of crashing at import time.
try:
    with open('config.json', 'r', encoding='utf-8') as in_stream:
        configuration = json.load(in_stream)
except FileNotFoundError:
    configuration = {}

def date(x: str) -> datetime:
    """
    Parse a command-line date written as YYYY-MM-DD.

    :param x: the date string to parse
    :return: the datetime parsed from x with the format '%Y-%m-%d'
    :raise argparse.ArgumentTypeError: if x cannot be parsed with that format
    """
    try:
        return datetime.strptime(x, '%Y-%m-%d')
    except (TypeError, ValueError) as err:
        # strptime raises ValueError for a malformed string and TypeError for
        # a non-string argument; both are reported to argparse the same way.
        raise argparse.ArgumentTypeError('Date format must be: YYYY-MM-DD') from err


def unsigned_int(x: str) -> int:
    """
    Parse a command-line value as an integer greater than or equal to zero.

    :param x: the value to parse
    :return: int(x)
    :raise argparse.ArgumentTypeError: if x is not an integer or is negative
    """
    try:
        value = int(x)
    except (TypeError, ValueError) as err:
        # Report non-numeric input the same way as an out-of-range one, so the
        # function raises the single exception type its contract documents.
        raise argparse.ArgumentTypeError('Value must be an integer') from err

    if value < 0:
        raise argparse.ArgumentTypeError('Minimum bound is 0')

    return value


def valid_path(x: str) -> str:
    """
    Validate that the argument names an existing directory.

    :param x: the candidate path
    :return: x unchanged when it is an existing directory
    :raise argparse.ArgumentTypeError: when x is not an existing directory
    """
    if os.path.isdir(x):
        return x

    raise argparse.ArgumentTypeError('Insert a valid path')


def get_parser():
    """
    Build the command-line parser of radon-repositories-collector.

    The parser accepts the positional ``dest`` folder, the three date filters
    (``--from``, ``--to``, ``--pushed-after``), the four ``--min-*``
    thresholds and the ``--verbose`` switch.

    :return: the configured argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(
        prog='radon-repositories-collector',
        description='A Python library to collect repositories metadata from GitHub.')

    version = '%(prog)s ' + configuration.get('version', '0.0')
    parser.add_argument('-v', '--version', action='version', version=version)

    # Positional argument, validated by valid_path
    parser.add_argument('dest', type=valid_path, help='destination folder for report')

    # Date filters: (flag, destination attribute, default day, help text)
    date_options = (
        ('--from', 'date_from', '2014-01-01',
         'collect repositories created since this date'),
        ('--to', 'date_to', '2014-01-01',
         'collect repositories created up to this date'),
        ('--pushed-after', 'date_push', '2019-01-01',
         'collect only repositories pushed after this date'),
    )
    for flag, target, day, text in date_options:
        parser.add_argument(flag,
                            dest=target,
                            type=date,
                            default=datetime.strptime(day, '%Y-%m-%d'),
                            help=text + ' (default: %(default)s)')

    # Lower bounds, all defaulting to 0 and validated by unsigned_int
    for what in ('issues', 'releases', 'stars', 'watchers'):
        parser.add_argument(
            f'--min-{what}',
            dest=f'min_{what}',
            type=unsigned_int,
            default=0,
            help=f'collect repositories with at least <min-{what}> {what} (default: %(default)s)')

    parser.add_argument('--verbose',
                        dest='verbose',
                        action='store_true',
                        default=False,
                        help='show log (default: %(default)s)')

    return parser



def main():
    """
    Command-line entry point: collect repositories and write the HTML report.

    Parses the command-line arguments, obtains the GitHub access token from
    the GITHUB_ACCESS_TOKEN environment variable (read after load_dotenv() is
    called) or, when it is unset or empty, from an interactive prompt, then
    collects the repositories and writes the report to <dest>/report.html.

    :raise SystemExit: always, with status 0, once the report is written
    """
    args = get_parser().parse_args()

    load_dotenv()

    token = os.getenv('GITHUB_ACCESS_TOKEN') or getpass('Github access token:')

    github = GithubRepositoriesCollector(
        access_token=token,
        date_from=args.date_from,
        date_to=args.date_to,
        pushed_after=args.date_push,
        min_stars=args.min_stars,
        min_releases=args.min_releases,
        min_watchers=args.min_watchers,
        min_issues=args.min_issues
    )

    repositories = []
    for repository in github.collect_repositories():

        if args.verbose:
            print(f'Collecting {repository["url"]} ... ', end='', flush=True)

        # Save a deep copy of the repository to the collection
        repositories.append(copy.deepcopy(repository))

        if args.verbose:
            print('DONE')

    # Generate html report
    html = create_report(repositories)
    filename = os.path.join(args.dest, 'report.html')

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)

    if args.verbose:
        print(f'Report created at {filename}')

    # sys.exit rather than the site-provided exit(), which is meant for the
    # interactive shell and is missing when the site module is not loaded.
    sys.exit(0)
Loading

0 comments on commit 7fc1396

Please sign in to comment.