Skip to content

Commit

Permalink
Added option to filter for primary_language and update to version 0…
Browse files Browse the repository at this point in the history
….0.4
  • Loading branch information
stefanodallapalma committed Sep 28, 2021
1 parent baa40eb commit b168b45
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 11 deletions.
21 changes: 17 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ The package can be installed from [PyPI](https://pypi.org/project/repositories-c
```python
import os
from datetime import datetime
from repocollector import GithubRepositoriesCollector
from repocollector.github import GithubRepositoriesCollector

github_crawler = GithubRepositoriesCollector(
access_token=os.getenv('GITHUB_ACCESS_TOKEN'), # or paste your token
Expand All @@ -27,7 +27,8 @@ github_crawler = GithubRepositoriesCollector(
min_issues=0,
min_releases=0,
min_stars=0,
min_watchers=0)
min_watchers=0,
primary_language='language') # e.g., python

for repo in github_crawler.collect_repositories():
print('id:', repo['id']) # e.g., 123456
Expand Down Expand Up @@ -88,17 +89,29 @@ optional arguments:
--min-watchers MIN_WATCHERS
collect repositories with at least <min-watchers>
watchers (default: 0)
--primary-language LANGUAGE
collect repositories written in this language
--verbose show log (default: False)
```


**Important!** The tool requires a personal access token to access the GraphQL APIs. See how to get one [here](https://github.com/settings/tokens).
Add ```GITHUB_ACCESS_TOKEN=<paste here your token>``` to the environment variables.


### Output
**Output**
Running the tool from the command line generates an HTML report accessible at *\<dest\>/report.html*.

**Example**
The following command searches for repositories written in Python that were created between 2014-02-01 and 2014-02-03.
The report is saved in the folder /tmp/.

```
repositories-collector 2014-02-01 2014-02-03 /tmp/ --primary-language python
```





## Contributions

Expand Down
10 changes: 9 additions & 1 deletion repocollector/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,13 @@ def get_parser():
default=0,
help='collect repositories with at least <min-watchers> watchers (default: %(default)s)')

parser.add_argument('--primary-language',
action='store',
dest='primary_language',
type=str,
default=None,
help='collect repositories written in this language')

parser.add_argument('--verbose',
action='store_true',
dest='verbose',
Expand All @@ -134,7 +141,8 @@ def main():
min_stars=args.min_stars,
min_releases=args.min_releases,
min_watchers=args.min_watchers,
min_issues=args.min_issues
min_issues=args.min_issues,
primary_language=args.primary_language
)

repositories = list()
Expand Down
12 changes: 9 additions & 3 deletions repocollector/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from datetime import datetime

QUERY = """{ search(query: "is:public stars:>=MIN_STARS mirror:false archived:false created:SINCE..UNTIL
pushed:>=PUSHED_AFTER", type: REPOSITORY, first: 50 AFTER) { repositoryCount pageInfo { endCursor startCursor
pushed:>=PUSHED_AFTER LANGUAGE:LANGUAGE", type: REPOSITORY, first: 50 AFTER) { repositoryCount pageInfo { endCursor startCursor
hasNextPage } edges { node { ... on Repository { databaseId defaultBranchRef { name } owner { login } name url description
primaryLanguage { name } stargazers { totalCount } watchers { totalCount } releases { totalCount } issues {
totalCount } createdAt pushedAt updatedAt hasIssuesEnabled isArchived isDisabled isMirror isFork object(expression:
Expand All @@ -33,8 +33,8 @@ def __init__(self,
min_stars: int = 0,
min_releases: int = 0,
min_watchers: int = 0,
min_issues: int = 0
):
min_issues: int = 0,
primary_language: str = None):
"""
Crawl GitHub to extract repositories
Expand All @@ -46,6 +46,7 @@ def __init__(self,
:param min_releases: the minimum number of releases the repositories must have
:param min_watchers: the minimum number of watchers the repositories must have
:param min_issues: the minimum number of issues the repositories must have
:param primary_language: get repositories written in this language
"""

self._token = access_token
Expand All @@ -65,6 +66,11 @@ def __init__(self,
self.query = re.sub('UNTIL', str(self.until), self.query)
self.query = re.sub('PUSHED_AFTER', self.pushed_after, self.query)

if primary_language:
self.query = re.sub('LANGUAGE:LANGUAGE', f'language:{primary_language}', self.query)
else:
self.query = re.sub('LANGUAGE:LANGUAGE', '', self.query)

@property
def quota(self):
return self._quota
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
with open("README.md", "r") as fh:
long_description = fh.read()

VERSION = "0.0.3"
VERSION = "0.0.4"

setup(name='repositories_collector',
version=VERSION,
Expand All @@ -25,12 +25,12 @@
'console_scripts': ['repositories-collector=repocollector.cli:main'],
},
classifiers=[
"Development Status :: 3 - Alpha",
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3.7",
"License :: OSI Approved :: Apache Software License",
"Topic :: Software Development :: Libraries :: Python Modules",
"Operating System :: OS Independent"
],
insall_requires=requirements
install_requires=requirements
)

0 comments on commit b168b45

Please sign in to comment.