Skip to content

Commit

Permalink
Add more docstrings.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbenjoseph committed Jun 28, 2022
1 parent 23ffa8f commit 3c938b0
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions daisybell/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ def scan(model: Pipeline, params: dict = {}) -> Generator:


def handle_dataset(url: str, alterative_path: str = None) -> os.PathLike:
"""
Handles the dataset.
Parameters:
url: The url of the dataset.
alterative_path: An alternative path to the dataset if not using the default (~/.iqtlabs).
Returns:
The path to the dataset.
"""
if alterative_path:
output_path = Path(alterative_path)
else:
Expand All @@ -60,13 +70,31 @@ def handle_dataset(url: str, alterative_path: str = None) -> os.PathLike:


def handle_books_dataset(params: dict) -> os.PathLike:
    """
    Resolves the books dataset by delegating to handle_dataset.

    Parameters:
        params: The parameters passed to daisybell. The optional "books_path"
            entry is forwarded to handle_dataset as the alternative path;
            when the key is absent, None is forwarded instead.

    Returns:
        Whatever handle_dataset returns for the books archive URL, i.e. the
        path to the dataset (not a pandas DataFrame).
    """
    books_url = (
        "https://iqtlabs-aia-datasets.s3.amazonaws.com/public_domain_books.tar.gz"
    )
    # dict.get yields None for a missing key, which handle_dataset accepts as
    # its default for the alternative path.
    return handle_dataset(books_url, params.get("books_path"))


def handle_wikidata_dataset(params: dict) -> pd.DataFrame:
"""
Downloads the wikidata dataset or provides the cached copy.
Parameters:
params: The parameters passed to daisybell.
Returns:
A pandas DataFrame with the wikidata dataset.
"""
wikidata_url = (
"https://iqtlabs-aia-datasets.s3.amazonaws.com/wikidata_person_names-v1.csv.gz"
)
Expand All @@ -76,6 +104,15 @@ def handle_wikidata_dataset(params: dict) -> pd.DataFrame:
def handle_common_params_to_masking_and_zeroshot(
params: dict,
) -> Tuple[str, int, pd.DataFrame]:
"""
Handles the common parameters to masking and zeroshot scanners.
Parameters:
params: The parameters passed to daisybell.
Returns:
A tuple of the suffix, the maximum number of names per language, and the wikidata dataframe.
"""
if params.get("suffix"):
suffix = params["suffix"]
else:
Expand Down

0 comments on commit 3c938b0

Please sign in to comment.