From 7dea8afb3178a4f73c21f87c4a3b8df1005ef173 Mon Sep 17 00:00:00 2001 From: "Fabrizio (Misto) Milo" Date: Thu, 12 Jan 2023 11:49:43 -0800 Subject: [PATCH 1/2] sanitize whole repository --- .flake8 | 14 +++ .github/workflows/code_quality.yml | 13 ++ .gitignore | 1 + .pre-commit-config.yaml | 45 +++++++ .vscode/settings.json | 7 ++ README.md | 2 +- pile/__init__.py | 3 - pile/datasets/__init__.py | 2 +- pile/datasets/dm_mathematics/__init__.py | 1 - .../datasets/dm_mathematics/dm_mathematics.py | 11 +- pile/datasets/enron/__init__.py | 1 - pile/datasets/enron/enron.py | 7 +- pile/datasets/euro_parl/__init__.py | 1 - pile/datasets/euro_parl/euro_parl.py | 11 +- pile/datasets/free_law/__init__.py | 1 - pile/datasets/free_law/free_law.py | 7 +- pile/datasets/grade_school_math/__init__.py | 1 - .../grade_school_math/grade_school_math.py | 11 +- pile/datasets/nih_reporter/__init__.py | 1 - pile/datasets/nih_reporter/nih_reporter.py | 13 +- pile/datasets/phil_papers/__init__.py | 1 - pile/datasets/phil_papers/phil_papers.py | 7 +- pile/datasets/project_gutenberg/__init__.py | 1 - .../project_gutenberg/project_gutenberg.py | 11 +- pile/datasets/wikipedia/__init__.py | 1 - pile/datasets/wikipedia/wikipedia.py | 7 +- pile/file_utils.py | 9 +- pile/filtering/filter_the_stack.py | 77 ++++++++---- pile/filtering/subset_pilev2.py | 82 +++++++------ pile/processing/convert_to_parquet.py | 10 +- pile/processing/data.json | 1 + pile/processing/dedup/decontainmentation.py | 2 +- pile/processing/dedup/duy_local_dedup.py | 55 ++++++--- pile/processing/dedup/grouped_dedup.py | 62 ++++++---- pile/processing/dedup/groups.json | 2 +- pile/processing/dedup/run_grouped_dedup.sh | 2 +- pile/processing/dedup/test_grouped_dup.py | 111 +++++++++--------- pile/processing/deduplication.py | 13 +- .../processing/fix_format/fix_format_arxiv.py | 98 ++++++++-------- .../fix_format/fix_format_pubmed.py | 47 ++++---- pile/processing/local_dedup.py | 8 +- pile/processing/processing.py | 59 ++++++---- pile/processing/processing_compression.py | 26 ++-- pile/processing/requirements.txt | 6 +- pile/processing/test_datasets.py | 5 +- pile/processing/threshold_selection.py | 4 +- pile/scripts/run_group_stats.sh | 2 +- pile/stats/check_percentage_non_c4.py | 38 +++--- pile/templates.py | 32 ++--- pile/utils.py | 15 +-- pyproject.toml | 12 ++ 51 files changed, 580 insertions(+), 379 deletions(-) create mode 100644 .flake8 create mode 100644 .github/workflows/code_quality.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .vscode/settings.json create mode 100644 pyproject.toml diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..9672efc --- /dev/null +++ b/.flake8 @@ -0,0 +1,14 @@ +[flake8] +max-line-width=120 +extend-ignore = + E203 + E501 +exclude = + .git + __pycache__ + docs/source/conf.py + old + build + dist + temp +max-complexity = 10 diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml new file mode 100644 index 0000000..adfa784 --- /dev/null +++ b/.github/workflows/code_quality.yml @@ -0,0 +1,13 @@ +name: Code Quality + +on: [pull_request] + +jobs: + code-quality: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + - uses: pre-commit/action@v2.0.3 diff --git a/.gitignore b/.gitignore index 752327b..379684c 100644 --- a/.gitignore +++ b/.gitignore @@ -133,3 +133,4 @@ dmypy.json test_datasets/ ======= >>>>>>> e259a5cda662d5482a6d9115faae09ec18299969 +.DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b220617 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,45 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 + hooks: + - id: check-case-conflict + - id: check-json + - id: check-symlinks + - id: check-yaml + - id: destroyed-symlinks + - id: end-of-file-fixer + exclude: docs/CNAME + - id: fix-byte-order-marker + - id: fix-encoding-pragma + args: [--remove] + - id: mixed-line-ending + args: [--fix=lf] + - id: requirements-txt-fixer + - id: trailing-whitespace + - repo: https://github.com/psf/black + rev: 22.10.0 + hooks: + - id: black + files: ^(trlx|examples|tests|setup.py)/ + - repo: https://github.com/pycqa/isort + rev: 5.11.2 + hooks: + - id: isort + name: isort (python) + - repo: https://github.com/pycqa/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + - repo: https://github.com/codespell-project/codespell + rev: v2.2.2 + hooks: + - id: codespell + args: [--skip="data/**""] + exclude: > + (?x)^( + .*\.json|.*\.ipynb|.*\.jsonl + )$ + additional_dependencies: + - tomli diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1d927bc --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "cSpell.words": [ + "freelaw", + "levelname", + "philpapers" + ] +} diff --git a/README.md b/README.md index 412d3da..1ded309 100644 --- a/README.md +++ b/README.md @@ -1 +1 @@ -# pilev2 \ No newline at end of file +# pilev2 diff --git a/pile/__init__.py b/pile/__init__.py index 774b12c..929697b 100644 --- a/pile/__init__.py +++ b/pile/__init__.py @@ -1,6 +1,3 @@ -from .templates import Dataset -from .datasets import * - import logging from pathlib import Path diff --git a/pile/datasets/__init__.py b/pile/datasets/__init__.py index e0afdb6..b59b421 100644 --- a/pile/datasets/__init__.py +++ b/pile/datasets/__init__.py @@ -2,7 +2,7 @@ from .enron import EnronEmails from .euro_parl import EuroParl from .freelaw import FreeLaw -from .grade_school_math import * +from .grade_school_math import GradeSchoolMath, GradeSchoolMathNoCalc, NIHRePORTER from .philpapers import PhilPapers from .project_gutenberg import ProjectGutenberg from .wikipedia import Wikipedia diff --git a/pile/datasets/dm_mathematics/__init__.py b/pile/datasets/dm_mathematics/__init__.py index b525ef4..e69de29 100644 --- a/pile/datasets/dm_mathematics/__init__.py +++ b/pile/datasets/dm_mathematics/__init__.py @@ -1 +0,0 @@ -from .dm_mathematics import DMMathematics diff --git a/pile/datasets/dm_mathematics/dm_mathematics.py b/pile/datasets/dm_mathematics/dm_mathematics.py index 13d9cb9..1150342 100644 --- a/pile/datasets/dm_mathematics/dm_mathematics.py +++ b/pile/datasets/dm_mathematics/dm_mathematics.py @@ -1,14 +1,15 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, stream_jsonl_zst from pathlib import Path +from ...file_utils import stream_jsonl, stream_jsonl_zst +from ...templates import Dataset + logger = logging.getLogger(__name__) class DMMathematics(Dataset): name = "DeepMind Mathematics Dataset" - + license = "MIT License" urls = [""] @@ -29,7 +30,9 @@ def paths(self): yield path def examples(self): - return list(stream_jsonl(Path(__file__).parent / "dm_mathematics_examples.jsonl")) + return list( + stream_jsonl(Path(__file__).parent / "dm_mathematics_examples.jsonl") + ) def size_on_disk(self): return -1 diff --git a/pile/datasets/enron/__init__.py b/pile/datasets/enron/__init__.py index 5a0bec7..e69de29 100644 --- a/pile/datasets/enron/__init__.py +++ b/pile/datasets/enron/__init__.py @@ -1 +0,0 @@ -from .enron import EnronEmails diff --git a/pile/datasets/enron/enron.py b/pile/datasets/enron/enron.py index 8c1b16d..27bc82d 100644 --- a/pile/datasets/enron/enron.py +++ b/pile/datasets/enron/enron.py @@ -1,14 +1,15 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, stream_jsonl_zst from pathlib import Path +from ...file_utils import stream_jsonl, stream_jsonl_zst +from ...templates import Dataset + logger = logging.getLogger(__name__) class EnronEmails(Dataset): name = "Enron Emails" - + license = "Public domain" urls = ["http://eaidata.bmk.sh/data/enron_emails.jsonl.zst"] diff --git a/pile/datasets/euro_parl/__init__.py b/pile/datasets/euro_parl/__init__.py index df700b1..e69de29 100644 --- a/pile/datasets/euro_parl/__init__.py +++ b/pile/datasets/euro_parl/__init__.py @@ -1 +0,0 @@ -from .euro_parl import EuroParl diff --git a/pile/datasets/euro_parl/euro_parl.py b/pile/datasets/euro_parl/euro_parl.py index c79b6dd..0b2d6e7 100644 --- a/pile/datasets/euro_parl/euro_parl.py +++ b/pile/datasets/euro_parl/euro_parl.py @@ -1,17 +1,20 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, stream_jsonl_zst from pathlib import Path +from ...file_utils import stream_jsonl, stream_jsonl_zst +from ...templates import Dataset + logger = logging.getLogger(__name__) class EnronEmails(Dataset): name = "EuroParl" - + license = "Except where otherwise indicated, reproduction is authorised, provided that the source is acknowledged" - urls = ["https://the-eye.eu/public/AI/pile_preliminary_components/EuroParliamentProceedings_1996_2011.jsonl.zst"] + urls = [ + "https://the-eye.eu/public/AI/pile_preliminary_components/EuroParliamentProceedings_1996_2011.jsonl.zst" + ] checksum = "6111400e7b7f75ce91fed1b5fc0a3630b8263217bd01ce75f7d8701f26ac0e98" diff --git a/pile/datasets/free_law/__init__.py b/pile/datasets/free_law/__init__.py index 9de9eb3..e69de29 100644 --- a/pile/datasets/free_law/__init__.py +++ b/pile/datasets/free_law/__init__.py @@ -1 +0,0 @@ -from .free_law import FreeLaw diff --git a/pile/datasets/free_law/free_law.py b/pile/datasets/free_law/free_law.py index bf97072..7f277ae 100644 --- a/pile/datasets/free_law/free_law.py +++ b/pile/datasets/free_law/free_law.py @@ -1,14 +1,15 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, stream_jsonl_zst from pathlib import Path +from ...file_utils import stream_jsonl, stream_jsonl_zst +from ...templates import Dataset + logger = logging.getLogger(__name__) class FreeLaw(Dataset): name = "FreeLaw Project" - + license = "BSD 2-Clause License" urls = ["http://eaidata.bmk.sh/data/FreeLaw_Opinions.jsonl.zst"] diff --git a/pile/datasets/grade_school_math/__init__.py b/pile/datasets/grade_school_math/__init__.py index e04f399..e69de29 100644 --- a/pile/datasets/grade_school_math/__init__.py +++ b/pile/datasets/grade_school_math/__init__.py @@ -1 +0,0 @@ -from .grade_school_math import GradeSchoolMath, GradeSchoolMathNoCalc diff --git a/pile/datasets/grade_school_math/grade_school_math.py b/pile/datasets/grade_school_math/grade_school_math.py index 1e28386..dae0226 100644 --- a/pile/datasets/grade_school_math/grade_school_math.py +++ b/pile/datasets/grade_school_math/grade_school_math.py @@ -1,9 +1,10 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, dump_jsonl -from ...utils import download, mark_done, done_path, sha256sum -from pathlib import Path import re +from pathlib import Path + +from ...file_utils import dump_jsonl, stream_jsonl +from ...templates import Dataset +from ...utils import done_path, download, mark_done, sha256sum logger = logging.getLogger(__name__) @@ -60,7 +61,7 @@ def replicate(self): question_answer_to_pile_format(qa) for qa in stream_jsonl(out_path) ] if self.remove_calculator_strings: - # calculator strings in this dataset are always inbetween << and >> + # calculator strings in this dataset are always in between << and >> # they are there so that the model can indicate when to outsource to a calculator, # but we might not always want this behavior # below removes them diff --git a/pile/datasets/nih_reporter/__init__.py b/pile/datasets/nih_reporter/__init__.py index b68269c..e69de29 100644 --- a/pile/datasets/nih_reporter/__init__.py +++ b/pile/datasets/nih_reporter/__init__.py @@ -1 +0,0 @@ -from .nih_reporter import NIHRePORTER diff --git a/pile/datasets/nih_reporter/nih_reporter.py b/pile/datasets/nih_reporter/nih_reporter.py index b47647b..2aa0aa4 100644 --- a/pile/datasets/nih_reporter/nih_reporter.py +++ b/pile/datasets/nih_reporter/nih_reporter.py @@ -1,17 +1,22 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, stream_jsonl_zst from pathlib import Path +from ...file_utils import stream_jsonl, stream_jsonl_zst +from ...templates import Dataset + logger = logging.getLogger(__name__) class NIHRePORTER(Dataset): name = "National Institute of Health RePORTER" - + license = "Public domain" - urls = ["https://mystic.the-eye.eu/public/AI/pile_v2/data/nih_reporter.jsonl.zst ", "http://eaidata.bmk.sh/data/pile_v2/NIH_ExPORTER_awarded_grant_text.jsonl.zst", "https://drive.google.com/file/d/1Sz9mFTPFa4ePYHy0AOSajUtKgVyk9mTg/view?usp=sharing"] + urls = [ + "https://mystic.the-eye.eu/public/AI/pile_v2/data/nih_reporter.jsonl.zst ", + "http://eaidata.bmk.sh/data/pile_v2/NIH_ExPORTER_awarded_grant_text.jsonl.zst", + "https://drive.google.com/file/d/1Sz9mFTPFa4ePYHy0AOSajUtKgVyk9mTg/view?usp=sharing", + ] checksum = "0db76318737fda6c2a2484b809bb53e9e42952c284c0bf2b8862e8428e154833" diff --git a/pile/datasets/phil_papers/__init__.py b/pile/datasets/phil_papers/__init__.py index 1ce0c53..e69de29 100644 --- a/pile/datasets/phil_papers/__init__.py +++ b/pile/datasets/phil_papers/__init__.py @@ -1 +0,0 @@ -from .phil_papers import PhilPapers diff --git a/pile/datasets/phil_papers/phil_papers.py b/pile/datasets/phil_papers/phil_papers.py index 9dc964d..e8c0091 100644 --- a/pile/datasets/phil_papers/phil_papers.py +++ b/pile/datasets/phil_papers/phil_papers.py @@ -1,14 +1,15 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, stream_jsonl_zst from pathlib import Path +from ...file_utils import stream_jsonl, stream_jsonl_zst +from ...templates import Dataset + logger = logging.getLogger(__name__) class PhilPapers(Dataset): name = "PhilPapers" - + license = "Open Access" urls = ["http://eaidata.bmk.sh/data/phil_papers.jsonl.zst"] diff --git a/pile/datasets/project_gutenberg/__init__.py b/pile/datasets/project_gutenberg/__init__.py index e8c2c8a..e69de29 100644 --- a/pile/datasets/project_gutenberg/__init__.py +++ b/pile/datasets/project_gutenberg/__init__.py @@ -1 +0,0 @@ -from .project_gutenberg import ProjectGutenberg diff --git a/pile/datasets/project_gutenberg/project_gutenberg.py b/pile/datasets/project_gutenberg/project_gutenberg.py index 687806f..5d59ba7 100644 --- a/pile/datasets/project_gutenberg/project_gutenberg.py +++ b/pile/datasets/project_gutenberg/project_gutenberg.py @@ -1,14 +1,15 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, stream_jsonl_zst from pathlib import Path +from ...file_utils import stream_jsonl, stream_jsonl_zst +from ...templates import Dataset + logger = logging.getLogger(__name__) class ProjectGutenberg(Dataset): name = "Project Gutenberg" - + license = "Public domain" urls = [""] @@ -29,7 +30,9 @@ def paths(self): yield path def examples(self): - return list(stream_jsonl(Path(__file__).parent / "project_gutenberg_examples.jsonl")) + return list( + stream_jsonl(Path(__file__).parent / "project_gutenberg_examples.jsonl") + ) def size_on_disk(self): return -1 diff --git a/pile/datasets/wikipedia/__init__.py b/pile/datasets/wikipedia/__init__.py index 15b9dd4..e69de29 100644 --- a/pile/datasets/wikipedia/__init__.py +++ b/pile/datasets/wikipedia/__init__.py @@ -1 +0,0 @@ -from .wikipedia import Wikipedia diff --git a/pile/datasets/wikipedia/wikipedia.py b/pile/datasets/wikipedia/wikipedia.py index 454c01e..375d3ab 100644 --- a/pile/datasets/wikipedia/wikipedia.py +++ b/pile/datasets/wikipedia/wikipedia.py @@ -1,14 +1,15 @@ import logging -from ...templates import Dataset -from ...file_utils import stream_jsonl, stream_jsonl_zst from pathlib import Path +from ...file_utils import stream_jsonl, stream_jsonl_zst +from ...templates import Dataset + logger = logging.getLogger(__name__) class Wikipedia(Dataset): name = "Wikipedia" - + license = "" urls = [""] diff --git a/pile/file_utils.py b/pile/file_utils.py index ec6f05d..4f12112 100644 --- a/pile/file_utils.py +++ b/pile/file_utils.py @@ -1,9 +1,10 @@ -from typing import Union, List -from pathlib import Path -import logging +import io import json +import logging +from pathlib import Path +from typing import List, Union + import zstandard as zstd -import io logger = logging.getLogger(__name__) diff --git a/pile/filtering/filter_the_stack.py b/pile/filtering/filter_the_stack.py index 813f06e..b2546a8 100644 --- a/pile/filtering/filter_the_stack.py +++ b/pile/filtering/filter_the_stack.py @@ -1,40 +1,75 @@ -from datasets import load_from_disk,load_dataset -import multiprocessing as mp -import os -import logging -from tqdm import tqdm -from transformers import GPTNeoXTokenizerFast import argparse -import time +import logging import pathlib -import ast -import IPython.nbconvert as nbconvert +import time +from datasets import load_from_disk logging.basicConfig( - level = logging.INFO, - format= '%(asctime)s %(levelname)-8s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - filename=f"pile/logs/the_stack_{time.time()}.log") + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + filename=f"pile/logs/the_stack_{time.time()}.log", +) logger = logging.getLogger(__name__) -FLAG_LANG_LIST = ["HTML","JSON","JSON5","JSONLD","JSONiq","Java Server Pages","YAML","XML","HTTP","CSV","SVG","Cython","Diff","Groff","Groovy Server Pages","INI","Inno Setup","LabVIEW","Less","Logos","MTML","Modelica","Module Management System","NSIS","Nginx","NumPy","ObjDump","Parrot Internal Representation","Protocol Buffer","Pure Data","Python traceback","RHTML","SCSS","Sass","ShellSession","Unity3D Asset","XPages","Xojo","desktop"] -FLAG_EXT_FLAT = [".aux",".mkiv",".mkvi",".sty",".toc"] +FLAG_LANG_LIST = [ + "HTML", + "JSON", + "JSON5", + "JSONLD", + "JSONiq", + "Java Server Pages", + "YAML", + "XML", + "HTTP", + "CSV", + "SVG", + "Cython", + "Diff", + "Groff", + "Groovy Server Pages", + "INI", + "Inno Setup", + "LabVIEW", + "Less", + "Logos", + "MTML", + "Modelica", + "Module Management System", + "NSIS", + "Nginx", + "NumPy", + "ObjDump", + "Parrot Internal Representation", + "Protocol Buffer", + "Pure Data", + "Python traceback", + "RHTML", + "SCSS", + "Sass", + "ShellSession", + "Unity3D Asset", + "XPages", + "Xojo", + "desktop", +] +FLAG_EXT_FLAT = [".aux", ".mkiv", ".mkvi", ".sty", ".toc"] + -def filter_flat_langs(example:dict)->bool: +def filter_flat_langs(example: dict) -> bool: if "lang" in example and "ext" in example: - if example["lang"] not in FLAG_LANG_LIST and example["ext"] not in FLAG_EXT_FLAT: + if ( + example["lang"] not in FLAG_LANG_LIST + and example["ext"] not in FLAG_EXT_FLAT + ): return True else: return False else: return False -def convert_to_py(example:dict): - content = example["content"] - - pass if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/pile/filtering/subset_pilev2.py b/pile/filtering/subset_pilev2.py index 028e866..d84ef9c 100644 --- a/pile/filtering/subset_pilev2.py +++ b/pile/filtering/subset_pilev2.py @@ -1,56 +1,62 @@ -from datasets import load_from_disk -import multiprocessing as mp -import os -import logging -from tqdm import tqdm import argparse -import time +import json +import logging +import os import pathlib import random -import json +import time + +from datasets import load_from_disk +from tqdm import tqdm logging.basicConfig( - level = logging.INFO, - format= '%(asctime)s %(levelname)-8s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - filename=f"pile/logs/the_pile_subset_{time.time()}.log") + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + filename=f"pile/logs/the_pile_subset_{time.time()}.log", +) logger = logging.getLogger(__name__) -def subset_dataset_fn(dataset,percentage:float=10.0): + +def subset_dataset_fn(dataset, percentage: float = 10.0): if len(dataset) > 10: - percentage_total = int(len(dataset)*percentage/100) - idxs = random.sample(range(len(dataset)),percentage_total) - + percentage_total = int(len(dataset) * percentage / 100) + idxs = random.sample(range(len(dataset)), percentage_total) + total_len = len(dataset) dataset = dataset.select(idxs) - #idxs to dataset column - print(f"Slicing 10% which is {percentage_total} | {len(dataset)} out of {total_len}") - logger.info(f"Slicing 10% which is {percentage_total} | {len(dataset)} out of {total_len}") - #dataset.add_column("subset_idx",idxs) This takes painfully too long So making a quick replacement. + # idxs to dataset column + print( + f"Slicing 10% which is {percentage_total} | {len(dataset)} out of {total_len}" + ) + logger.info( + f"Slicing 10% which is {percentage_total} | {len(dataset)} out of {total_len}" + ) + # dataset.add_column("subset_idx",idxs) This takes painfully too long So making a quick replacement. print(dataset) logger.info(dataset) - return dataset,idxs + return dataset, idxs else: - return dataset,[] + return dataset, [] -def write_to_json(path,list_of_inds:list): - with open(os.path.join(path,"subset_indices.json"),"w") as f: - json.dump({"idx" : list_of_inds},f) +def write_to_json(path, list_of_inds: list): + with open(os.path.join(path, "subset_indices.json"), "w") as f: + json.dump({"idx": list_of_inds}, f) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--input_dir", type=str) parser.add_argument("--output_dir", type=str) - parser.add_argument("--percentage",type=float,default=10.0) + parser.add_argument("--percentage", type=float, default=10.0) parser.add_argument("--num_workers", type=int, default=128) args = parser.parse_args() input_sub_dirs = os.listdir(args.input_dir) logger.info("Loading dataset") - for sub_dir in tqdm(input_sub_dirs,leave=False): + for sub_dir in tqdm(input_sub_dirs, leave=False): try: if sub_dir != "non_local_dedup": path_sub_dir = os.path.join(args.input_dir, sub_dir) @@ -59,34 +65,38 @@ def write_to_json(path,list_of_inds:list): dataset = load_from_disk(path_sub_dir) logger.info(f"Loaded dataset from {path_sub_dir}") print(f"Loaded dataset from {path_sub_dir}") - subset_dataset,subset_idx = subset_dataset_fn(dataset,percentage=10.0) + subset_dataset, subset_idx = subset_dataset_fn(dataset, percentage=10.0) logger.info(f"Subsetting dataset to {args.percentage}%") logger.info("Saving dataset") output_dir = pathlib.Path(os.path.join(args.output_dir, sub_dir)) output_dir.mkdir(parents=True, exist_ok=True) subset_dataset.save_to_disk(args.output_dir) - write_to_json(output_dir,subset_idx) + write_to_json(output_dir, subset_idx) logger.info(f"Saved dataset at {args.output_dir}") else: - logger.info(f"The subdir is non_local_dedup") - print(f"The subdir is non_local_dedup") - non_local_sub_dirs = os.listdir(os.path.join(args.input_dir,sub_dir)) - for non_local_sub_dir in tqdm(non_local_sub_dirs,leave=False): - path_non_local_sub_dir = os.path.join(args.input_dir,sub_dir, non_local_sub_dir) + logger.info("The subdir is non_local_dedup") + print("The subdir is non_local_dedup") + non_local_sub_dirs = os.listdir(os.path.join(args.input_dir, sub_dir)) + for non_local_sub_dir in tqdm(non_local_sub_dirs, leave=False): + path_non_local_sub_dir = os.path.join( + args.input_dir, sub_dir, non_local_sub_dir + ) print(path_sub_dir) logger.info(path_sub_dir) dataset = load_from_disk(path_non_local_sub_dir) logger.info(f"Loaded dataset from {path_non_local_sub_dir}") print(f"Loaded dataset from {path_non_local_sub_dir}") - subset_dataset = subset_dataset(dataset,percentage=10.0) + subset_dataset = subset_dataset(dataset, percentage=10.0) logger.info(f"Subsetting dataset to {args.percentage}%") logger.info("Saving dataset") - output_dir = pathlib.Path(os.path.join(args.output_dir,sub_dir ,non_local_sub_dir)) + output_dir = pathlib.Path( + os.path.join(args.output_dir, sub_dir, non_local_sub_dir) + ) output_dir.mkdir(parents=True, exist_ok=True) subset_dataset.save_to_disk(args.output_dir) - write_to_json(output_dir,subset_idx) + write_to_json(output_dir, subset_idx) logger.info(f"Saved dataset at {args.output_dir}") except PermissionError: logger.info(f"Permission Error at {sub_dir}") diff --git a/pile/processing/convert_to_parquet.py b/pile/processing/convert_to_parquet.py index 4899541..99f0dd4 100644 --- a/pile/processing/convert_to_parquet.py +++ b/pile/processing/convert_to_parquet.py @@ -1,17 +1,17 @@ import argparse -import datasets import pathlib +import datasets if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--dataset_path",type=str) - parser.add_argument("--output_dir",type=str) + parser.add_argument("--dataset_path", type=str) + parser.add_argument("--output_dir", type=str) args = parser.parse_args() dataset = datasets.load_from_disk(args.dataset_path) output_dir = pathlib.Path(args.output_dir) - output_dir.mkdir(parents=True,exist_ok=True) + output_dir.mkdir(parents=True, exist_ok=True) output_file_path = output_dir / "dataset.parquet" - dataset.to_parquet(output_file_path) \ No newline at end of file + dataset.to_parquet(output_file_path) diff --git a/pile/processing/data.json b/pile/processing/data.json index e69de29..0967ef4 100644 --- a/pile/processing/data.json +++ b/pile/processing/data.json @@ -0,0 +1 @@ +{} diff --git a/pile/processing/dedup/decontainmentation.py b/pile/processing/dedup/decontainmentation.py index bdb40ff..3f7b7ea 100644 --- a/pile/processing/dedup/decontainmentation.py +++ b/pile/processing/dedup/decontainmentation.py @@ -266,4 +266,4 @@ dataset = load_dataset("bigcode/the-stack-smol", data_dir="data/python", split="train") # clean the dataset -cleaned_dataset = cleaner.clean(dataset, column="content") \ No newline at end of file +cleaned_dataset = cleaner.clean(dataset, column="content") diff --git a/pile/processing/dedup/duy_local_dedup.py b/pile/processing/dedup/duy_local_dedup.py index df727e0..701f2a5 100644 --- a/pile/processing/dedup/duy_local_dedup.py +++ b/pile/processing/dedup/duy_local_dedup.py @@ -1,4 +1,3 @@ -from datasets import load_dataset, load_from_disk, Dataset import json import multiprocessing as mp import re @@ -6,12 +5,11 @@ from functools import partial from typing import Dict, List, Optional, Set, Tuple, Type -from datasets import Dataset -from tqdm import tqdm -import os +import cython +from datasets import Dataset, concatenate_datasets, load_from_disk from datasketch import MinHash, MinHashLSH from dpu_utils.utils.iterators import ThreadedIterator - +from tqdm import tqdm NON_ALPHA = re.compile("[^A-Za-z_0-9]") # parameters used in DuplicationIndex @@ -23,6 +21,7 @@ # name of the "text" column used in deduplication CONTENT = "text" + def get_min_hash(tokens: List[str]) -> Optional[MinHash]: """Compute the MinHash of a code snippet.""" if len(tokens) < MIN_NUM_TOKENS: @@ -46,7 +45,9 @@ def __init__( ): self._duplication_jaccard_threshold = duplication_jaccard_threshold self._num_perm = NUM_PERM - self._index = MinHashLSH(threshold=self._duplication_jaccard_threshold, num_perm=self._num_perm) + self._index = MinHashLSH( + threshold=self._duplication_jaccard_threshold, num_perm=self._num_perm + ) self._duplicate_clusters = defaultdict(set) @@ -58,7 +59,7 @@ def add(self, code_key: Tuple, min_hash: MinHash) -> None: Args: code_key (Tuple of (index, repo_name, path)): - Theoritically any hasbale key. Here we use a tuple to retrieve the information later. + Theoretically any hashtale key. Here we use a tuple to retrieve the information later. min_hash: MinHash of the code_key. """ close_duplicates = self._index.query(min_hash) @@ -98,12 +99,13 @@ def save(self, filepath) -> None: with open(filepath, "w") as f: json.dump(duplicate_clusters, f) -import cython @cython.cfunc def _compute_min_hash(element): index, data = element - min_hash = get_min_hash([t for t in NON_ALPHA.split(data[CONTENT]) if len(t.strip()) > 0]) + min_hash = get_min_hash( + [t for t in NON_ALPHA.split(data[CONTENT]) if len(t.strip()) > 0] + ) if min_hash is not None: return (index, data[PATH_COLUMN]), min_hash @@ -128,7 +130,11 @@ def make_duplicate_clusters(dataset_iterator: Type[Dataset], jaccard_threshold: """ di = DuplicationIndex(duplication_jaccard_threshold=jaccard_threshold) - for filename, min_hash in tqdm(ThreadedIterator(minhash_iter(enumerate(dataset_iterator)), max_queue_size=10000)): + for filename, min_hash in tqdm( + ThreadedIterator( + minhash_iter(enumerate(dataset_iterator)), max_queue_size=10000 + ) + ): di.add(filename, min_hash) # Returns a List[Cluster] where Cluster is List[str] with the filenames. @@ -252,14 +258,18 @@ def deduplicate_dataset( >>> ds_dedup, duplicate_clusters = deduplicate_dataset(ds, jaccard_threshold=0.85) """ duplicate_clusters = make_duplicate_clusters(dataset, jaccard_threshold) - duplicate_indices = set(x["base_index"] for cluster in duplicate_clusters for x in cluster) + duplicate_indices = set( + x["base_index"] for cluster in duplicate_clusters for x in cluster + ) extreme_dict = {} extremes_clusters = find_extremes(duplicate_clusters, dataset, jaccard_threshold) for extremes in extremes_clusters: for element in extremes: extreme_dict[element["base_index"]] = element remove_indices = duplicate_indices - set(extreme_dict.keys()) - ds_filter = dataset.filter(lambda x, idx: idx not in remove_indices, with_indices=True) + ds_filter = dataset.filter( + lambda x, idx: idx not in remove_indices, with_indices=True + ) # update duplicate_clusters for cluster in duplicate_clusters: @@ -275,14 +285,27 @@ def deduplicate_dataset( print(f"Filtered dataset size: {len(ds_filter)}") return ds_filter, duplicate_clusters -from datasets import concatenate_datasets + + data1 = load_from_disk("PileV2Reddit2020_ver2/PileV2Reddit2020_0") -data1 = data1.remove_columns(['check_char_repetition_criteria', 'check_flagged_words_criteria', 'check_stop_word_ratio_criteria']) +data1 = data1.remove_columns( + [ + "check_char_repetition_criteria", + "check_flagged_words_criteria", + "check_stop_word_ratio_criteria", + ] +) print(data1) data2 = load_from_disk("PileV2Reddit2020_ver2/PileV2Reddit2020_1") -data2 = data2.remove_columns(['check_char_repetition_criteria', 'check_flagged_words_criteria', 'check_stop_word_ratio_criteria']) +data2 = data2.remove_columns( + [ + "check_char_repetition_criteria", + "check_flagged_words_criteria", + "check_stop_word_ratio_criteria", + ] +) print(data2) -data = concatenate_datasets([data1, data2]) +data = concatenate_datasets([data1, data2]) print(data) dedup_pilev2 = deduplicate_dataset(data)[0] diff --git a/pile/processing/dedup/grouped_dedup.py b/pile/processing/dedup/grouped_dedup.py index d46b7b4..ccd4908 100644 --- a/pile/processing/dedup/grouped_dedup.py +++ b/pile/processing/dedup/grouped_dedup.py @@ -1,12 +1,11 @@ -from datasets import load_dataset, load_from_disk, Dataset, concatenate_datasets #!/usr/bin/env python -# -*- coding: utf-8 -*- # author : Chenghao Mou (mouchenghao@gmail.com) # created : 10/4/22 from __future__ import annotations import gc import hashlib +import json import logging import multiprocessing as mp import os @@ -18,18 +17,15 @@ from collections import defaultdict from itertools import tee from pathlib import Path -from typing import Any -from typing import Dict -from typing import Iterable -from typing import List -from typing import Tuple +from typing import Any, Dict, Iterable, List, Tuple + +from datasets import concatenate_datasets, load_from_disk with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=FutureWarning) import datasets import numpy as np import typer - from datasets import load_dataset from scipy.integrate import quad as integrate from tqdm import tqdm @@ -116,9 +112,13 @@ def embed_func( """ hashvalues = np.ones(num_perm, dtype=np.uint64) * MAX_HASH tokens = {" ".join(t) for t in ngrams(NON_ALPHA.split(content), ngram_size)} - hv = np.array([sha1_hash32(token.encode("utf-8")) for token in tokens], dtype=np.uint64) # noqa: E501 + hv = np.array( + [sha1_hash32(token.encode("utf-8")) for token in tokens], dtype=np.uint64 + ) # noqa: E501 a, b = permutations - phv = np.bitwise_and(((hv * np.tile(a, (len(hv), 1)).T).T + b) % MERSENNE_PRIME, MAX_HASH) # noqa: E501 + phv = np.bitwise_and( + ((hv * np.tile(a, (len(hv), 1)).T).T + b) % MERSENNE_PRIME, MAX_HASH + ) # noqa: E501 hashvalues = np.vstack([phv, hashvalues]).min(axis=0) Hs = [bytes(hashvalues[start:end].byteswap().data) for start, end in hashranges] return {"__signatures__": Hs, "__id__": idx} @@ -200,10 +200,16 @@ def union(self, x, y): self.parent[px] = self.parent[py] = min(px, py) -if __name__ == "__main__": +if __name__ == "__main__": # noqa: + def run( - dataset_path: str = typer.Option("codeparrot/codeparrot-clean-valid", help="The dataset to use"), # noqa: E501 - grouped_dup_path: str = typer.Option("codeparrot/codeparrot-clean-valid-dup", help="The grouped duplicates to use"), # noqa: E501 + dataset_path: str = typer.Option( + "codeparrot/codeparrot-clean-valid", help="The dataset to use" + ), # noqa: E501 + grouped_dup_path: str = typer.Option( + "codeparrot/codeparrot-clean-valid-dup", + help="The grouped duplicates to use", + ), # noqa: E501 column: str = typer.Option("content", help="Dataset column"), ngram_size: int = typer.Option(5, help="The ngram size to use for MinHash"), num_perm: int = typer.Option(256, help="Number of permutations"), @@ -217,7 +223,7 @@ def run( logging.basicConfig(level=logging.INFO) - with open (grouped_dup_path, "r") as f: + with open(grouped_dup_path, "r") as f: data = json.load(f) in_common = data.pop("common_group") for group_name in ["group_1", "group_2"]: @@ -233,9 +239,9 @@ def run( ds = load_from_disk(ds_path) ds = ds.remove_columns( [ - 'check_char_repetition_criteria', - 'check_flagged_words_criteria', - 'check_stop_word_ratio_criteria' + "check_char_repetition_criteria", + "check_flagged_words_criteria", + "check_stop_word_ratio_criteria", ] ) group.append(ds_path) @@ -244,9 +250,9 @@ def run( ds = load_from_disk(ds_path) ds = ds.remove_columns( [ - 'check_char_repetition_criteria', - 'check_flagged_words_criteria', - 'check_stop_word_ratio_criteria' + "check_char_repetition_criteria", + "check_flagged_words_criteria", + "check_stop_word_ratio_criteria", ] ) group.append(ds_path) @@ -284,7 +290,9 @@ def run( time_measures["clustering"] = time.time() batch_size: int = 10000 for i in tqdm( - range(0, len(embedded), batch_size), dynamic_ncols=True, desc="Iterating MinHashes..." # noqa: E501 + range(0, len(embedded), batch_size), + dynamic_ncols=True, + desc="Iterating MinHashes...", # noqa: E501 ): batch = embedded[i : i + batch_size] for key, Hs in zip(batch["__id__"], batch["__signatures__"]): @@ -337,9 +345,15 @@ def run( logger.info( f"{'Data Number (after)':<{PAD}}: {FINAL_DATA_SIZE} ({FINAL_DATA_SIZE / DATA_SIZE:.2%})" # noqa: E501 ) - logger.info(f"{'Duplicate Number':<{PAD}}: {DUP_SIZE} ({DUP_SIZE / DATA_SIZE:.2%})") # noqa: E501 - logger.info(f"{'Total Time':<{PAD}}: {time.time() - start_time:.2f} seconds") - logger.info(f"{'Deduplicated Dataset':<{PAD}}: {os.path.join(output, group_name)}") + logger.info( + f"{'Duplicate Number':<{PAD}}: {DUP_SIZE} ({DUP_SIZE / DATA_SIZE:.2%})" + ) # noqa: E501 + logger.info( + f"{'Total Time':<{PAD}}: {time.time() - start_time:.2f} seconds" + ) + logger.info( + f"{'Deduplicated Dataset':<{PAD}}: {os.path.join(output, group_name)}" + ) logger.info("🤗 Happy Deduplicating 🤗") mp.set_start_method("fork", force=True) diff --git a/pile/processing/dedup/groups.json b/pile/processing/dedup/groups.json index 3d48efe..13467c1 100644 --- a/pile/processing/dedup/groups.json +++ b/pile/processing/dedup/groups.json @@ -39,4 +39,4 @@ "CodePileReddit2022_ver2","PileV2Posts_ver2", "CodePilePosts_ver2", "ASFPublicMail_ver2", "StackExchange_ver4", "USENET_ver2", "Discourse_ver2", "GithubIssues_ver2", "Opensubtitles", "DevDocs"], "non_dedup": ["DMMath", "CPDataset", "AI4Code", "GithubDiff_ver2", "GitHub Code", "AMPS", "TED2020", "GNOME", "SODA"], "common_group": ["DKPRO C4"] -} \ No newline at end of file +} diff --git a/pile/processing/dedup/run_grouped_dedup.sh b/pile/processing/dedup/run_grouped_dedup.sh index 23bfaca..e9d80db 100644 --- a/pile/processing/dedup/run_grouped_dedup.sh +++ b/pile/processing/dedup/run_grouped_dedup.sh @@ -4,4 +4,4 @@ DATA_PATH=~/pilev2_data/pilev2_local_dedup/ GROUPED_DUPS=./groups.json OUTPUT_PATH=./deduped/ -python ./grouped_dup.py --dataset_path $DATA_PATH --grouped_dup_path $GROUPED_DUPS --output_path $OUTPUT_PATH \ No newline at end of file +python ./grouped_dup.py --dataset_path $DATA_PATH --grouped_dup_path $GROUPED_DUPS --output_path $OUTPUT_PATH diff --git a/pile/processing/dedup/test_grouped_dup.py b/pile/processing/dedup/test_grouped_dup.py index 42e74d1..8db9585 100644 --- a/pile/processing/dedup/test_grouped_dup.py +++ b/pile/processing/dedup/test_grouped_dup.py @@ -2,67 +2,68 @@ import json import os -# print number of cores -print(os.cpu_count()) - from datasets import concatenate_datasets, load_from_disk -parser = argparse.ArgumentParser() -parser.add_argument("--dataset_path", type=str, required=True) -parser.add_argument("--grouped_dup_path", type=str, required=True) -parser.add_argument("--output_path", type=str, required=True) -args = parser.parse_args() +if __name__ == "__main__": + # print number of cores + print(os.cpu_count()) + + parser = argparse.ArgumentParser() + parser.add_argument("--dataset_path", type=str, required=True) + parser.add_argument("--grouped_dup_path", type=str, required=True) + parser.add_argument("--output_path", type=str, required=True) + args = parser.parse_args() + + with open(args.grouped_dup_path, "r") as f: + data = json.load(f) -with open (args.grouped_dup_path, "r") as f: - data = json.load(f) + print(data) + in_common = data.pop("common_group") + for group_name in ["group_1", "group_2"]: + # concatenate_datasets([data1, data2]) + group = [] + for name in data[group_name][:2]: + ds_path = os.path.join(args.dataset_path, name) + ds = load_from_disk(ds_path) + ds = ds.remove_columns( + [ + "check_char_repetition_criteria", + "check_flagged_words_criteria", + "check_stop_word_ratio_criteria", + ] + ) + group.append(ds_path) + for name in in_common: + ds_path = os.path.join(args.dataset_path, name) + ds = load_from_disk(ds_path) + ds = ds.remove_columns( + [ + "check_char_repetition_criteria", + "check_flagged_words_criteria", + "check_stop_word_ratio_criteria", + ] + ) + group.append(ds_path) + ds = concatenate_datasets(group) -print(data) -in_common = data.pop("common_group") -for group_name in ["group_1", "group_2"]: - # concatenate_datasets([data1, data2]) - group = [] - for name in data[group_name][:2]: - ds_path = os.path.join(args.dataset_path, name) - ds = load_from_disk(ds_path) - ds = ds.remove_columns( - [ - 'check_char_repetition_criteria', - 'check_flagged_words_criteria', - 'check_stop_word_ratio_criteria' - ] - ) - group.append(ds_path) - for name in in_common: - ds_path = os.path.join(args.dataset_path, name) - ds = load_from_disk(ds_path) - ds = ds.remove_columns( - [ - 'check_char_repetition_criteria', - 'check_flagged_words_criteria', - 'check_stop_word_ratio_criteria' - ] - ) - group.append(ds_path) - ds = concatenate_datasets(group) + # print(ds_path) - # print(ds_path) + # group.append(ds_path) - # group.append(ds_path) - - # ds = concatenate_datasets(group) + # ds = concatenate_datasets(group) -# data1 = load_from_disk("PileV2Reddit2020_ver2/PileV2Reddit2020_0") -# data1 = data1.remove_columns(['check_char_repetition_criteria', 'check_flagged_words_criteria', 'check_stop_word_ratio_criteria']) -# print(data1) -# data2 = load_from_disk("PileV2Reddit2020_ver2/PileV2Reddit2020_1") -# data2 = data2.remove_columns(['check_char_repetition_criteria', 'check_flagged_words_criteria', 'check_stop_word_ratio_criteria']) -# print(data2) -# data = concatenate_datasets([data1, data2]) -# print(data) + # data1 = load_from_disk("PileV2Reddit2020_ver2/PileV2Reddit2020_0") + # data1 = data1.remove_columns(['check_char_repetition_criteria', 'check_flagged_words_criteria', 'check_stop_word_ratio_criteria']) + # print(data1) + # data2 = load_from_disk("PileV2Reddit2020_ver2/PileV2Reddit2020_1") + # data2 = data2.remove_columns(['check_char_repetition_criteria', 'check_flagged_words_criteria', 'check_stop_word_ratio_criteria']) + # print(data2) + # data = concatenate_datasets([data1, data2]) + # print(data) -# dedup_pilev2 = deduplicate_dataset(data)[0] -# dedup_pilev2.save_to_disk("local_dedup/PileV2Reddit2020_ver2") + # dedup_pilev2 = deduplicate_dataset(data)[0] + # dedup_pilev2.save_to_disk("local_dedup/PileV2Reddit2020_ver2") -# for group in data: -# for ds in group: -# print(ds) \ No newline at end of file + # for group in data: + # for ds in group: + # print(ds) diff --git a/pile/processing/deduplication.py b/pile/processing/deduplication.py index 7bc727a..a051bf6 100644 --- a/pile/processing/deduplication.py +++ b/pile/processing/deduplication.py @@ -1,11 +1,10 @@ import argparse - -from datasets import disable_caching, load_from_disk -from functools import partial from pathlib import Path from pprint import pprint + +from datasets import disable_caching, load_from_disk from squeakily.core import Pipeline -from squeakily.filter import minhash_dedup + disable_caching() # Parse the arguments @@ -61,11 +60,13 @@ num_shards = ds_len // num_files_per_shard if num_shards == 0: num_shards = 1 - ds_shards = [ds["dataset"].shard(num_shards, i, contiguous=True) for i in range(num_shards)] + ds_shards = [ + ds["dataset"].shard(num_shards, i, contiguous=True) for i in range(num_shards) + ] for i, shard in enumerate(ds_shards): path = output_dir / f"{name}_shard_{i}.jsonl.zst" shard.to_json( path, lines=True, orient="records", - ) \ No newline at end of file + ) diff --git a/pile/processing/fix_format/fix_format_arxiv.py b/pile/processing/fix_format/fix_format_arxiv.py index 99da61b..9186e41 100644 --- a/pile/processing/fix_format/fix_format_arxiv.py +++ b/pile/processing/fix_format/fix_format_arxiv.py @@ -1,61 +1,63 @@ import argparse import re -import yaml +from pathlib import Path from datasets import disable_caching, load_from_disk -from pathlib import Path + disable_caching() + def get_abs_title(text): - # Remove any non-ascii characters and x0007 - text = re.sub(r'\x07',r'', text) - text = re.sub(r'[^\x00-\x7f]',r'', text) - # remove single quotes - # text = re.sub(r"'",r'', text) - lines = text.split("\n") - yaml_contents = [] - in_yaml = False - start, end = 0, 0 - for idx, line in enumerate(lines): - if line == "---": - if in_yaml: - end = idx - break - else: - in_yaml = True - start = idx - continue - if in_yaml: - yaml_contents.append(line) - - abstract = "" - title = "" - for idx, line in enumerate(yaml_contents): - if line.startswith("title:"): - title = line.replace("title:", "") - if line.startswith("abstract:"): - abstract = line.replace("abstract:", "") - - # trim the title and abstract - title = title.strip() - abstract = abstract.strip() - - # remove the beginning and ending quotes - title = title[1:-1] - abstract = abstract[1:-1] - return title, abstract, (start, end) + # Remove any non-ascii characters and x0007 + text = re.sub(r"\x07", r"", text) + text = re.sub(r"[^\x00-\x7f]", r"", text) + # remove single quotes + # text = re.sub(r"'",r'', text) + lines = text.split("\n") + yaml_contents = [] + in_yaml = False + start, end = 0, 0 + for idx, line in enumerate(lines): + if line == "---": + if in_yaml: + end = idx + break + else: + in_yaml = True + start = idx + continue + if in_yaml: + yaml_contents.append(line) + + abstract = "" + title = "" + for idx, line in enumerate(yaml_contents): + if line.startswith("title:"): + title = line.replace("title:", "") + if line.startswith("abstract:"): + abstract = line.replace("abstract:", "") + + # trim the title and abstract + title = title.strip() + abstract = abstract.strip() + + # remove the beginning and ending quotes + title = title[1:-1] + abstract = abstract[1:-1] + return title, abstract, (start, end) + def reformatter(example): - title, abstract, (start, end) = get_abs_title(example["text"]) + title, abstract, (start, end) = get_abs_title(example["text"]) - # remove the yaml contents from the text - lines = example["text"].split("\n") - lines = lines[:start] + lines[end+1:] - # add the title and abstract - lines = [title, "", abstract] + lines + # remove the yaml contents from the text + lines = example["text"].split("\n") + lines = lines[:start] + lines[end + 1 :] + # add the title and abstract + lines = [title, "", abstract] + lines - example["text"] = "\n".join(lines) - return example + example["text"] = "\n".join(lines) + return example # Parse the arguments @@ -87,4 +89,4 @@ def reformatter(example): # python fix_format_arxiv.py --data_dir /work/pilev2/pile/processing/fix_format/pile-v2-eda/local_dedup/arXiv_ver2 --output_dir /work/pilev2/pile/processing/fix_format/pile-v2-eda/reformated/arXiv_ver2 # /fsx/home-nathan/work/pilev2/pile/processing/fix_format/pile-v2-eda/local_dedup/arXiv_ver2 -# /fsx/home-nathan/work/pilev2/pile/processing/fix_format/pile-v2-eda/reformated/arXiv_ver2 \ No newline at end of file +# /fsx/home-nathan/work/pilev2/pile/processing/fix_format/pile-v2-eda/reformated/arXiv_ver2 diff --git a/pile/processing/fix_format/fix_format_pubmed.py b/pile/processing/fix_format/fix_format_pubmed.py index 354ba2c..4c069d2 100644 --- a/pile/processing/fix_format/fix_format_pubmed.py +++ b/pile/processing/fix_format/fix_format_pubmed.py @@ -1,33 +1,34 @@ import argparse -import re -import yaml +from pathlib import Path from datasets import disable_caching, load_from_disk -from pathlib import Path + disable_caching() + def get_body_text(text): - lines = text.split("\n") - body_contents = [] - in_body = False - start, end = 0, 0 - for idx, line in enumerate(lines): - if "==== Body" in line: - in_body = True - start = idx - continue - elif "==== " in line and in_body: - end = idx - break - if in_body: - body_contents.append(line) - - return "\n".join(body_contents), (start, end) + lines = text.split("\n") + body_contents = [] + in_body = False + start, end = 0, 0 + for idx, line in enumerate(lines): + if "==== Body" in line: + in_body = True + start = idx + continue + elif "==== " in line and in_body: + end = idx + break + if in_body: + body_contents.append(line) + + return "\n".join(body_contents), (start, end) + def reformatter(example): - text, (start, end) = get_body_text(example["text"]) - example["text"] = text - return example + text, (start, end) = get_body_text(example["text"]) + example["text"] = text + return example # Parse the arguments @@ -60,4 +61,4 @@ def reformatter(example): # python fix_format_pubmed.py --data_dir /work/pilev2/pile/processing/fix_format/pile-v2-eda/local_dedup/PubMed_ver2 --output_dir /work/pilev2/pile/processing/fix_format/pile-v2-eda/reformatted/PubMed_ver2 # /fsx/home-nathan/work/pilev2/pile/processing/fix_format/pile-v2-eda/local_dedup/arXiv_ver2 -# /fsx/home-nathan/work/pilev2/pile/processing/fix_format/pile-v2-eda/reformated/arXiv_ver2 \ No newline at end of file +# /fsx/home-nathan/work/pilev2/pile/processing/fix_format/pile-v2-eda/reformated/arXiv_ver2 diff --git a/pile/processing/local_dedup.py b/pile/processing/local_dedup.py index 2049083..fecc5ba 100644 --- a/pile/processing/local_dedup.py +++ b/pile/processing/local_dedup.py @@ -1,11 +1,11 @@ import argparse - -from datasets import disable_caching, load_from_disk -from functools import partial from pathlib import Path from pprint import pprint + +from datasets import disable_caching, load_from_disk from squeakily.core import Pipeline from squeakily.filter import minhash_dedup + disable_caching() # Parse the arguments @@ -51,4 +51,4 @@ pprint(pipeline.datasources) # Save the resulting datasets - pipeline.datasources[0]["dataset"].save_to_disk(output_dir / k) \ No newline at end of file + pipeline.datasources[0]["dataset"].save_to_disk(output_dir / k) diff --git a/pile/processing/processing.py b/pile/processing/processing.py index 3fc4568..fdff961 100644 --- a/pile/processing/processing.py +++ b/pile/processing/processing.py @@ -1,26 +1,31 @@ import argparse import pickle +from functools import partial +from pathlib import Path +from pprint import pprint import numpy as np - from datasets import disable_caching, load_from_disk from faker import Faker -from functools import partial -from pathlib import Path -from pprint import pprint from scrubadub import Scrubber from scrubadub.detectors import CredentialDetector, TwitterDetector, UrlDetector -from scrubadub.filth import CreditCardFilth, EmailFilth, PhoneFilth, SocialSecurityNumberFilth +from scrubadub.filth import ( + CreditCardFilth, + EmailFilth, + PhoneFilth, + SocialSecurityNumberFilth, +) +from squeakily.clean import replace_ip from squeakily.core import Pipeline -from squeakily.clean import replace_ip, normalize_whitespace, normalize_punctuation from squeakily.filter import ( check_char_repetition, check_flagged_words, - check_stop_word_ratio, check_perplexity, + check_stop_word_ratio, check_word_number, ) from squeakily.helpers import KenlmModel + disable_caching() kenlm_model = KenlmModel.from_pretrained( @@ -83,9 +88,11 @@ "Bible", "OtherWiki", "TheStack", - "GithubDiffs" + "GithubDiffs", ] -dataset_cats = [x.name for x in data_dir.iterdir() if x.is_dir() and x.name not in ignored_datasets][:2] +dataset_cats = [ + x.name for x in data_dir.iterdir() if x.is_dir() and x.name not in ignored_datasets +][:2] scrubber = Scrubber() scrubber.remove_detector(CredentialDetector) @@ -93,13 +100,19 @@ scrubber.remove_detector(UrlDetector) faker = Faker() -normalize_cleaning = lambda x: ( - scrubber.clean(x) - .replace("{{EMAIL}}", EmailFilth.generate(faker)) - .replace("{{PHONE}}", PhoneFilth.generate(faker)) - .replace("{{SOCIAL_SECURITY_NUMBER}}", SocialSecurityNumberFilth.generate(faker)) - .replace("{{CREDIT_CARD}}", CreditCardFilth.generate(faker)) -) + + +def normalize_cleaning(x): + return ( + scrubber.clean(x) + .replace("{{EMAIL}}", EmailFilth.generate(faker)) + .replace("{{PHONE}}", PhoneFilth.generate(faker)) + .replace( + "{{SOCIAL_SECURITY_NUMBER}}", SocialSecurityNumberFilth.generate(faker) + ) + .replace("{{CREDIT_CARD}}", CreditCardFilth.generate(faker)) + ) + check_char_repetition_p = partial(check_char_repetition, char_repetition_threshold=0.6) check_char_repetition_p.__name__ = "check_char_repetition" @@ -131,13 +144,19 @@ stats_dict = pickle.load(open(args.stats_path, "rb")) for ds in datasources: stats = stats_dict[ds["name"]] - word_min, word_max = np.quantile(stats["word_count"]["lst"], [args.min_percentile, args.max_percentile]) - check_word_number_p = partial(check_word_number, min_word_threshold=word_min, max_word_threshold=word_max) + word_min, word_max = np.quantile( + stats["word_count"]["lst"], [args.min_percentile, args.max_percentile] + ) + check_word_number_p = partial( + check_word_number, min_word_threshold=word_min, max_word_threshold=word_max + ) check_word_number_p.__name__ = "check_word_number" ds["filters"].append(check_word_number_p) perplexity_max = np.quantile(stats["perplexity"]["lst"], args.max_percentile) - check_perplexity_p = partial(check_perplexity, model=kenlm_model, perplexity_threshold=perplexity_max) + check_perplexity_p = partial( + check_perplexity, model=kenlm_model, perplexity_threshold=perplexity_max + ) check_perplexity_p.__name__ = "check_perplexity" ds["filters"].append(check_perplexity_p) @@ -150,4 +169,4 @@ # Save the resulting datasets for name, ds in zip(dataset_cats, pipeline.datasources): - ds["dataset"].save_to_disk(output_dir / name) \ No newline at end of file + ds["dataset"].save_to_disk(output_dir / name) diff --git a/pile/processing/processing_compression.py b/pile/processing/processing_compression.py index 1ade631..0123d33 100644 --- a/pile/processing/processing_compression.py +++ b/pile/processing/processing_compression.py @@ -1,15 +1,13 @@ import argparse -import pickle - -import numpy as np - -from datasets import disable_caching, load_from_disk -from faker import Faker from functools import partial from pathlib import Path from pprint import pprint + +import numpy as np +from datasets import disable_caching, load_from_disk from squeakily.core import Pipeline from squeakily.filter import check_compression_ratio + disable_caching() @@ -55,9 +53,11 @@ "Gutenberg", "OtherWiki", "TheStack", - "GithubDiffs" + "GithubDiffs", ] -dataset_cats = [x.name for x in data_dir.iterdir() if x.is_dir() and x.name not in ignored_datasets][:2] +dataset_cats = [ + x.name for x in data_dir.iterdir() if x.is_dir() and x.name not in ignored_datasets +][:2] tmp_ds = [ { @@ -73,9 +73,13 @@ for idx, ds in enumerate(tmp_ds): pipeline = Pipeline([ds]) pipeline.run(dry_run=True) - compression_ratios = pipeline.datasources[0]["dataset"]["check_compression_ratio_criteria"] + compression_ratios = pipeline.datasources[0]["dataset"][ + "check_compression_ratio_criteria" + ] min_compression_ratio = np.quantile(compression_ratios, args.min_percentile) - check_compression_ratio_p = partial(check_compression_ratio, compression_threshold=min_compression_ratio) + check_compression_ratio_p = partial( + check_compression_ratio, compression_threshold=min_compression_ratio + ) check_compression_ratio_p.__name__ = "check_compression_ratio" ds["filters"] = [check_compression_ratio_p] @@ -90,4 +94,4 @@ # Save the resulting datasets for name, ds in zip(dataset_cats, pipeline.datasources): - ds["dataset"].save_to_disk(output_dir / name) \ No newline at end of file + ds["dataset"].save_to_disk(output_dir / name) diff --git a/pile/processing/requirements.txt b/pile/processing/requirements.txt index 20ff844..ffd1ed1 100644 --- a/pile/processing/requirements.txt +++ b/pile/processing/requirements.txt @@ -1,8 +1,8 @@ datasets fasttext -scrubadub -sentencepiece git+https://github.com/CarperAI/squeakily.git +hf_clean_benchmarks https://github.com/kpu/kenlm/archive/master.zip +scrubadub +sentencepiece zstandard -hf_clean_benchmarks diff --git a/pile/processing/test_datasets.py b/pile/processing/test_datasets.py index 8613d27..cedf895 100644 --- a/pile/processing/test_datasets.py +++ b/pile/processing/test_datasets.py @@ -1,9 +1,8 @@ import argparse import random +from pathlib import Path from datasets import load_from_disk -from pathlib import Path -from pprint import pprint from tqdm.auto import tqdm # set the seed @@ -47,4 +46,4 @@ f.write(ds[i]["text"] + "\n\n") except Exception as e: print(e) - continue \ No newline at end of file + continue diff --git a/pile/processing/threshold_selection.py b/pile/processing/threshold_selection.py index 38f92ca..acae5ab 100644 --- a/pile/processing/threshold_selection.py +++ b/pile/processing/threshold_selection.py @@ -1,5 +1,5 @@ import pickle -import numpy as np + path = "/fsx/shared/hf_data_pilev2_small_text/stats_dict.pkl" with open(path, "rb") as f: stats_dict = pickle.load(f) @@ -13,4 +13,4 @@ # perplexity_max = np.quantile(stats["perplexity"]["lst"], 0.99) # print(f"perplexity_max: {perplexity_max}") # flagged_ratio_max = np.quantile(stats["flag_words_ratio"]["lst"], 0.99) - # print(f"flagged_ratio_max: {flagged_ratio_max}") \ No newline at end of file + # print(f"flagged_ratio_max: {flagged_ratio_max}") diff --git a/pile/scripts/run_group_stats.sh b/pile/scripts/run_group_stats.sh index 5f07623..81b5661 100644 --- a/pile/scripts/run_group_stats.sh +++ b/pile/scripts/run_group_stats.sh @@ -1 +1 @@ -python3 check_percentage_non_c4.py --input_dir \ No newline at end of file +python3 check_percentage_non_c4.py --input_dir diff --git a/pile/stats/check_percentage_non_c4.py b/pile/stats/check_percentage_non_c4.py index 3cb7991..637c44d 100644 --- a/pile/stats/check_percentage_non_c4.py +++ b/pile/stats/check_percentage_non_c4.py @@ -1,28 +1,23 @@ -import datasets -import multiprocessing as mp -import os -import pathlib -import json import argparse +import json import logging +import pathlib import time +import datasets + logging.basicConfig( - level = logging.INFO, - format= '%(asctime)s %(levelname)-8s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - filename=f"logs/stats_{time.time()}.log") + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + filename=f"logs/stats_{time.time()}.log", +) -meta_match = { - "arxiv" : - [ "arxiv_out" ], - "pubmed" : - ["PubMedDataset"] -} +meta_match = {"arxiv": ["arxiv_out"], "pubmed": ["PubMedDataset"]} -def filter_subset(example,subset_key:str): +def filter_subset(example, subset_key: str): if meta_match[subset_key] in example["meta"]: return True else: @@ -39,11 +34,14 @@ def filter_subset(example,subset_key:str): dataset = datasets.load_from_disk(args.input_dir) stats_dict = {} for subset_key in meta_match.keys(): - subset_dataset = dataset.filter(filter_subset,fn_kwargs={"subset_key":subset_key} num_proc=args.num_workers) + subset_dataset = dataset.filter( + filter_subset, + fn_kwargs={"subset_key": subset_key}, + num_proc=args.num_workers, + ) length = len(subset_dataset) stats_dict[subset_key] = length output_dir = pathlib.Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) - with open(output_dir/"stats.json","w") as f: - json.dump(stats_dict,f,indent=2) - \ No newline at end of file + with open(output_dir / "stats.json", "w") as f: + json.dump(stats_dict, f, indent=2) diff --git a/pile/templates.py b/pile/templates.py index a84322d..b6ffbbb 100644 --- a/pile/templates.py +++ b/pile/templates.py @@ -1,18 +1,20 @@ from abc import ABC, abstractmethod +from logging import getLogger +from pathlib import Path +from pprint import pformat +from typing import Generator, List, Union + +from tqdm import tqdm + from .utils import ( + component_exists, directory_size, - download, disk_cache, - utf8len, + download, pile_cache_dir, - component_exists, schema_from_examples, + utf8len, ) -from tqdm import tqdm -from typing import Generator, List, Optional, Union -from pathlib import Path -from logging import getLogger -from pprint import pformat logger = getLogger(__name__) @@ -24,24 +26,21 @@ def name(self) -> str: """ Human-readable name of the dataset. """ - pass - + @property @abstractmethod def license(self) -> str: """ The license that the data is under. """ - pass @property @abstractmethod def urls(self) -> List[str]: """Returns a list of urls to download the dataset. - The most trusted miror should be first. + The most trusted mirror should be first. """ - pass @property @abstractmethod @@ -49,7 +48,6 @@ def checksum(self) -> str: """ The sha256 checksum of the dataset. """ - pass @abstractmethod def replicate(self): @@ -60,7 +58,6 @@ def replicate(self): filtering steps, or it's not possible to replicate the scraping process, this method should just call self.download() """ - pass @abstractmethod def documents(self) -> Generator[dict, None, None]: @@ -70,7 +67,6 @@ def documents(self) -> Generator[dict, None, None]: Each document should be a dictionary with, at minimum, a 'text' key containing the text of the document. """ - pass @abstractmethod def paths(self) -> Generator[Union[str, Path], None, None]: @@ -79,7 +75,6 @@ def paths(self) -> Generator[Union[str, Path], None, None]: If the dataset is not on disk, this should still return the *expected* filepaths when the dataset is downloaded. """ - pass @abstractmethod def examples(self) -> List[dict]: @@ -87,7 +82,6 @@ def examples(self) -> List[dict]: Should be callable before downloading the whole dataset. """ - pass @abstractmethod def size_on_disk(self) -> int: @@ -98,7 +92,6 @@ def size_on_disk(self) -> int: We can verify the hardcoded value by comparing to Dataset._size_on_disk() """ - pass @abstractmethod def size(self) -> int: @@ -108,7 +101,6 @@ def size(self) -> int: This should be hardcoded for each dataset, we can verify the hardcoded value by comparing to Dataset._size() """ - pass def download(self, force=False): """Downloads the final hosted dataset. If necessary, this function diff --git a/pile/utils.py b/pile/utils.py index ab6fb2b..8160a58 100644 --- a/pile/utils.py +++ b/pile/utils.py @@ -1,13 +1,14 @@ +import hashlib +import logging import os -from typing import Union, List +import random +import shutil from pathlib import Path +from typing import List, Union + +import pyfra as pf from best_download import download_file -import logging -import shutil -import random from diskcache import Cache -import pyfra as pf -import hashlib from tqdm import tqdm logger = logging.getLogger(__name__) @@ -134,7 +135,7 @@ def download( success = download_file(url, str(out_path), expected_checksum=checksum) except (SystemExit, KeyboardInterrupt): raise - except: + except Exception: print("here") import traceback diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..43ab41a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.isort] +multi_line_output = 3 +profile = "black" + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-ra -q --doctest-modules -vv --cov=pile/ " +testpaths = ["tests"] From fc5f03b2b80dadeff47bae09f58faf8ea83fda40 Mon Sep 17 00:00:00 2001 From: "Fabrizio (Misto) Milo" Date: Thu, 12 Jan 2023 12:59:08 -0800 Subject: [PATCH 2/2] add poetry --- .vscode/settings.json | 1 + README.md | 2 +- pile/filtering/__init__.py | 0 pile/processing/split_group.py | 64 +- pile/scripts/run_group_stats.sh | 1 + pile/tests/__init__.py | 66 -- pile/tests/test_datasets.py | 69 ++ poetry.lock | 1352 +++++++++++++++++++++++++++++++ pyproject.toml | 28 +- 9 files changed, 1483 insertions(+), 100 deletions(-) create mode 100644 pile/filtering/__init__.py create mode 100644 pile/tests/test_datasets.py create mode 100644 poetry.lock diff --git a/.vscode/settings.json b/.vscode/settings.json index 1d927bc..48253cf 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,6 @@ { "cSpell.words": [ + "dedup", "freelaw", "levelname", "philpapers" diff --git a/README.md b/README.md index 1ded309..dc9da9f 100644 --- a/README.md +++ b/README.md @@ -1 +1 @@ -# pilev2 +# Pile v2 diff --git a/pile/filtering/__init__.py b/pile/filtering/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pile/processing/split_group.py b/pile/processing/split_group.py index e52a456..b5ae893 100644 --- a/pile/processing/split_group.py +++ b/pile/processing/split_group.py @@ -1,37 +1,43 @@ -import datasets -import multiprocessing as mp -import os -import pathlib -import json import argparse +import ast +import json import logging +import os +import pathlib import time -import ast + +import datasets logging.basicConfig( - level = logging.INFO, - format= '%(asctime)s %(levelname)-8s %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - filename=f"logs/stats_{time.time()}_split_group.log") + level=logging.INFO, + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + filename=f"logs/stats_{time.time()}_split_group.log", +) logger = logging.getLogger(__name__) meta_match = { - "arXiv_ver2" : lambda example : ast.literal_eval(example["meta"]["source"]) == "arXiv_out/", - "PubMed_ver2" : lambda example : ast.literal_eval(example["meta"]["source"]) == "PubMedDataset", - "Gutenberg_ver2" : lambda example : ast.literal_eval(example["meta"]["source"]) == "Project Gutenberg", - "FreeLaw_Options_ver2" : lambda example : "date_created" in example.keys(), - "UbuntuIRC_ver2" : lambda example : ast.literal_eval(example["meta"]["source"]) == "Ubuntu IRC", - "Enwiki_ver2" : lambda example : "wikidata_id" in example.keys(), - "EuroParliamentProceedings_ver2" : lambda example : "language" in example.keys(), - "USPTO_ver2" : lambda example : ast.literal_eval(example["meta"]["source_data"]) == "USPTO-Application", - "PileOfLaw_ver2" : lambda example : "dataset" in example.keys(), - "OtherWiki_ver2" : lambda example : "wiki_source" in example.keys(), - "S2ORC_ver2" : lambda example : ast.literal_eval(example["meta"]["source"]) == "S2ORC", + "arXiv_ver2": lambda example: ast.literal_eval(example["meta"]["source"]) + == "arXiv_out/", + "PubMed_ver2": lambda example: ast.literal_eval(example["meta"]["source"]) + == "PubMedDataset", + "Gutenberg_ver2": lambda example: ast.literal_eval(example["meta"]["source"]) + == "Project Gutenberg", + "FreeLaw_Options_ver2": lambda example: "date_created" in example.keys(), + "UbuntuIRC_ver2": lambda example: ast.literal_eval(example["meta"]["source"]) + == "Ubuntu IRC", + "Enwiki_ver2": lambda example: "wikidata_id" in example.keys(), + "EuroParliamentProceedings_ver2": lambda example: "language" in example.keys(), + "USPTO_ver2": lambda example: ast.literal_eval(example["meta"]["source_data"]) + == "USPTO-Application", + "PileOfLaw_ver2": lambda example: "dataset" in example.keys(), + "OtherWiki_ver2": lambda example: "wiki_source" in example.keys(), + "S2ORC_ver2": lambda example: ast.literal_eval(example["meta"]["source"]) + == "S2ORC", } - -def filter_subset(example,subset_key:str): +def filter_subset(example, subset_key: str): if meta_match[subset_key] in example["meta"]: return True else: @@ -51,20 +57,20 @@ def filter_subset(example,subset_key:str): for subset_key in meta_match.keys(): logger.info(f"Starting to filter {subset_key}") print(f"Starting to filter {subset_key}") - subset_dataset = dataset.filter(meta_match[subset_key], num_proc=args.num_workers) + subset_dataset = dataset.filter( + meta_match[subset_key], num_proc=args.num_workers + ) length = len(subset_dataset) stats_dict[subset_key] = length logger.info(f"Length of {subset_key} is {length}") print(f"Length of {subset_key} is {length}") - output_dir = pathlib.Path(args.output_dir)/subset_key + output_dir = pathlib.Path(args.output_dir) / subset_key output_dir.mkdir(parents=True, exist_ok=True) subset_dataset.save_to_disk(output_dir) logger.info(f"Saved {subset_key} to {output_dir}") print(f"Saved {subset_key} to {output_dir}") - stats_output_dir = pathlib.Path(args.stats_output_dir) stats_output_dir.mkdir(parents=True, exist_ok=True) - with open(stats_output_dir/"stats.json","w") as f: - json.dump(stats_dict,f,indent=2) - \ No newline at end of file + with open(stats_output_dir / "stats.json", "w") as f: + json.dump(stats_dict, f, indent=2) diff --git a/pile/scripts/run_group_stats.sh b/pile/scripts/run_group_stats.sh index 81b5661..0aa7a55 100644 --- a/pile/scripts/run_group_stats.sh +++ b/pile/scripts/run_group_stats.sh @@ -1 +1,2 @@ +#!/bin/bash python3 check_percentage_non_c4.py --input_dir diff --git a/pile/tests/__init__.py b/pile/tests/__init__.py index 581b63e..e69de29 100644 --- a/pile/tests/__init__.py +++ b/pile/tests/__init__.py @@ -1,66 +0,0 @@ -def test_dataset(dataset, run_download=False, run_replicate=False): - """Tests all the properties / functions in a dataset are working without - error. - - TODO: write proper tests - """ - print("repr: ") - print(dataset) - - print("\nname: ") - print(dataset.name) - - print("\nurls & url: ") - print(dataset.urls) - print(dataset.url) - - print("\nchecksum: ") - print(dataset.checksum) - - if run_download: - print("\ndownload: ") - dataset.download(force=True) - - if run_replicate: - print("\nreplicate: ") - dataset.replicate() - - print("\ndocuments: ") - print(next(dataset.documents())) - - print("\n paths: ") - print(next(dataset.paths())) - - print("\nexamples: ") - print(dataset.examples()) - - print("\nreturned size: ") - print(dataset.size()) - - print("\nActual size: ") - print(dataset._size()) - - print("\nreturned size on disk: ") - print(dataset.size_on_disk()) - - print("\nActual size on disk: ") - print(dataset._size_on_disk()) - - print("\nnum_docs: ") - print(dataset.num_docs()) - - print("\nschema: ") - print(dataset.schema) - - print("\nmirrors: ") - print(dataset.mirrors) - - print("\ndataset_dir: ") - print(dataset.dataset_dir()) - - print("\nexists: ") - print(dataset.exists()) - try: - dataset.raise_if_not_exists() - except Exception as e: - print(e) diff --git a/pile/tests/test_datasets.py b/pile/tests/test_datasets.py new file mode 100644 index 0000000..f650cb1 --- /dev/null +++ b/pile/tests/test_datasets.py @@ -0,0 +1,69 @@ +import pytest + + +@pytest.mark.parametrize(dataset=[]) +def test_dataset(dataset, run_download=False, run_replicate=False): + """ + Tests all the properties / functions + in a dataset are working without error. + """ + print("repr: ") + print(dataset) + + print("\nname: ") + print(dataset.name) + + print("\nurls & url: ") + print(dataset.urls) + print(dataset.url) + + print("\nchecksum: ") + print(dataset.checksum) + + if run_download: + print("\ndownload: ") + dataset.download(force=True) + + if run_replicate: + print("\nreplicate: ") + dataset.replicate() + + print("\ndocuments: ") + print(next(dataset.documents())) + + print("\n paths: ") + print(next(dataset.paths())) + + print("\nexamples: ") + print(dataset.examples()) + + print("\nreturned size: ") + print(dataset.size()) + + print("\nActual size: ") + print(dataset._size()) + + print("\nreturned size on disk: ") + print(dataset.size_on_disk()) + + print("\nActual size on disk: ") + print(dataset._size_on_disk()) + + print("\nnum_docs: ") + print(dataset.num_docs()) + + print("\nschema: ") + print(dataset.schema) + + print("\nmirrors: ") + print(dataset.mirrors) + + print("\ndataset_dir: ") + print(dataset.dataset_dir()) + + print("\nexists: ") + print(dataset.exists()) + try: + dataset.raise_if_not_exists() + except Exception as e: + print(e) diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..bf51eee --- /dev/null +++ b/poetry.lock @@ -0,0 +1,1352 @@ +# This file is automatically @generated by Poetry and should not be changed by hand. + +[[package]] +name = "aiohttp" +version = "3.7.4.post0" +description = "Async http client/server framework (asyncio)" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.7.4.post0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:3cf75f7cdc2397ed4442594b935a11ed5569961333d49b7539ea741be2cc79d5"}, + {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:4b302b45040890cea949ad092479e01ba25911a15e648429c7c5aae9650c67a8"}, + {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:fe60131d21b31fd1a14bd43e6bb88256f69dfc3188b3a89d736d6c71ed43ec95"}, + {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:393f389841e8f2dfc86f774ad22f00923fdee66d238af89b70ea314c4aefd290"}, + {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:c6e9dcb4cb338d91a73f178d866d051efe7c62a7166653a91e7d9fb18274058f"}, + {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:5df68496d19f849921f05f14f31bd6ef53ad4b00245da3195048c69934521809"}, + {file = "aiohttp-3.7.4.post0-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:0563c1b3826945eecd62186f3f5c7d31abb7391fedc893b7e2b26303b5a9f3fe"}, + {file = "aiohttp-3.7.4.post0-cp36-cp36m-win32.whl", hash = "sha256:3d78619672183be860b96ed96f533046ec97ca067fd46ac1f6a09cd9b7484287"}, + {file = "aiohttp-3.7.4.post0-cp36-cp36m-win_amd64.whl", hash = "sha256:f705e12750171c0ab4ef2a3c76b9a4024a62c4103e3a55dd6f99265b9bc6fcfc"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:230a8f7e24298dea47659251abc0fd8b3c4e38a664c59d4b89cca7f6c09c9e87"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2e19413bf84934d651344783c9f5e22dee452e251cfd220ebadbed2d9931dbf0"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:e4b2b334e68b18ac9817d828ba44d8fcb391f6acb398bcc5062b14b2cbeac970"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:d012ad7911653a906425d8473a1465caa9f8dea7fcf07b6d870397b774ea7c0f"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:40eced07f07a9e60e825554a31f923e8d3997cfc7fb31dbc1328c70826e04cde"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:209b4a8ee987eccc91e2bd3ac36adee0e53a5970b8ac52c273f7f8fd4872c94c"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:14762875b22d0055f05d12abc7f7d61d5fd4fe4642ce1a249abdf8c700bf1fd8"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-win32.whl", hash = "sha256:7615dab56bb07bff74bc865307aeb89a8bfd9941d2ef9d817b9436da3a0ea54f"}, + {file = "aiohttp-3.7.4.post0-cp37-cp37m-win_amd64.whl", hash = "sha256:d9e13b33afd39ddeb377eff2c1c4f00544e191e1d1dee5b6c51ddee8ea6f0cf5"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:547da6cacac20666422d4882cfcd51298d45f7ccb60a04ec27424d2f36ba3eaf"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:af9aa9ef5ba1fd5b8c948bb11f44891968ab30356d65fd0cc6707d989cd521df"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:64322071e046020e8797117b3658b9c2f80e3267daec409b350b6a7a05041213"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:bb437315738aa441251214dad17428cafda9cdc9729499f1d6001748e1d432f4"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:e54962802d4b8b18b6207d4a927032826af39395a3bd9196a5af43fc4e60b009"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:a00bb73540af068ca7390e636c01cbc4f644961896fa9363154ff43fd37af2f5"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:79ebfc238612123a713a457d92afb4096e2148be17df6c50fb9bf7a81c2f8013"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-win32.whl", hash = "sha256:515dfef7f869a0feb2afee66b957cc7bbe9ad0cdee45aec7fdc623f4ecd4fb16"}, + {file = "aiohttp-3.7.4.post0-cp38-cp38-win_amd64.whl", hash = "sha256:114b281e4d68302a324dd33abb04778e8557d88947875cbf4e842c2c01a030c5"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:7b18b97cf8ee5452fa5f4e3af95d01d84d86d32c5e2bfa260cf041749d66360b"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:15492a6368d985b76a2a5fdd2166cddfea5d24e69eefed4630cbaae5c81d89bd"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bdb230b4943891321e06fc7def63c7aace16095be7d9cf3b1e01be2f10fba439"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:cffe3ab27871bc3ea47df5d8f7013945712c46a3cc5a95b6bee15887f1675c22"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:f881853d2643a29e643609da57b96d5f9c9b93f62429dcc1cbb413c7d07f0e1a"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:a5ca29ee66f8343ed336816c553e82d6cade48a3ad702b9ffa6125d187e2dedb"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:17c073de315745a1510393a96e680d20af8e67e324f70b42accbd4cb3315c9fb"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-win32.whl", hash = "sha256:932bb1ea39a54e9ea27fc9232163059a0b8855256f4052e776357ad9add6f1c9"}, + {file = "aiohttp-3.7.4.post0-cp39-cp39-win_amd64.whl", hash = "sha256:02f46fc0e3c5ac58b80d4d56eb0a7c7d97fcef69ace9326289fb9f1955e65cfe"}, + {file = "aiohttp-3.7.4.post0.tar.gz", hash = "sha256:493d3299ebe5f5a7c66b9819eacdcfbbaaf1a8e84911ddffcdc48888497afecf"}, +] + +[package.dependencies] +async-timeout = ">=3.0,<4.0" +attrs = ">=17.3.0" +chardet = ">=2.0,<5.0" +multidict = ">=4.5,<7.0" +typing-extensions = ">=3.6.5" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["aiodns", "brotlipy", "cchardet"] + +[[package]] +name = "async-timeout" +version = "3.0.1" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = false +python-versions = ">=3.5.3" +files = [ + {file = "async-timeout-3.0.1.tar.gz", hash = "sha256:0c3c816a028d47f659d6ff5c745cb2acf1f966da1fe5c19c77a70282b25f4c5f"}, + {file = "async_timeout-3.0.1-py3-none-any.whl", hash = "sha256:4291ca197d287d274d0b6cb5d6f8f8f82d434ed288f962539ff18cc9012f9ea3"}, +] + +[[package]] +name = "attrs" +version = "22.2.0" +description = "Classes Without Boilerplate" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, + {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] +tests = ["attrs[tests-no-zope]", "zope.interface"] +tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] + +[[package]] +name = "certifi" +version = "2022.12.7" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, + {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, +] + +[[package]] +name = "chardet" +version = "4.0.0" +description = "Universal encoding detector for Python 2 and 3" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, + {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.0.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "charset-normalizer-3.0.1.tar.gz", hash = "sha256:ebea339af930f8ca5d7a699b921106c6e29c617fe9606fa7baa043c1cdae326f"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88600c72ef7587fe1708fd242b385b6ed4b8904976d5da0893e31df8b3480cb6"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c75ffc45f25324e68ab238cb4b5c0a38cd1c3d7f1fb1f72b5541de469e2247db"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db72b07027db150f468fbada4d85b3b2729a3db39178abf5c543b784c1254539"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62595ab75873d50d57323a91dd03e6966eb79c41fa834b7a1661ed043b2d404d"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:772b87914ff1152b92a197ef4ea40efe27a378606c39446ded52c8f80f79702e"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70990b9c51340e4044cfc394a81f614f3f90d41397104d226f21e66de668730d"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:292d5e8ba896bbfd6334b096e34bffb56161c81408d6d036a7dfa6929cff8783"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2edb64ee7bf1ed524a1da60cdcd2e1f6e2b4f66ef7c077680739f1641f62f555"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:31a9ddf4718d10ae04d9b18801bd776693487cbb57d74cc3458a7673f6f34639"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:44ba614de5361b3e5278e1241fda3dc1838deed864b50a10d7ce92983797fa76"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:12db3b2c533c23ab812c2b25934f60383361f8a376ae272665f8e48b88e8e1c6"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c512accbd6ff0270939b9ac214b84fb5ada5f0409c44298361b2f5e13f9aed9e"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-win32.whl", hash = "sha256:502218f52498a36d6bf5ea77081844017bf7982cdbe521ad85e64cabee1b608b"}, + {file = "charset_normalizer-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:601f36512f9e28f029d9481bdaf8e89e5148ac5d89cffd3b05cd533eeb423b59"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0298eafff88c99982a4cf66ba2efa1128e4ddaca0b05eec4c456bbc7db691d8d"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a8d0fc946c784ff7f7c3742310cc8a57c5c6dc31631269876a88b809dbeff3d3"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:87701167f2a5c930b403e9756fab1d31d4d4da52856143b609e30a1ce7160f3c"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e76c0f23218b8f46c4d87018ca2e441535aed3632ca134b10239dfb6dadd6b"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c0a590235ccd933d9892c627dec5bc7511ce6ad6c1011fdf5b11363022746c1"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c7fe7afa480e3e82eed58e0ca89f751cd14d767638e2550c77a92a9e749c317"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79909e27e8e4fcc9db4addea88aa63f6423ebb171db091fb4373e3312cb6d603"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ac7b6a045b814cf0c47f3623d21ebd88b3e8cf216a14790b455ea7ff0135d18"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:72966d1b297c741541ca8cf1223ff262a6febe52481af742036a0b296e35fa5a"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5995f0164fa7df59db4746112fec3f49c461dd6b31b841873443bdb077c13cfc"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4a8fcf28c05c1f6d7e177a9a46a1c52798bfe2ad80681d275b10dcf317deaf0b"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:761e8904c07ad053d285670f36dd94e1b6ab7f16ce62b9805c475b7aa1cffde6"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-win32.whl", hash = "sha256:71140351489970dfe5e60fc621ada3e0f41104a5eddaca47a7acb3c1b851d6d3"}, + {file = "charset_normalizer-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ab77acb98eba3fd2a85cd160851816bfce6871d944d885febf012713f06659c"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:84c3990934bae40ea69a82034912ffe5a62c60bbf6ec5bc9691419641d7d5c9a"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74292fc76c905c0ef095fe11e188a32ebd03bc38f3f3e9bcb85e4e6db177b7ea"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c95a03c79bbe30eec3ec2b7f076074f4281526724c8685a42872974ef4d36b72"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c39b0e3eac288fedc2b43055cfc2ca7a60362d0e5e87a637beac5d801ef478"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df2c707231459e8a4028eabcd3cfc827befd635b3ef72eada84ab13b52e1574d"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93ad6d87ac18e2a90b0fe89df7c65263b9a99a0eb98f0a3d2e079f12a0735837"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:59e5686dd847347e55dffcc191a96622f016bc0ad89105e24c14e0d6305acbc6"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:cd6056167405314a4dc3c173943f11249fa0f1b204f8b51ed4bde1a9cd1834dc"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:083c8d17153ecb403e5e1eb76a7ef4babfc2c48d58899c98fcaa04833e7a2f9a"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:f5057856d21e7586765171eac8b9fc3f7d44ef39425f85dbcccb13b3ebea806c"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:7eb33a30d75562222b64f569c642ff3dc6689e09adda43a082208397f016c39a"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-win32.whl", hash = "sha256:95dea361dd73757c6f1c0a1480ac499952c16ac83f7f5f4f84f0658a01b8ef41"}, + {file = "charset_normalizer-3.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:eaa379fcd227ca235d04152ca6704c7cb55564116f8bc52545ff357628e10602"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3e45867f1f2ab0711d60c6c71746ac53537f1684baa699f4f668d4c6f6ce8e14"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cadaeaba78750d58d3cc6ac4d1fd867da6fc73c88156b7a3212a3cd4819d679d"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:911d8a40b2bef5b8bbae2e36a0b103f142ac53557ab421dc16ac4aafee6f53dc"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:503e65837c71b875ecdd733877d852adbc465bd82c768a067badd953bf1bc5a3"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a60332922359f920193b1d4826953c507a877b523b2395ad7bc716ddd386d866"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16a8663d6e281208d78806dbe14ee9903715361cf81f6d4309944e4d1e59ac5b"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a16418ecf1329f71df119e8a65f3aa68004a3f9383821edcb20f0702934d8087"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d9153257a3f70d5f69edf2325357251ed20f772b12e593f3b3377b5f78e7ef8"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:02a51034802cbf38db3f89c66fb5d2ec57e6fe7ef2f4a44d070a593c3688667b"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:2e396d70bc4ef5325b72b593a72c8979999aa52fb8bcf03f701c1b03e1166918"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:11b53acf2411c3b09e6af37e4b9005cba376c872503c8f28218c7243582df45d"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-win32.whl", hash = "sha256:0bf2dae5291758b6f84cf923bfaa285632816007db0330002fa1de38bfcb7154"}, + {file = "charset_normalizer-3.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2c03cc56021a4bd59be889c2b9257dae13bf55041a3372d3295416f86b295fb5"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4b0d02d7102dd0f997580b51edc4cebcf2ab6397a7edf89f1c73b586c614272c"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:358a7c4cb8ba9b46c453b1dd8d9e431452d5249072e4f56cfda3149f6ab1405e"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81d6741ab457d14fdedc215516665050f3822d3e56508921cc7239f8c8e66a58"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8b8af03d2e37866d023ad0ddea594edefc31e827fee64f8de5611a1dbc373174"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cf4e8ad252f7c38dd1f676b46514f92dc0ebeb0db5552f5f403509705e24753"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e696f0dd336161fca9adbb846875d40752e6eba585843c768935ba5c9960722b"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c22d3fe05ce11d3671297dc8973267daa0f938b93ec716e12e0f6dee81591dc1"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:109487860ef6a328f3eec66f2bf78b0b72400280d8f8ea05f69c51644ba6521a"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:37f8febc8ec50c14f3ec9637505f28e58d4f66752207ea177c1d67df25da5aed"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f97e83fa6c25693c7a35de154681fcc257c1c41b38beb0304b9c4d2d9e164479"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a152f5f33d64a6be73f1d30c9cc82dfc73cec6477ec268e7c6e4c7d23c2d2291"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:39049da0ffb96c8cbb65cbf5c5f3ca3168990adf3551bd1dee10c48fce8ae820"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-win32.whl", hash = "sha256:4457ea6774b5611f4bed5eaa5df55f70abde42364d498c5134b7ef4c6958e20e"}, + {file = "charset_normalizer-3.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:e62164b50f84e20601c1ff8eb55620d2ad25fb81b59e3cd776a1902527a788af"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8eade758719add78ec36dc13201483f8e9b5d940329285edcd5f70c0a9edbd7f"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8499ca8f4502af841f68135133d8258f7b32a53a1d594aa98cc52013fff55678"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3fc1c4a2ffd64890aebdb3f97e1278b0cc72579a08ca4de8cd2c04799a3a22be"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2ac1b08635a8cd4e0cbeaf6f5e922085908d48eb05d44c5ae9eabab148512ca"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6f45710b4459401609ebebdbcfb34515da4fc2aa886f95107f556ac69a9147e"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ae1de54a77dc0d6d5fcf623290af4266412a7c4be0b1ff7444394f03f5c54e3"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b590df687e3c5ee0deef9fc8c547d81986d9a1b56073d82de008744452d6541"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab5de034a886f616a5668aa5d098af2b5385ed70142090e2a31bcbd0af0fdb3d"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9cb3032517f1627cc012dbc80a8ec976ae76d93ea2b5feaa9d2a5b8882597579"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:608862a7bf6957f2333fc54ab4399e405baad0163dc9f8d99cb236816db169d4"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0f438ae3532723fb6ead77e7c604be7c8374094ef4ee2c5e03a3a17f1fca256c"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:356541bf4381fa35856dafa6a965916e54bed415ad8a24ee6de6e37deccf2786"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-win32.whl", hash = "sha256:39cf9ed17fe3b1bc81f33c9ceb6ce67683ee7526e65fde1447c772afc54a1bb8"}, + {file = "charset_normalizer-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:0a11e971ed097d24c534c037d298ad32c6ce81a45736d31e0ff0ad37ab437d59"}, + {file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"}, +] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.0.5" +description = "Code coverage measurement for Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "coverage-7.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2a7f23bbaeb2a87f90f607730b45564076d870f1fb07b9318d0c21f36871932b"}, + {file = "coverage-7.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c18d47f314b950dbf24a41787ced1474e01ca816011925976d90a88b27c22b89"}, + {file = "coverage-7.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef14d75d86f104f03dea66c13188487151760ef25dd6b2dbd541885185f05f40"}, + {file = "coverage-7.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66e50680e888840c0995f2ad766e726ce71ca682e3c5f4eee82272c7671d38a2"}, + {file = "coverage-7.0.5-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9fed35ca8c6e946e877893bbac022e8563b94404a605af1d1e6accc7eb73289"}, + {file = "coverage-7.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d8d04e755934195bdc1db45ba9e040b8d20d046d04d6d77e71b3b34a8cc002d0"}, + {file = "coverage-7.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7e109f1c9a3ece676597831874126555997c48f62bddbcace6ed17be3e372de8"}, + {file = "coverage-7.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0a1890fca2962c4f1ad16551d660b46ea77291fba2cc21c024cd527b9d9c8809"}, + {file = "coverage-7.0.5-cp310-cp310-win32.whl", hash = "sha256:be9fcf32c010da0ba40bf4ee01889d6c737658f4ddff160bd7eb9cac8f094b21"}, + {file = "coverage-7.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:cbfcba14a3225b055a28b3199c3d81cd0ab37d2353ffd7f6fd64844cebab31ad"}, + {file = "coverage-7.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:30b5fec1d34cc932c1bc04017b538ce16bf84e239378b8f75220478645d11fca"}, + {file = "coverage-7.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1caed2367b32cc80a2b7f58a9f46658218a19c6cfe5bc234021966dc3daa01f0"}, + {file = "coverage-7.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d254666d29540a72d17cc0175746cfb03d5123db33e67d1020e42dae611dc196"}, + {file = "coverage-7.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19245c249aa711d954623d94f23cc94c0fd65865661f20b7781210cb97c471c0"}, + {file = "coverage-7.0.5-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b05ed4b35bf6ee790832f68932baf1f00caa32283d66cc4d455c9e9d115aafc"}, + {file = "coverage-7.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:29de916ba1099ba2aab76aca101580006adfac5646de9b7c010a0f13867cba45"}, + {file = "coverage-7.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e057e74e53db78122a3979f908973e171909a58ac20df05c33998d52e6d35757"}, + {file = "coverage-7.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:411d4ff9d041be08fdfc02adf62e89c735b9468f6d8f6427f8a14b6bb0a85095"}, + {file = "coverage-7.0.5-cp311-cp311-win32.whl", hash = "sha256:52ab14b9e09ce052237dfe12d6892dd39b0401690856bcfe75d5baba4bfe2831"}, + {file = "coverage-7.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:1f66862d3a41674ebd8d1a7b6f5387fe5ce353f8719040a986551a545d7d83ea"}, + {file = "coverage-7.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b69522b168a6b64edf0c33ba53eac491c0a8f5cc94fa4337f9c6f4c8f2f5296c"}, + {file = "coverage-7.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:436e103950d05b7d7f55e39beeb4d5be298ca3e119e0589c0227e6d0b01ee8c7"}, + {file = "coverage-7.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8c56bec53d6e3154eaff6ea941226e7bd7cc0d99f9b3756c2520fc7a94e6d96"}, + {file = "coverage-7.0.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a38362528a9115a4e276e65eeabf67dcfaf57698e17ae388599568a78dcb029"}, + {file = "coverage-7.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f67472c09a0c7486e27f3275f617c964d25e35727af952869dd496b9b5b7f6a3"}, + {file = "coverage-7.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:220e3fa77d14c8a507b2d951e463b57a1f7810a6443a26f9b7591ef39047b1b2"}, + {file = "coverage-7.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ecb0f73954892f98611e183f50acdc9e21a4653f294dfbe079da73c6378a6f47"}, + {file = "coverage-7.0.5-cp37-cp37m-win32.whl", hash = "sha256:d8f3e2e0a1d6777e58e834fd5a04657f66affa615dae61dd67c35d1568c38882"}, + {file = "coverage-7.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:9e662e6fc4f513b79da5d10a23edd2b87685815b337b1a30cd11307a6679148d"}, + {file = "coverage-7.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:790e4433962c9f454e213b21b0fd4b42310ade9c077e8edcb5113db0818450cb"}, + {file = "coverage-7.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49640bda9bda35b057b0e65b7c43ba706fa2335c9a9896652aebe0fa399e80e6"}, + {file = "coverage-7.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d66187792bfe56f8c18ba986a0e4ae44856b1c645336bd2c776e3386da91e1dd"}, + {file = "coverage-7.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:276f4cd0001cd83b00817c8db76730938b1ee40f4993b6a905f40a7278103b3a"}, + {file = "coverage-7.0.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95304068686545aa368b35dfda1cdfbbdbe2f6fe43de4a2e9baa8ebd71be46e2"}, + {file = "coverage-7.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:17e01dd8666c445025c29684d4aabf5a90dc6ef1ab25328aa52bedaa95b65ad7"}, + {file = "coverage-7.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea76dbcad0b7b0deb265d8c36e0801abcddf6cc1395940a24e3595288b405ca0"}, + {file = "coverage-7.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:50a6adc2be8edd7ee67d1abc3cd20678987c7b9d79cd265de55941e3d0d56499"}, + {file = "coverage-7.0.5-cp38-cp38-win32.whl", hash = "sha256:e4ce984133b888cc3a46867c8b4372c7dee9cee300335e2925e197bcd45b9e16"}, + {file = "coverage-7.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:4a950f83fd3f9bca23b77442f3a2b2ea4ac900944d8af9993743774c4fdc57af"}, + {file = "coverage-7.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3c2155943896ac78b9b0fd910fb381186d0c345911f5333ee46ac44c8f0e43ab"}, + {file = "coverage-7.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:54f7e9705e14b2c9f6abdeb127c390f679f6dbe64ba732788d3015f7f76ef637"}, + {file = "coverage-7.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ee30375b409d9a7ea0f30c50645d436b6f5dfee254edffd27e45a980ad2c7f4"}, + {file = "coverage-7.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b78729038abea6a5df0d2708dce21e82073463b2d79d10884d7d591e0f385ded"}, + {file = "coverage-7.0.5-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13250b1f0bd023e0c9f11838bdeb60214dd5b6aaf8e8d2f110c7e232a1bff83b"}, + {file = "coverage-7.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2c407b1950b2d2ffa091f4e225ca19a66a9bd81222f27c56bd12658fc5ca1209"}, + {file = "coverage-7.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c76a3075e96b9c9ff00df8b5f7f560f5634dffd1658bafb79eb2682867e94f78"}, + {file = "coverage-7.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f26648e1b3b03b6022b48a9b910d0ae209e2d51f50441db5dce5b530fad6d9b1"}, + {file = "coverage-7.0.5-cp39-cp39-win32.whl", hash = "sha256:ba3027deb7abf02859aca49c865ece538aee56dcb4871b4cced23ba4d5088904"}, + {file = "coverage-7.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:949844af60ee96a376aac1ded2a27e134b8c8d35cc006a52903fc06c24a3296f"}, + {file = "coverage-7.0.5-pp37.pp38.pp39-none-any.whl", hash = "sha256:b9727ac4f5cf2cbf87880a63870b5b9730a8ae3a4a360241a0fdaa2f71240ff0"}, + {file = "coverage-7.0.5.tar.gz", hash = "sha256:051afcbd6d2ac39298d62d340f94dbb6a1f31de06dfaf6fcef7b759dd3860c45"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "cython" +version = "0.29.33" +description = "The Cython compiler for writing C extensions for the Python language." +category = "main" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "Cython-0.29.33-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:286cdfb193e23799e113b7bd5ac74f58da5e9a77c70e3b645b078836b896b165"}, + {file = "Cython-0.29.33-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8507279a4f86ed8365b96603d5ad155888d4d01b72a9bbf0615880feda5a11d4"}, + {file = "Cython-0.29.33-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5bf5ffd96957a595441cca2fc78470d93fdc40dfe5449881b812ea6045d7e9be"}, + {file = "Cython-0.29.33-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d2019a7e54ba8b253f44411863b8f8c0b6cd623f7a92dc0ccb83892358c4283a"}, + {file = "Cython-0.29.33-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:190e60b7505d3b9b60130bcc2251c01b9ef52603420829c19d3c3ede4ac2763a"}, + {file = "Cython-0.29.33-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0168482495b75fea1c97a9641a95bac991f313e85f378003f9a4909fdeb3d454"}, + {file = "Cython-0.29.33-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:090556e41f2b30427dd3a1628d3613177083f47567a30148b6b7b8c7a5862187"}, + {file = "Cython-0.29.33-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:19c9913e9304bf97f1d2c357438895466f99aa2707d3c7a5e9de60c259e1ca1d"}, + {file = "Cython-0.29.33-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:afc9b6ab20889676c76e700ae6967aa6886a7efe5b05ef6d5b744a6ca793cc43"}, + {file = "Cython-0.29.33-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:49fb45b2bf12d6e2060bbd64506c06ac90e254f3a4bceb32c717f4964a1ae812"}, + {file = "Cython-0.29.33-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:5430f38d3d01c4715ec2aef5c41e02a2441c1c3a0149359c7a498e4c605b8e6c"}, + {file = "Cython-0.29.33-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c4d315443c7f4c61180b6c3ea9a9717ee7c901cc9db8d1d46fdf6556613840ed"}, + {file = "Cython-0.29.33-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6b4e6481e3e7e4d345640fe2fdc6dc57c94369b467f3dc280949daa8e9fd13b9"}, + {file = "Cython-0.29.33-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:060a2568ef80116a0a9dcaf3218a61c6007be0e0b77c5752c094ce5187a4d63c"}, + {file = "Cython-0.29.33-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:b67ddd32eaa2932a66bf8121accc36a7b3078593805519b0f00040f2b10a6a52"}, + {file = "Cython-0.29.33-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1b507236ba3ca94170ce0a504dd03acf77307d4bfbc5a010a8031673f6b213a9"}, + {file = "Cython-0.29.33-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:581efc0622a9be05714222f2b4ac96a5419de58d5949517282d8df38155c8b9d"}, + {file = "Cython-0.29.33-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6b8bcbf8f1c3c46d6184be1e559e3a3fb8cdf27c6d507d8bc8ae04cfcbfd75f5"}, + {file = "Cython-0.29.33-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1ca93bbe584aee92094fd4fb6acc5cb6500acf98d4f57cc59244f0a598b0fcf6"}, + {file = "Cython-0.29.33-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:da490129e1e4ffaf3f88bfb46d338549a2150f60f809a63d385b83e00960d11a"}, + {file = "Cython-0.29.33-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4cadf5250eda0c5cdaf4c3a29b52be3e0695f4a2bf1ccd49b638d239752ea513"}, + {file = "Cython-0.29.33-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:bcb1a84fd2bd7885d572adc180e24fd8a7d4b0c104c144e33ccf84a1ab4eb2b8"}, + {file = "Cython-0.29.33-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:d78147ad8a3417ae6b371bbc5bfc6512f6ad4ad3fb71f5eef42e136e4ed14970"}, + {file = "Cython-0.29.33-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dd96b06b93c0e5fa4fc526c5be37c13a93e2fe7c372b5f358277ebe9e1620957"}, + {file = "Cython-0.29.33-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:959f0092d58e7fa00fd3434f7ff32fb78be7c2fa9f8e0096326343159477fe45"}, + {file = "Cython-0.29.33-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0455d5b92f461218bcf173a149a88b7396c3a109066274ccab5eff58db0eae32"}, + {file = "Cython-0.29.33-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:a9b0b890656e9d18a18e1efe26ea3d2d0f3e525a07a2a853592b0afc56a15c89"}, + {file = "Cython-0.29.33-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:b5e8ce3039ff64000d58cd45b3f6f83e13f032dde7f27bb1ab96070d9213550b"}, + {file = "Cython-0.29.33-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:e8922fa3d7e76b7186bbd0810e170ca61f83661ab1b29dc75e88ff2327aaf49d"}, + {file = "Cython-0.29.33-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f67b7306fd00d55f271009335cecadc506d144205c7891070aad889928d85750"}, + {file = "Cython-0.29.33-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f271f90005064c49b47a93f456dc6cf0a21d21ef835bd33ac1e0db10ad51f84f"}, + {file = "Cython-0.29.33-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d4457d417ffbb94abc42adcd63a03b24ff39cf090f3e9eca5e10cfb90766cbe3"}, + {file = "Cython-0.29.33-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:0b53e017522feb8dcc2189cf1d2d344bab473c5bba5234390b5666d822992c7c"}, + {file = "Cython-0.29.33-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:4f88c2dc0653eef6468848eb8022faf64115b39734f750a1c01a7ba7eb04d89f"}, + {file = "Cython-0.29.33-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:1900d862a4a537d2125706740e9f3b016e80f7bbf7b54db6b3cc3d0bdf0f5c3a"}, + {file = "Cython-0.29.33-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:37bfca4f9f26361343d8c678f8178321e4ae5b919523eed05d2cd8ddbe6b06ec"}, + {file = "Cython-0.29.33-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a9863f8238642c0b1ef8069d99da5ade03bfe2225a64b00c5ae006d95f142a73"}, + {file = "Cython-0.29.33-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1dd503408924723b0bb10c0013b76e324eeee42db6deced9b02b648f1415d94c"}, + {file = "Cython-0.29.33-py2.py3-none-any.whl", hash = "sha256:8b99252bde8ff51cd06a3fe4aeacd3af9b4ff4a4e6b701ac71bddc54f5da61d6"}, + {file = "Cython-0.29.33.tar.gz", hash = "sha256:5040764c4a4d2ce964a395da24f0d1ae58144995dab92c6b96f44c3f4d72286a"}, +] + +[[package]] +name = "datasets" +version = "2.8.0" +description = "HuggingFace community-driven open-source library of datasets" +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "datasets-2.8.0-py3-none-any.whl", hash = "sha256:f36cb362bb5587659bab18e594b6d25d9d28486d735a571319c82efeb5a4e5df"}, + {file = "datasets-2.8.0.tar.gz", hash = "sha256:a843b69593914071f921fc1086fde939f30a63415a34cdda5db3c0acdd58aff2"}, +] + +[package.dependencies] +aiohttp = "*" +dill = "<0.3.7" +fsspec = {version = ">=2021.11.1", extras = ["http"]} +huggingface-hub = ">=0.2.0,<1.0.0" +multiprocess = "*" +numpy = ">=1.17" +packaging = "*" +pandas = "*" +pyarrow = ">=6.0.0" +pyyaml = ">=5.1" +requests = ">=2.19.0" +responses = "<0.19" +tqdm = ">=4.62.1" +xxhash = "*" + +[package.extras] +apache-beam = ["apache-beam (>=2.26.0)"] +audio = ["librosa"] +benchmarks = ["numpy (==1.18.5)", "tensorflow (==2.3.0)", "torch (==1.7.1)", "transformers (==3.0.2)"] +dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "black (>=22.0,<23.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "flake8 (>=3.8.3)", "isort (>=5.0.0)", "librosa", "lz4", "py7zr", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "torchaudio (<0.12.0)", "transformers", "zstandard"] +docs = ["s3fs"] +metrics-tests = ["Werkzeug (>=1.0.1)", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "sqlalchemy", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] +quality = ["black (>=22.0,<23.0)", "flake8 (>=3.8.3)", "isort (>=5.0.0)", "pyyaml (>=5.3.1)"] +s3 = ["s3fs"] +tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"] +tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"] +tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "librosa", "lz4", "py7zr", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "torchaudio (<0.12.0)", "transformers", "zstandard"] +torch = ["torch"] +vision = ["Pillow (>=6.2.1)"] + +[[package]] +name = "datasketch" +version = "1.5.8" +description = "Probabilistic data structures for processing and searching very large datasets" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "datasketch-1.5.8-py2.py3-none-any.whl", hash = "sha256:3d0c21bc6311cfc1559ccef4a1de0e1e94ceeafa285c604f641c7dfd140ce540"}, + {file = "datasketch-1.5.8.tar.gz", hash = "sha256:42879d09f1cdbdf78a20f05739791a9665a18bc3c59652b3f18e3d1051067a30"}, +] + +[package.dependencies] +numpy = ">=1.11" +scipy = ">=1.0.0" + +[package.extras] +benchmark = ["SetSimilaritySearch (>=0.1.7)", "matplotlib (>=3.1.2)", "nltk (>=3.4.5)", "pandas (>=0.25.3)", "pyfarmhash (>=0.2.2)", "pyhash (>=0.9.3)", "scikit-learn (>=0.21.3)", "scipy (>=1.3.3)"] +cassandra = ["cassandra-driver (>=3.20)"] +experimental-aio = ["aiounittest", "motor"] +redis = ["redis (>=2.10.0)"] +test = ["cassandra-driver (>=3.20)", "coverage", "mock (>=2.0.0)", "mockredispy", "nose (>=1.3.7)", "nose-exclude (>=0.5.0)", "pymongo (>=3.9.0)", "redis (>=2.10.0)"] + +[[package]] +name = "dill" +version = "0.3.6" +description = "serialize all of python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "dill-0.3.6-py3-none-any.whl", hash = "sha256:a07ffd2351b8c678dfc4a856a3005f8067aea51d6ba6c700796a4d9e280f39f0"}, + {file = "dill-0.3.6.tar.gz", hash = "sha256:e5db55f3687856d8fbdab002ed78544e1c4559a130302693d839dfe8f93f2373"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] + +[[package]] +name = "exceptiongroup" +version = "1.1.0" +description = "Backport of PEP 654 (exception groups)" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.0-py3-none-any.whl", hash = "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e"}, + {file = "exceptiongroup-1.1.0.tar.gz", hash = "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "filelock" +version = "3.9.0" +description = "A platform independent file lock." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "filelock-3.9.0-py3-none-any.whl", hash = "sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d"}, + {file = "filelock-3.9.0.tar.gz", hash = "sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de"}, +] + +[package.extras] +docs = ["furo (>=2022.12.7)", "sphinx (>=5.3)", "sphinx-autodoc-typehints (>=1.19.5)"] +testing = ["covdefaults (>=2.2.2)", "coverage (>=7.0.1)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-timeout (>=2.1)"] + +[[package]] +name = "fsspec" +version = "2022.11.0" +description = "File-system specification" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "fsspec-2022.11.0-py3-none-any.whl", hash = "sha256:d6e462003e3dcdcb8c7aa84c73a228f8227e72453cd22570e2363e8844edfe7b"}, + {file = "fsspec-2022.11.0.tar.gz", hash = "sha256:259d5fd5c8e756ff2ea72f42e7613c32667dc2049a4ac3d84364a7ca034acb8b"}, +] + +[package.dependencies] +aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} +requests = {version = "*", optional = true, markers = "extra == \"http\""} + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +entrypoints = ["importlib-metadata"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +tqdm = ["tqdm"] + +[[package]] +name = "huggingface-hub" +version = "0.11.1" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "huggingface_hub-0.11.1-py3-none-any.whl", hash = "sha256:11eed7aab4fa4d1fb532f2aea3379ef4998d9f6bc24a330834dfedd3dac7f441"}, + {file = "huggingface_hub-0.11.1.tar.gz", hash = "sha256:8b9ebf9bbb1782f6f0419ec490973a6487c6c4ed84293a8a325d34c4f898f53f"}, +] + +[package.dependencies] +filelock = "*" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = "*" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "black (==22.3)", "flake8 (>=3.8.3)", "flake8-bugbear", "isort (>=5.5.4)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "black (==22.3)", "flake8 (>=3.8.3)", "flake8-bugbear", "isort (>=5.5.4)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +quality = ["black (==22.3)", "flake8 (>=3.8.3)", "flake8-bugbear", "isort (>=5.5.4)", "mypy (==0.982)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "isort (>=5.5.4)", "jedi", "pytest", "pytest-cov", "pytest-env", "soundfile"] +torch = ["torch"] +typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] + +[[package]] +name = "multiprocess" +version = "0.70.14" +description = "better multiprocessing and multithreading in python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multiprocess-0.70.14-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:560a27540daef4ce8b24ed3cc2496a3c670df66c96d02461a4da67473685adf3"}, + {file = "multiprocess-0.70.14-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:bfbbfa36f400b81d1978c940616bc77776424e5e34cb0c94974b178d727cfcd5"}, + {file = "multiprocess-0.70.14-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:89fed99553a04ec4f9067031f83a886d7fdec5952005551a896a4b6a59575bb9"}, + {file = "multiprocess-0.70.14-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:40a5e3685462079e5fdee7c6789e3ef270595e1755199f0d50685e72523e1d2a"}, + {file = "multiprocess-0.70.14-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:44936b2978d3f2648727b3eaeab6d7fa0bedf072dc5207bf35a96d5ee7c004cf"}, + {file = "multiprocess-0.70.14-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e628503187b5d494bf29ffc52d3e1e57bb770ce7ce05d67c4bbdb3a0c7d3b05f"}, + {file = "multiprocess-0.70.14-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0d5da0fc84aacb0e4bd69c41b31edbf71b39fe2fb32a54eaedcaea241050855c"}, + {file = "multiprocess-0.70.14-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:6a7b03a5b98e911a7785b9116805bd782815c5e2bd6c91c6a320f26fd3e7b7ad"}, + {file = "multiprocess-0.70.14-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cea5bdedd10aace3c660fedeac8b087136b4366d4ee49a30f1ebf7409bce00ae"}, + {file = "multiprocess-0.70.14-py310-none-any.whl", hash = "sha256:7dc1f2f6a1d34894c8a9a013fbc807971e336e7cc3f3ff233e61b9dc679b3b5c"}, + {file = "multiprocess-0.70.14-py37-none-any.whl", hash = "sha256:93a8208ca0926d05cdbb5b9250a604c401bed677579e96c14da3090beb798193"}, + {file = "multiprocess-0.70.14-py38-none-any.whl", hash = "sha256:6725bc79666bbd29a73ca148a0fb5f4ea22eed4a8f22fce58296492a02d18a7b"}, + {file = "multiprocess-0.70.14-py39-none-any.whl", hash = "sha256:63cee628b74a2c0631ef15da5534c8aedbc10c38910b9c8b18dcd327528d1ec7"}, + {file = "multiprocess-0.70.14.tar.gz", hash = "sha256:3eddafc12f2260d27ae03fe6069b12570ab4764ab59a75e81624fac453fbf46a"}, +] + +[package.dependencies] +dill = ">=0.3.6" + +[[package]] +name = "numpy" +version = "1.24.1" +description = "Fundamental package for array computing in Python" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:179a7ef0889ab769cc03573b6217f54c8bd8e16cef80aad369e1e8185f994cd7"}, + {file = "numpy-1.24.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b09804ff570b907da323b3d762e74432fb07955701b17b08ff1b5ebaa8cfe6a9"}, + {file = "numpy-1.24.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1b739841821968798947d3afcefd386fa56da0caf97722a5de53e07c4ccedc7"}, + {file = "numpy-1.24.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e3463e6ac25313462e04aea3fb8a0a30fb906d5d300f58b3bc2c23da6a15398"}, + {file = "numpy-1.24.1-cp310-cp310-win32.whl", hash = "sha256:b31da69ed0c18be8b77bfce48d234e55d040793cebb25398e2a7d84199fbc7e2"}, + {file = "numpy-1.24.1-cp310-cp310-win_amd64.whl", hash = "sha256:b07b40f5fb4fa034120a5796288f24c1fe0e0580bbfff99897ba6267af42def2"}, + {file = "numpy-1.24.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7094891dcf79ccc6bc2a1f30428fa5edb1e6fb955411ffff3401fb4ea93780a8"}, + {file = "numpy-1.24.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28e418681372520c992805bb723e29d69d6b7aa411065f48216d8329d02ba032"}, + {file = "numpy-1.24.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e274f0f6c7efd0d577744f52032fdd24344f11c5ae668fe8d01aac0422611df1"}, + {file = "numpy-1.24.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0044f7d944ee882400890f9ae955220d29b33d809a038923d88e4e01d652acd9"}, + {file = "numpy-1.24.1-cp311-cp311-win32.whl", hash = "sha256:442feb5e5bada8408e8fcd43f3360b78683ff12a4444670a7d9e9824c1817d36"}, + {file = "numpy-1.24.1-cp311-cp311-win_amd64.whl", hash = "sha256:de92efa737875329b052982e37bd4371d52cabf469f83e7b8be9bb7752d67e51"}, + {file = "numpy-1.24.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b162ac10ca38850510caf8ea33f89edcb7b0bb0dfa5592d59909419986b72407"}, + {file = "numpy-1.24.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26089487086f2648944f17adaa1a97ca6aee57f513ba5f1c0b7ebdabbe2b9954"}, + {file = "numpy-1.24.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caf65a396c0d1f9809596be2e444e3bd4190d86d5c1ce21f5fc4be60a3bc5b36"}, + {file = "numpy-1.24.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0677a52f5d896e84414761531947c7a330d1adc07c3a4372262f25d84af7bf7"}, + {file = "numpy-1.24.1-cp38-cp38-win32.whl", hash = "sha256:dae46bed2cb79a58d6496ff6d8da1e3b95ba09afeca2e277628171ca99b99db1"}, + {file = "numpy-1.24.1-cp38-cp38-win_amd64.whl", hash = "sha256:6ec0c021cd9fe732e5bab6401adea5a409214ca5592cd92a114f7067febcba0c"}, + {file = "numpy-1.24.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:28bc9750ae1f75264ee0f10561709b1462d450a4808cd97c013046073ae64ab6"}, + {file = "numpy-1.24.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84e789a085aabef2f36c0515f45e459f02f570c4b4c4c108ac1179c34d475ed7"}, + {file = "numpy-1.24.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e669fbdcdd1e945691079c2cae335f3e3a56554e06bbd45d7609a6cf568c700"}, + {file = "numpy-1.24.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef85cf1f693c88c1fd229ccd1055570cb41cdf4875873b7728b6301f12cd05bf"}, + {file = "numpy-1.24.1-cp39-cp39-win32.whl", hash = "sha256:87a118968fba001b248aac90e502c0b13606721b1343cdaddbc6e552e8dfb56f"}, + {file = "numpy-1.24.1-cp39-cp39-win_amd64.whl", hash = "sha256:ddc7ab52b322eb1e40521eb422c4e0a20716c271a306860979d450decbb51b8e"}, + {file = "numpy-1.24.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ed5fb71d79e771ec930566fae9c02626b939e37271ec285e9efaf1b5d4370e7d"}, + {file = "numpy-1.24.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad2925567f43643f51255220424c23d204024ed428afc5aad0f86f3ffc080086"}, + {file = "numpy-1.24.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:cfa1161c6ac8f92dea03d625c2d0c05e084668f4a06568b77a25a89111621566"}, + {file = "numpy-1.24.1.tar.gz", hash = "sha256:2386da9a471cc00a1f47845e27d916d5ec5346ae9696e01a8a34760858fe9dd2"}, +] + +[[package]] +name = "packaging" +version = "23.0" +description = "Core utilities for Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, + {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, +] + +[[package]] +name = "pandas" +version = "1.5.2" +description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pandas-1.5.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e9dbacd22555c2d47f262ef96bb4e30880e5956169741400af8b306bbb24a273"}, + {file = "pandas-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e2b83abd292194f350bb04e188f9379d36b8dfac24dd445d5c87575f3beaf789"}, + {file = "pandas-1.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2552bffc808641c6eb471e55aa6899fa002ac94e4eebfa9ec058649122db5824"}, + {file = "pandas-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fc87eac0541a7d24648a001d553406f4256e744d92df1df8ebe41829a915028"}, + {file = "pandas-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0d8fd58df5d17ddb8c72a5075d87cd80d71b542571b5f78178fb067fa4e9c72"}, + {file = "pandas-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:4aed257c7484d01c9a194d9a94758b37d3d751849c05a0050c087a358c41ad1f"}, + {file = "pandas-1.5.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:375262829c8c700c3e7cbb336810b94367b9c4889818bbd910d0ecb4e45dc261"}, + {file = "pandas-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc3cd122bea268998b79adebbb8343b735a5511ec14efb70a39e7acbc11ccbdc"}, + {file = "pandas-1.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b4f5a82afa4f1ff482ab8ded2ae8a453a2cdfde2001567b3ca24a4c5c5ca0db3"}, + {file = "pandas-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8092a368d3eb7116e270525329a3e5c15ae796ccdf7ccb17839a73b4f5084a39"}, + {file = "pandas-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6257b314fc14958f8122779e5a1557517b0f8e500cfb2bd53fa1f75a8ad0af2"}, + {file = "pandas-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:82ae615826da838a8e5d4d630eb70c993ab8636f0eff13cb28aafc4291b632b5"}, + {file = "pandas-1.5.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:457d8c3d42314ff47cc2d6c54f8fc0d23954b47977b2caed09cd9635cb75388b"}, + {file = "pandas-1.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c009a92e81ce836212ce7aa98b219db7961a8b95999b97af566b8dc8c33e9519"}, + {file = "pandas-1.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:71f510b0efe1629bf2f7c0eadb1ff0b9cf611e87b73cd017e6b7d6adb40e2b3a"}, + {file = "pandas-1.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a40dd1e9f22e01e66ed534d6a965eb99546b41d4d52dbdb66565608fde48203f"}, + {file = "pandas-1.5.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ae7e989f12628f41e804847a8cc2943d362440132919a69429d4dea1f164da0"}, + {file = "pandas-1.5.2-cp38-cp38-win32.whl", hash = "sha256:530948945e7b6c95e6fa7aa4be2be25764af53fba93fe76d912e35d1c9ee46f5"}, + {file = "pandas-1.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:73f219fdc1777cf3c45fde7f0708732ec6950dfc598afc50588d0d285fddaefc"}, + {file = "pandas-1.5.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9608000a5a45f663be6af5c70c3cbe634fa19243e720eb380c0d378666bc7702"}, + {file = "pandas-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:315e19a3e5c2ab47a67467fc0362cb36c7c60a93b6457f675d7d9615edad2ebe"}, + {file = "pandas-1.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e18bc3764cbb5e118be139b3b611bc3fbc5d3be42a7e827d1096f46087b395eb"}, + {file = "pandas-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0183cb04a057cc38fde5244909fca9826d5d57c4a5b7390c0cc3fa7acd9fa883"}, + {file = "pandas-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:344021ed3e639e017b452aa8f5f6bf38a8806f5852e217a7594417fb9bbfa00e"}, + {file = "pandas-1.5.2-cp39-cp39-win32.whl", hash = "sha256:e7469271497960b6a781eaa930cba8af400dd59b62ec9ca2f4d31a19f2f91090"}, + {file = "pandas-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:c218796d59d5abd8780170c937b812c9637e84c32f8271bbf9845970f8c1351f"}, + {file = "pandas-1.5.2.tar.gz", hash = "sha256:220b98d15cee0b2cd839a6358bd1f273d0356bf964c1a1aeb32d47db0215488b"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] +python-dateutil = ">=2.8.1" +pytz = ">=2020.1" + +[package.extras] +test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pyarrow" +version = "10.0.1" +description = "Python library for Apache Arrow" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyarrow-10.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:e00174764a8b4e9d8d5909b6d19ee0c217a6cf0232c5682e31fdfbd5a9f0ae52"}, + {file = "pyarrow-10.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f7a7dbe2f7f65ac1d0bd3163f756deb478a9e9afc2269557ed75b1b25ab3610"}, + {file = "pyarrow-10.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb627673cb98708ef00864e2e243f51ba7b4c1b9f07a1d821f98043eccd3f585"}, + {file = "pyarrow-10.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba71e6fc348c92477586424566110d332f60d9a35cb85278f42e3473bc1373da"}, + {file = "pyarrow-10.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7b4ede715c004b6fc535de63ef79fa29740b4080639a5ff1ea9ca84e9282f349"}, + {file = "pyarrow-10.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:e3fe5049d2e9ca661d8e43fab6ad5a4c571af12d20a57dffc392a014caebef65"}, + {file = "pyarrow-10.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:254017ca43c45c5098b7f2a00e995e1f8346b0fb0be225f042838323bb55283c"}, + {file = "pyarrow-10.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70acca1ece4322705652f48db65145b5028f2c01c7e426c5d16a30ba5d739c24"}, + {file = "pyarrow-10.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abb57334f2c57979a49b7be2792c31c23430ca02d24becd0b511cbe7b6b08649"}, + {file = "pyarrow-10.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:1765a18205eb1e02ccdedb66049b0ec148c2a0cb52ed1fb3aac322dfc086a6ee"}, + {file = "pyarrow-10.0.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:61f4c37d82fe00d855d0ab522c685262bdeafd3fbcb5fe596fe15025fbc7341b"}, + {file = "pyarrow-10.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e141a65705ac98fa52a9113fe574fdaf87fe0316cde2dffe6b94841d3c61544c"}, + {file = "pyarrow-10.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf26f809926a9d74e02d76593026f0aaeac48a65b64f1bb17eed9964bfe7ae1a"}, + {file = "pyarrow-10.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:443eb9409b0cf78df10ced326490e1a300205a458fbeb0767b6b31ab3ebae6b2"}, + {file = "pyarrow-10.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f2d00aa481becf57098e85d99e34a25dba5a9ade2f44eb0b7d80c80f2984fc03"}, + {file = "pyarrow-10.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b1fc226d28c7783b52a84d03a66573d5a22e63f8a24b841d5fc68caeed6784d4"}, + {file = "pyarrow-10.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efa59933b20183c1c13efc34bd91efc6b2997377c4c6ad9272da92d224e3beb1"}, + {file = "pyarrow-10.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:668e00e3b19f183394388a687d29c443eb000fb3fe25599c9b4762a0afd37775"}, + {file = "pyarrow-10.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1bc6e4d5d6f69e0861d5d7f6cf4d061cf1069cb9d490040129877acf16d4c2a"}, + {file = "pyarrow-10.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:42ba7c5347ce665338f2bc64685d74855900200dac81a972d49fe127e8132f75"}, + {file = "pyarrow-10.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b069602eb1fc09f1adec0a7bdd7897f4d25575611dfa43543c8b8a75d99d6874"}, + {file = "pyarrow-10.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94fb4a0c12a2ac1ed8e7e2aa52aade833772cf2d3de9dde685401b22cec30002"}, + {file = "pyarrow-10.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db0c5986bf0808927f49640582d2032a07aa49828f14e51f362075f03747d198"}, + {file = "pyarrow-10.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:0ec7587d759153f452d5263dbc8b1af318c4609b607be2bd5127dcda6708cdb1"}, + {file = "pyarrow-10.0.1.tar.gz", hash = "sha256:1a14f57a5f472ce8234f2964cd5184cccaa8df7e04568c64edc33b23eb285dd5"}, +] + +[package.dependencies] +numpy = ">=1.16.6" + +[[package]] +name = "pytest" +version = "7.2.0" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.2.0-py3-none-any.whl", hash = "sha256:892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71"}, + {file = "pytest-7.2.0.tar.gz", hash = "sha256:c4014eb40e10f11f355ad4e3c2fb2c6c6d1919c73f3b5a433de4708202cade59"}, +] + +[package.dependencies] +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] + +[[package]] +name = "pytest-cov" +version = "4.0.0" +description = "Pytest plugin for measuring coverage." +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pytest-cov-4.0.0.tar.gz", hash = "sha256:996b79efde6433cdbd0088872dbc5fb3ed7fe1578b68cdbba634f14bb8dd0470"}, + {file = "pytest_cov-4.0.0-py3-none-any.whl", hash = "sha256:2feb1b751d66a8bd934e5edfa2e961d11309dc37b73b0eabe73b5945fee20f6b"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + +[[package]] +name = "pytest-cover" +version = "3.0.0" +description = "Pytest plugin for measuring coverage. Forked from `pytest-cov`." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pytest-cover-3.0.0.tar.gz", hash = "sha256:5bdb6c1cc3dd75583bb7bc2c57f5e1034a1bfcb79d27c71aceb0b16af981dbf4"}, + {file = "pytest_cover-3.0.0-py2.py3-none-any.whl", hash = "sha256:578249955eb3b5f3991209df6e532bb770b647743b7392d3d97698dc02f39ebb"}, +] + +[package.dependencies] +pytest-cov = ">=2.0" + +[[package]] +name = "pytest-coverage" +version = "0.0" +description = "Pytest plugin for measuring coverage. Forked from `pytest-cov`." +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "pytest-coverage-0.0.tar.gz", hash = "sha256:db6af2cbd7e458c7c9fd2b4207cee75258243c8a81cad31a7ee8cfad5be93c05"}, + {file = "pytest_coverage-0.0-py2.py3-none-any.whl", hash = "sha256:dedd084c5e74d8e669355325916dc011539b190355021b037242514dee546368"}, +] + +[package.dependencies] +pytest-cover = "*" + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2022.7" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2022.7-py2.py3-none-any.whl", hash = "sha256:93007def75ae22f7cd991c84e02d434876818661f8df9ad5df9e950ff4e52cfd"}, + {file = "pytz-2022.7.tar.gz", hash = "sha256:7ccfae7b4b2c067464a6733c6261673fdb8fd1be905460396b97a073e9fa683a"}, +] + +[[package]] +name = "pyyaml" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, + {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, + {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] + +[[package]] +name = "requests" +version = "2.28.2" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" +files = [ + {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, + {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "responses" +version = "0.18.0" +description = "A utility library for mocking out the `requests` Python library." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "responses-0.18.0-py3-none-any.whl", hash = "sha256:15c63ad16de13ee8e7182d99c9334f64fd81f1ee79f90748d527c28f7ca9dd51"}, + {file = "responses-0.18.0.tar.gz", hash = "sha256:380cad4c1c1dc942e5e8a8eaae0b4d4edf708f4f010db8b7bcfafad1fcd254ff"}, +] + +[package.dependencies] +requests = ">=2.0,<3.0" +urllib3 = ">=1.25.10" + +[package.extras] +tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=4.6)", "pytest-cov", "pytest-localserver", "types-mock", "types-requests"] + +[[package]] +name = "scipy" +version = "1.9.3" +description = "Fundamental algorithms for scientific computing in Python" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scipy-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1884b66a54887e21addf9c16fb588720a8309a57b2e258ae1c7986d4444d3bc0"}, + {file = "scipy-1.9.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:83b89e9586c62e787f5012e8475fbb12185bafb996a03257e9675cd73d3736dd"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a72d885fa44247f92743fc20732ae55564ff2a519e8302fb7e18717c5355a8b"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01e1dd7b15bd2449c8bfc6b7cc67d630700ed655654f0dfcf121600bad205c9"}, + {file = "scipy-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:68239b6aa6f9c593da8be1509a05cb7f9efe98b80f43a5861cd24c7557e98523"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b41bc822679ad1c9a5f023bc93f6d0543129ca0f37c1ce294dd9d386f0a21096"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:90453d2b93ea82a9f434e4e1cba043e779ff67b92f7a0e85d05d286a3625df3c"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c06e62a390a9167da60bedd4575a14c1f58ca9dfde59830fc42e5197283dab"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abaf921531b5aeaafced90157db505e10345e45038c39e5d9b6c7922d68085cb"}, + {file = "scipy-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:06d2e1b4c491dc7d8eacea139a1b0b295f74e1a1a0f704c375028f8320d16e31"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a04cd7d0d3eff6ea4719371cbc44df31411862b9646db617c99718ff68d4840"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:545c83ffb518094d8c9d83cce216c0c32f8c04aaf28b92cc8283eda0685162d5"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d54222d7a3ba6022fdf5773931b5d7c56efe41ede7f7128c7b1637700409108"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff3a5295234037e39500d35316a4c5794739433528310e117b8a9a0c76d20fc"}, + {file = "scipy-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:2318bef588acc7a574f5bfdff9c172d0b1bf2c8143d9582e05f878e580a3781e"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d644a64e174c16cb4b2e41dfea6af722053e83d066da7343f333a54dae9bc31c"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:da8245491d73ed0a994ed9c2e380fd058ce2fa8a18da204681f2fe1f57f98f95"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4db5b30849606a95dcf519763dd3ab6fe9bd91df49eba517359e450a7d80ce2e"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0"}, + {file = "scipy-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:5b88e6d91ad9d59478fafe92a7c757d00c59e3bdc3331be8ada76a4f8d683f58"}, + {file = "scipy-1.9.3.tar.gz", hash = "sha256:fbc5c05c85c1a02be77b1ff591087c83bc44579c6d2bd9fb798bb64ea5e1a027"}, +] + +[package.dependencies] +numpy = ">=1.18.5,<1.26.0" + +[package.extras] +dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"] +doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"] +test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "tqdm" +version = "4.64.1" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +files = [ + {file = "tqdm-4.64.1-py2.py3-none-any.whl", hash = "sha256:6fee160d6ffcd1b1c68c65f14c829c22832bc401726335ce92c52d395944a6a1"}, + {file = "tqdm-4.64.1.tar.gz", hash = "sha256:5f4f682a004951c1b450bc753c710e9280c5746ce6ffedee253ddbcbf54cf1e4"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "typer" +version = "0.7.0" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "typer-0.7.0-py3-none-any.whl", hash = "sha256:b5e704f4e48ec263de1c0b3a2387cd405a13767d2f907f44c1a08cbad96f606d"}, + {file = "typer-0.7.0.tar.gz", hash = "sha256:ff797846578a9f2a201b53442aedeb543319466870fbe1c701eab66dd7681165"}, +] + +[package.dependencies] +click = ">=7.1.1,<9.0.0" + +[package.extras] +all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"] +dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"] +doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"] +test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"] + +[[package]] +name = "typing-extensions" +version = "4.4.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, + {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"}, +] + +[[package]] +name = "urllib3" +version = "1.26.14" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, + {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "xxhash" +version = "3.2.0" +description = "Python binding for xxHash" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "xxhash-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:af44b9e59c4b2926a4e3c7f9d29949ff42fcea28637ff6b8182e654461932be8"}, + {file = "xxhash-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1bdd57973e2b802ef32553d7bebf9402dac1557874dbe5c908b499ea917662cd"}, + {file = "xxhash-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b7c9aa77bbce61a5e681bd39cb6a804338474dcc90abe3c543592aa5d6c9a9b"}, + {file = "xxhash-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11bf87dc7bb8c3b0b5e24b7b941a9a19d8c1f88120b6a03a17264086bc8bb023"}, + {file = "xxhash-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2783d41487ce6d379fdfaa7332fca5187bf7010b9bddcf20cafba923bc1dc665"}, + {file = "xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:561076ca0dcef2fbc20b2bc2765bff099e002e96041ae9dbe910a863ca6ee3ea"}, + {file = "xxhash-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a26eeb4625a6e61cedc8c1b39b89327c9c7e1a8c2c4d786fe3f178eb839ede6"}, + {file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d93a44d0104d1b9b10de4e7aadf747f6efc1d7ec5ed0aa3f233a720725dd31bd"}, + {file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:89585adc73395a10306d2e2036e50d6c4ac0cf8dd47edf914c25488871b64f6d"}, + {file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a892b4b139126a86bfdcb97cd912a2f8c4e8623869c3ef7b50871451dd7afeb0"}, + {file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:e998efb190653f70e0f30d92b39fc645145369a4823bee46af8ddfc244aa969d"}, + {file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e8ed3bd2b8bb3277710843ca63e4f5c3ee6f8f80b083be5b19a7a9905420d11e"}, + {file = "xxhash-3.2.0-cp310-cp310-win32.whl", hash = "sha256:20181cbaed033c72cb881b2a1d13c629cd1228f113046133469c9a48cfcbcd36"}, + {file = "xxhash-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:a0f7a16138279d707db778a63264d1d6016ac13ffd3f1e99f54b2855d6c0d8e1"}, + {file = "xxhash-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5daff3fb5bfef30bc5a2cb143810d376d43461445aa17aece7210de52adbe151"}, + {file = "xxhash-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75bb5be3c5de702a547715f320ecf5c8014aeca750ed5147ca75389bd22e7343"}, + {file = "xxhash-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01f36b671ff55cb1d5c2f6058b799b697fd0ae4b4582bba6ed0999678068172a"}, + {file = "xxhash-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4d4519123aac73c93159eb8f61db9682393862dd669e7eae034ecd0a35eadac"}, + {file = "xxhash-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:994e4741d5ed70fc2a335a91ef79343c6b1089d7dfe6e955dd06f8ffe82bede6"}, + {file = "xxhash-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:919bc1b010aa6ff0eb918838ff73a435aed9e9a19c3202b91acecd296bf75607"}, + {file = "xxhash-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:17b65454c5accbb079c45eca546c27c4782f5175aa320758fafac896b1549d27"}, + {file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b0c094d5e65a46dbf3fe0928ff20873a747e6abfd2ed4b675beeb2750624bc2e"}, + {file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f94163ebe2d5546e6a5977e96d83621f4689c1054053428cf8d4c28b10f92f69"}, + {file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cead7c0307977a00b3f784cff676e72c147adbcada19a2e6fc2ddf54f37cf387"}, + {file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a0e1bd0260c1da35c1883321ce2707ceea07127816ab625e1226ec95177b561a"}, + {file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc8878935671490efe9275fb4190a6062b73277bd273237179b9b5a2aa436153"}, + {file = "xxhash-3.2.0-cp311-cp311-win32.whl", hash = "sha256:a433f6162b18d52f7068175d00bd5b1563b7405f926a48d888a97b90a160c40d"}, + {file = "xxhash-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:a32d546a1752e4ee7805d6db57944f7224afa7428d22867006b6486e4195c1f3"}, + {file = "xxhash-3.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:82daaab720866bf690b20b49de5640b0c27e3b8eea2d08aa75bdca2b0f0cfb63"}, + {file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3126df6520cbdbaddd87ce74794b2b6c45dd2cf6ac2b600a374b8cdb76a2548c"}, + {file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e172c1ee40507ae3b8d220f4048aaca204f203e1e4197e8e652f5c814f61d1aa"}, + {file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5384f1d9f30876f5d5b618464fb19ff7ce6c0fe4c690fbaafd1c52adc3aae807"}, + {file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26cb52174a7e96a17acad27a3ca65b24713610ac479c99ac9640843822d3bebf"}, + {file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fbcd613a5e76b1495fc24db9c37a6b7ee5f214fd85979187ec4e032abfc12ded"}, + {file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:f988daf25f31726d5b9d0be6af636ca9000898f9ea43a57eac594daea25b0948"}, + {file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:bbc30c98ab006ab9fc47e5ed439c00f706bc9d4441ff52693b8b6fea335163e0"}, + {file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:2408d49260b0a4a7cc6ba445aebf38e073aeaf482f8e32767ca477e32ccbbf9e"}, + {file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:3f4152fd0bf8b03b79f2f900fd6087a66866537e94b5a11fd0fd99ef7efe5c42"}, + {file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:0eea848758e4823a01abdbcccb021a03c1ee4100411cbeeb7a5c36a202a0c13c"}, + {file = "xxhash-3.2.0-cp36-cp36m-win32.whl", hash = "sha256:77709139af5123c578ab06cf999429cdb9ab211047acd0c787e098dcb3f1cb4d"}, + {file = "xxhash-3.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:91687671fd9d484a4e201ad266d366b695a45a1f2b41be93d116ba60f1b8f3b3"}, + {file = "xxhash-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e4af8bc5c3fcc2192c266421c6aa2daab1a18e002cb8e66ef672030e46ae25cf"}, + {file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8be562e2ce3e481d9209b6f254c3d7c5ff920eb256aba2380d2fb5ba75d4f87"}, + {file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9eba0c7c12126b12f7fcbea5513f28c950d28f33d2a227f74b50b77789e478e8"}, + {file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2198c4901a0223c48f6ec0a978b60bca4f4f7229a11ca4dc96ca325dd6a29115"}, + {file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50ce82a71b22a3069c02e914bf842118a53065e2ec1c6fb54786e03608ab89cc"}, + {file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b5019fb33711c30e54e4e57ae0ca70af9d35b589d385ac04acd6954452fa73bb"}, + {file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0d54ac023eef7e3ac9f0b8841ae8a376b933043bc2ad428121346c6fa61c491c"}, + {file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c55fa832fc3fe64e0d29da5dc9b50ba66ca93312107cec2709300ea3d3bab5c7"}, + {file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4ce006215497993ae77c612c1883ca4f3973899573ce0c52fee91f0d39c4561"}, + {file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:1afb9b9d27fd675b436cb110c15979976d92d761ad6e66799b83756402f3a974"}, + {file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:baa99cebf95c1885db21e119395f222a706a2bb75a545f0672880a442137725e"}, + {file = "xxhash-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:75aa692936942ccb2e8fd6a386c81c61630ac1b6d6e921698122db8a930579c3"}, + {file = "xxhash-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0a2cdfb5cae9fafb9f7b65fd52ecd60cf7d72c13bb2591ea59aaefa03d5a8827"}, + {file = "xxhash-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a68d1e8a390b660d94b9360ae5baa8c21a101bd9c4790a8b30781bada9f1fc6"}, + {file = "xxhash-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ce7c3ce28f94302df95eaea7c9c1e2c974b6d15d78a0c82142a97939d7b6c082"}, + {file = "xxhash-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dcb419bf7b0bc77d366e5005c25682249c5521a63fd36c51f584bd91bb13bd5"}, + {file = "xxhash-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae521ed9287f86aac979eeac43af762f03d9d9797b2272185fb9ddd810391216"}, + {file = "xxhash-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0d16775094423088ffa357d09fbbb9ab48d2fb721d42c0856b801c86f616eec"}, + {file = "xxhash-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe454aeab348c42f56d6f7434ff758a3ef90787ac81b9ad5a363cd61b90a1b0b"}, + {file = "xxhash-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:052fd0efdd5525c2dbc61bebb423d92aa619c4905bba605afbf1e985a562a231"}, + {file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:02badf3754e2133de254a4688798c4d80f0060635087abcb461415cb3eb82115"}, + {file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:66b8a90b28c13c2aae7a71b32638ceb14cefc2a1c8cf23d8d50dfb64dfac7aaf"}, + {file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:649cdf19df175925ad87289ead6f760cd840730ee85abc5eb43be326a0a24d97"}, + {file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4b948a03f89f5c72d69d40975af8af241111f0643228796558dc1cae8f5560b0"}, + {file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49f51fab7b762da7c2cee0a3d575184d3b9be5e2f64f26cae2dd286258ac9b3c"}, + {file = "xxhash-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1a42994f0d42b55514785356722d9031f064fd34e495b3a589e96db68ee0179d"}, + {file = "xxhash-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:0a6d58ba5865475e53d6c2c4fa6a62e2721e7875e146e2681e5337a6948f12e7"}, + {file = "xxhash-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:aabdbc082030f8df613e2d2ea1f974e7ad36a539bdfc40d36f34e55c7e4b8e94"}, + {file = "xxhash-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:498843b66b9ca416e9d03037e5875c8d0c0ab9037527e22df3b39aa5163214cd"}, + {file = "xxhash-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a910b1193cd90af17228f5d6069816646df0148f14f53eefa6b2b11a1dedfcd0"}, + {file = "xxhash-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb6d8ce31dc25faf4da92991320e211fa7f42de010ef51937b1dc565a4926501"}, + {file = "xxhash-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:883dc3d3942620f4c7dbc3fd6162f50a67f050b714e47da77444e3bcea7d91cc"}, + {file = "xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59dc8bfacf89b8f5be54d55bc3b4bd6d74d0c5320c8a63d2538ac7df5b96f1d5"}, + {file = "xxhash-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:61e6aa1d30c2af692aa88c4dd48709426e8b37bff6a574ee2de677579c34a3d6"}, + {file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:314ec0bd21f0ee8d30f2bd82ed3759314bd317ddbbd8555668f3d20ab7a8899a"}, + {file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:dad638cde3a5357ad3163b80b3127df61fb5b5e34e9e05a87697144400ba03c7"}, + {file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:eaa3ea15025b56076d806b248948612289b093e8dcda8d013776b3848dffff15"}, + {file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7deae3a312feb5c17c97cbf18129f83cbd3f1f9ec25b0f50e2bd9697befb22e7"}, + {file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:add774341c09853b1612c64a526032d95ab1683053325403e1afbe3ad2f374c5"}, + {file = "xxhash-3.2.0-cp39-cp39-win32.whl", hash = "sha256:9b94749130ef3119375c599bfce82142c2500ef9ed3280089157ee37662a7137"}, + {file = "xxhash-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:e57d94a1552af67f67b27db5dba0b03783ea69d5ca2af2f40e098f0ba3ce3f5f"}, + {file = "xxhash-3.2.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92fd765591c83e5c5f409b33eac1d3266c03d3d11c71a7dbade36d5cdee4fbc0"}, + {file = "xxhash-3.2.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8970f6a411a9839a02b23b7e90bbbba4a6de52ace009274998566dc43f36ca18"}, + {file = "xxhash-3.2.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5f3e33fe6cbab481727f9aeb136a213aed7e33cd1ca27bd75e916ffacc18411"}, + {file = "xxhash-3.2.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:368265392cb696dd53907e2328b5a8c1bee81cf2142d0cc743caf1c1047abb36"}, + {file = "xxhash-3.2.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3b1f3c6d67fa9f49c4ff6b25ce0e7143bab88a5bc0f4116dd290c92337d0ecc7"}, + {file = "xxhash-3.2.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c5e8db6e1ee7267b7c412ad0afd5863bf7a95286b8333a5958c8097c69f94cf5"}, + {file = "xxhash-3.2.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:761df3c7e2c5270088b691c5a8121004f84318177da1ca1db64222ec83c44871"}, + {file = "xxhash-3.2.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2d15a707e7f689531eb4134eccb0f8bf3844bb8255ad50823aa39708d9e6755"}, + {file = "xxhash-3.2.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6b2ba4ff53dd5f57d728095e3def7375eb19c90621ce3b41b256de84ec61cfd"}, + {file = "xxhash-3.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:61b0bcf946fdfd8ab5f09179dc2b5c74d1ef47cedfc6ed0ec01fdf0ee8682dd3"}, + {file = "xxhash-3.2.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f7b79f0f302396d8e0d444826ceb3d07b61977793886ebae04e82796c02e42dc"}, + {file = "xxhash-3.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0773cd5c438ffcd5dbff91cdd503574f88a4b960e70cedeb67736583a17a918"}, + {file = "xxhash-3.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ec1f57127879b419a2c8d2db9d9978eb26c61ae17e5972197830430ae78d25b"}, + {file = "xxhash-3.2.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d4b15c00e807b1d3d0b612338c814739dec310b80fb069bd732b98ddc709ad7"}, + {file = "xxhash-3.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9d3f686e3d1c8900c5459eee02b60c7399e20ec5c6402364068a343c83a61d90"}, + {file = "xxhash-3.2.0.tar.gz", hash = "sha256:1afd47af8955c5db730f630ad53ae798cf7fae0acb64cebb3cf94d35c47dd088"}, +] + +[[package]] +name = "yarl" +version = "1.8.2" +description = "Yet another URL library" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bb81f753c815f6b8e2ddd2eef3c855cf7da193b82396ac013c661aaa6cc6b0a5"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:47d49ac96156f0928f002e2424299b2c91d9db73e08c4cd6742923a086f1c863"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3fc056e35fa6fba63248d93ff6e672c096f95f7836938241ebc8260e062832fe"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58a3c13d1c3005dbbac5c9f0d3210b60220a65a999b1833aa46bd6677c69b08e"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10b08293cda921157f1e7c2790999d903b3fd28cd5c208cf8826b3b508026996"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de986979bbd87272fe557e0a8fcb66fd40ae2ddfe28a8b1ce4eae22681728fef"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c4fcfa71e2c6a3cb568cf81aadc12768b9995323186a10827beccf5fa23d4f8"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae4d7ff1049f36accde9e1ef7301912a751e5bae0a9d142459646114c70ecba6"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bf071f797aec5b96abfc735ab97da9fd8f8768b43ce2abd85356a3127909d146"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:74dece2bfc60f0f70907c34b857ee98f2c6dd0f75185db133770cd67300d505f"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:df60a94d332158b444301c7f569659c926168e4d4aad2cfbf4bce0e8fb8be826"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:63243b21c6e28ec2375f932a10ce7eda65139b5b854c0f6b82ed945ba526bff3"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cfa2bbca929aa742b5084fd4663dd4b87c191c844326fcb21c3afd2d11497f80"}, + {file = "yarl-1.8.2-cp310-cp310-win32.whl", hash = "sha256:b05df9ea7496df11b710081bd90ecc3a3db6adb4fee36f6a411e7bc91a18aa42"}, + {file = "yarl-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:24ad1d10c9db1953291f56b5fe76203977f1ed05f82d09ec97acb623a7976574"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a1fca9588f360036242f379bfea2b8b44cae2721859b1c56d033adfd5893634"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f37db05c6051eff17bc832914fe46869f8849de5b92dc4a3466cd63095d23dfd"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77e913b846a6b9c5f767b14dc1e759e5aff05502fe73079f6f4176359d832581"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0978f29222e649c351b173da2b9b4665ad1feb8d1daa9d971eb90df08702668a"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388a45dc77198b2460eac0aca1efd6a7c09e976ee768b0d5109173e521a19daf"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2305517e332a862ef75be8fad3606ea10108662bc6fe08509d5ca99503ac2aee"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42430ff511571940d51e75cf42f1e4dbdded477e71c1b7a17f4da76c1da8ea76"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3150078118f62371375e1e69b13b48288e44f6691c1069340081c3fd12c94d5b"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c15163b6125db87c8f53c98baa5e785782078fbd2dbeaa04c6141935eb6dab7a"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d04acba75c72e6eb90745447d69f84e6c9056390f7a9724605ca9c56b4afcc6"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e7fd20d6576c10306dea2d6a5765f46f0ac5d6f53436217913e952d19237efc4"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75c16b2a900b3536dfc7014905a128a2bea8fb01f9ee26d2d7d8db0a08e7cb2c"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6d88056a04860a98341a0cf53e950e3ac9f4e51d1b6f61a53b0609df342cc8b2"}, + {file = "yarl-1.8.2-cp311-cp311-win32.whl", hash = "sha256:fb742dcdd5eec9f26b61224c23baea46c9055cf16f62475e11b9b15dfd5c117b"}, + {file = "yarl-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8c46d3d89902c393a1d1e243ac847e0442d0196bbd81aecc94fcebbc2fd5857c"}, + {file = "yarl-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ceff9722e0df2e0a9e8a79c610842004fa54e5b309fe6d218e47cd52f791d7ef"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6b4aca43b602ba0f1459de647af954769919c4714706be36af670a5f44c9c1"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1684a9bd9077e922300ecd48003ddae7a7474e0412bea38d4631443a91d61077"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebb78745273e51b9832ef90c0898501006670d6e059f2cdb0e999494eb1450c2"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3adeef150d528ded2a8e734ebf9ae2e658f4c49bf413f5f157a470e17a4a2e89"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a7c87927a468e5a1dc60c17caf9597161d66457a34273ab1760219953f7f4c"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:efff27bd8cbe1f9bd127e7894942ccc20c857aa8b5a0327874f30201e5ce83d0"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a783cd344113cb88c5ff7ca32f1f16532a6f2142185147822187913eb989f739"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:705227dccbe96ab02c7cb2c43e1228e2826e7ead880bb19ec94ef279e9555b5b"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:34c09b43bd538bf6c4b891ecce94b6fa4f1f10663a8d4ca589a079a5018f6ed7"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a48f4f7fea9a51098b02209d90297ac324241bf37ff6be6d2b0149ab2bd51b37"}, + {file = "yarl-1.8.2-cp37-cp37m-win32.whl", hash = "sha256:0414fd91ce0b763d4eadb4456795b307a71524dbacd015c657bb2a39db2eab89"}, + {file = "yarl-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:d881d152ae0007809c2c02e22aa534e702f12071e6b285e90945aa3c376463c5"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5df5e3d04101c1e5c3b1d69710b0574171cc02fddc4b23d1b2813e75f35a30b1"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7a66c506ec67eb3159eea5096acd05f5e788ceec7b96087d30c7d2865a243918"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2b4fa2606adf392051d990c3b3877d768771adc3faf2e117b9de7eb977741229"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e21fb44e1eff06dd6ef971d4bdc611807d6bd3691223d9c01a18cec3677939e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93202666046d9edadfe9f2e7bf5e0782ea0d497b6d63da322e541665d65a044e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc77086ce244453e074e445104f0ecb27530d6fd3a46698e33f6c38951d5a0f1"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dd68a92cab699a233641f5929a40f02a4ede8c009068ca8aa1fe87b8c20ae3"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b372aad2b5f81db66ee7ec085cbad72c4da660d994e8e590c997e9b01e44901"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e6f3515aafe0209dd17fb9bdd3b4e892963370b3de781f53e1746a521fb39fc0"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dfef7350ee369197106805e193d420b75467b6cceac646ea5ed3049fcc950a05"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:728be34f70a190566d20aa13dc1f01dc44b6aa74580e10a3fb159691bc76909d"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ff205b58dc2929191f68162633d5e10e8044398d7a45265f90a0f1d51f85f72c"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baf211dcad448a87a0d9047dc8282d7de59473ade7d7fdf22150b1d23859f946"}, + {file = "yarl-1.8.2-cp38-cp38-win32.whl", hash = "sha256:272b4f1599f1b621bf2aabe4e5b54f39a933971f4e7c9aa311d6d7dc06965165"}, + {file = "yarl-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:326dd1d3caf910cd26a26ccbfb84c03b608ba32499b5d6eeb09252c920bcbe4f"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f8ca8ad414c85bbc50f49c0a106f951613dfa5f948ab69c10ce9b128d368baf8"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:418857f837347e8aaef682679f41e36c24250097f9e2f315d39bae3a99a34cbf"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0eec05ab49e91a78700761777f284c2df119376e391db42c38ab46fd662b77"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:009a028127e0a1755c38b03244c0bea9d5565630db9c4cf9572496e947137a87"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3edac5d74bb3209c418805bda77f973117836e1de7c000e9755e572c1f7850d0"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da65c3f263729e47351261351b8679c6429151ef9649bba08ef2528ff2c423b2"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef8fb25e52663a1c85d608f6dd72e19bd390e2ecaf29c17fb08f730226e3a08"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcd7bb1e5c45274af9a1dd7494d3c52b2be5e6bd8d7e49c612705fd45420b12d"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44ceac0450e648de86da8e42674f9b7077d763ea80c8ceb9d1c3e41f0f0a9951"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:97209cc91189b48e7cfe777237c04af8e7cc51eb369004e061809bcdf4e55220"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:48dd18adcf98ea9cd721a25313aef49d70d413a999d7d89df44f469edfb38a06"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e59399dda559688461762800d7fb34d9e8a6a7444fd76ec33220a926c8be1516"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d617c241c8c3ad5c4e78a08429fa49e4b04bedfc507b34b4d8dceb83b4af3588"}, + {file = "yarl-1.8.2-cp39-cp39-win32.whl", hash = "sha256:cb6d48d80a41f68de41212f3dfd1a9d9898d7841c8f7ce6696cf2fd9cb57ef83"}, + {file = "yarl-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:6604711362f2dbf7160df21c416f81fac0de6dbcf0b5445a2ef25478ecc4c778"}, + {file = "yarl-1.8.2.tar.gz", hash = "sha256:49d43402c6e3013ad0978602bf6bf5328535c48d192304b91b97a3c6790b1562"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[metadata] +lock-version = "2.0" +python-versions = "^3.8" +content-hash = "260cae5e561fd5d7669de4e693a01759dbf92a8465e6d0b234e0ea03d11e9e46" diff --git a/pyproject.toml b/pyproject.toml index 43ab41a..443c9e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,3 @@ -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - [tool.isort] multi_line_output = 3 profile = "black" @@ -10,3 +6,27 @@ profile = "black" minversion = "6.0" addopts = "-ra -q --doctest-modules -vv --cov=pile/ " testpaths = ["tests"] + +[tool.poetry] +name = "thepilev2" +version = "2.0" +description = "The second version of the LLM (Large Language Model) dataset for the python package of The Pile is a collection of text data that can be used to train and evaluate large language models. It includes a diverse set of text data sourced from various sources such as Wikipedia, books, and websites. The dataset is preprocessed and tokenized, making it ready for use in training models. The second version of the dataset is designed to be even larger than the first version, with more text data and a wider range of sources. The goal of this dataset is to provide a comprehensive and diverse set of text data that can be used to train models that can accurately understand and generate a wide range of natural language." +authors = ["Fabrizio (Mistobaan) Milo "] +license = "Apache 2.0" +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.8" +datasets = "^2.8.0" +cython = "^0.29.33" +typer = "^0.7.0" +datasketch = "^1.5.8" + +[tool.poetry.group.dev.dependencies] +pytest = "^7.2.0" +pytest-coverage = "^0.0" +coverage = "^7.0.5" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api"