From 0794916a74402b91ac5ec6298e24570b112ae833 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Fri, 22 Oct 2021 15:59:42 -0600
Subject: [PATCH 1/6] support for unresolved lockfiles

---
 tasks.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tasks.py b/tasks.py
index a88780261..33cc010a8 100644
--- a/tasks.py
+++ b/tasks.py
@@ -3,8 +3,10 @@
 """
 
 import sys
+from pathlib import Path
 from invoke import task
 from . import env
+import yaml
 
 __ALL__ = [
     'dev_lock',
     'conda_platform'
 ]
@@ -39,6 +41,39 @@ def dev_lock(c, platform=None, extras=None, version=None, blas=None, env_file=Fa
     c.run(cmd)
 
 
+@task(iterable=['extras'])
+def env_file(c, platform=None, extras=None, version=None, blas=None, dev_deps=True,
+             output=None, name=None):
+    "Create an unresolved environment file"
+    from conda_lock.conda_lock import parse_source_files, aggregate_lock_specs
+
+    if not platform:
+        platform = env.conda_platform()
+
+    files = [Path('pyproject.toml')]
+    if version:
+        files.append(Path(f'lkbuild/python-{version}-spec.yml'))
+    if blas:
+        files.append(Path(f'lkbuild/{blas}-spec.yml'))
+
+    lock = parse_source_files(files, platform, dev_deps, extras)
+    lock = aggregate_lock_specs(lock)
+    env_spec = {
+        'channels': lock.channels,
+        'dependencies': lock.specs,
+    }
+    if name:
+        env_spec['name'] = name
+
+    if output:
+        print('writing environment to', output, file=sys.stderr)
+        out = Path(output)
+        with out.open('w') as f:
+            yaml.dump(env_spec, f)
+    else:
+        yaml.dump(env_spec, sys.stdout)
+
+
 @task
 def conda_platform(c, gh_output=False):
     plat = env.conda_platform()

From 2b2f14b2c3ab347f37140cb3d780966ceb9b006f Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Mon, 1 Nov 2021 11:51:22 -0600
Subject: [PATCH 2/6] Add BibTeX update task to lkbuild

---
 tasks.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tasks.py b/tasks.py
index 33cc010a8..4c37490c5 100644
--- a/tasks.py
+++ b/tasks.py
@@ -7,12 +7,16 @@
 from invoke import task
 from . import env
 import yaml
+import requests
 
 __ALL__ = [
     'dev_lock',
     'conda_platform'
 ]
 
+BIBTEX_URL = 'https://paperpile.com/eb/YdOlWmnlit'
+BIBTEX_FILE = Path('docs/lenskit.bib')
+
 
 @task(iterable=['extras'])
 def dev_lock(c, platform=None, extras=None, version=None, blas=None, env_file=False):
@@ -81,3 +85,11 @@ def conda_platform(c, gh_output=False):
         print('::set-output name=conda-platform::' + plat)
     else:
         print(plat)
+
+
+@task
+def update_bibtex(c):
+    "Update the BibTeX file"
+    res = requests.get(BIBTEX_URL)
+    print('updating file', BIBTEX_FILE)
+    BIBTEX_FILE.write_text(res.text, encoding='utf-8')
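
One way to smoke-test the env_file task added above is to call it programmatically instead of through the invoke CLI. The sketch below is an assumption-laden example, not part of the patch: it presumes the module is importable as lkbuild.tasks and that pyproject.toml, lkbuild/python-3.9-spec.yml, and lkbuild/mkl-spec.yml exist in the working directory.

    # Hypothetical smoke test for the env_file task (PATCH 1); the module path
    # and spec-file names are assumptions, not defined by these patches.
    from invoke import Context

    from lkbuild import tasks

    # invoke Task objects are callable; the first argument must be a Context.
    tasks.env_file(Context(), extras=[], version='3.9', blas='mkl',
                   output='dev-env.yml', name='lkpy-dev')
    # dev-env.yml now contains the unresolved spec (channels plus constraints),
    # ready for: conda env create -f dev-env.yml
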
From e69810bfd690a917e538c039dbb16f0a229248b0 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Wed, 15 Dec 2021 11:56:25 -0700
Subject: [PATCH 3/6] add data-fetching logic

---
 datasets.py | 33 +++++++++++++++++++++++++++++++++
 tasks.py    | 10 ++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 datasets.py

diff --git a/datasets.py b/datasets.py
new file mode 100644
index 000000000..4e7b40b22
--- /dev/null
+++ b/datasets.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+import requests
+from zipfile import ZipFile
+
+ML_LOC = "http://files.grouplens.org/datasets/movielens/"
+ML_DATASETS = {
+    'ml-100k': 'ml-100k/u.data',
+    'ml-1m': 'ml-1m/ratings.dat',
+    'ml-10m': 'ml-10M100K/ratings.dat',
+    'ml-20m': 'ml-20m/ratings.csv',
+    'ml-25m': 'ml-25m/ratings.csv',
+}
+
+
+def fetch_ml(dir: Path, ds: str):
+    zipname = f'{ds}.zip'
+    zipfile = dir / zipname
+    zipurl = ML_LOC + zipname
+
+    test_file = dir / ML_DATASETS[ds]
+    if test_file.exists():
+        print(test_file, 'already exists')
+        return
+
+    print('downloading data set', ds)
+    with zipfile.open('wb') as zf:
+        res = requests.get(zipurl, stream=True)
+        for block in res.iter_content(None):
+            zf.write(block)
+
+    print('unpacking data set')
+    with ZipFile(zipfile, 'r') as zf:
+        zf.extractall(dir)
diff --git a/tasks.py b/tasks.py
index 4c37490c5..63f3abd7f 100644
--- a/tasks.py
+++ b/tasks.py
@@ -14,6 +14,7 @@
     'conda_platform'
 ]
 
+DATA_DIR = Path('data')
 BIBTEX_URL = 'https://paperpile.com/eb/YdOlWmnlit'
 BIBTEX_FILE = Path('docs/lenskit.bib')
 
@@ -93,3 +94,12 @@ def update_bibtex(c):
     res = requests.get(BIBTEX_URL)
     print('updating file', BIBTEX_FILE)
     BIBTEX_FILE.write_text(res.text, encoding='utf-8')
+
+
+@task
+def fetch_data(c, data='ml-100k', data_dir=DATA_DIR):
+    "Fetch a data set."
+    from . import datasets
+
+    if data.startswith('ml-'):
+        datasets.fetch_ml(Path(data_dir), data)

From 84641a707ce84f9c39457a44448985d5c5339b89 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Sat, 22 Jan 2022 10:33:06 -0700
Subject: [PATCH 4/6] drop version from MKL dep

---
 mkl-spec.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mkl-spec.yml b/mkl-spec.yml
index d90293dea..0b52692fb 100644
--- a/mkl-spec.yml
+++ b/mkl-spec.yml
@@ -1,4 +1,4 @@
 # environment mini-spec for ensuring MKL
 dependencies:
   - libblas=*=*mkl
-  - mkl=2020
+  - tbb

From b124c816f4c462cf33be49671ec32910861365a7 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Sat, 22 Jan 2022 10:45:40 -0700
Subject: [PATCH 5/6] Add LK tools to build

---
 tasks.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/tasks.py b/tasks.py
index 626cdf352..d283066f6 100644
--- a/tasks.py
+++ b/tasks.py
@@ -3,14 +3,20 @@
 """
 
 import sys
+from pathlib import Path
 from invoke import task
 from . import env
+import yaml
+import requests
 
 __ALL__ = [
     'dev_lock',
     'conda_platform'
 ]
 
+DATA_DIR = Path('data')
+BIBTEX_URL = 'https://paperpile.com/eb/YdOlWmnlit'
+BIBTEX_FILE = Path('docs/lenskit.bib')
 
 @task(iterable=['extras', 'mixins'])
 def dev_lock(c, platform=None, extras=None, version=None, blas=None, mixins=None, env_file=False):
@@ -41,6 +47,39 @@ def dev_lock(c, platform=None, extras=None, version=None, blas=None, mixins=None
     c.run(cmd)
 
 
+@task(iterable=['extras'])
+def env_file(c, platform=None, extras=None, version=None, blas=None, dev_deps=True,
+             output=None, name=None):
+    "Create an unresolved environment file"
+    from conda_lock.conda_lock import parse_source_files, aggregate_lock_specs
+
+    if not platform:
+        platform = env.conda_platform()
+
+    files = [Path('pyproject.toml')]
+    if version:
+        files.append(Path(f'lkbuild/python-{version}-spec.yml'))
+    if blas:
+        files.append(Path(f'lkbuild/{blas}-spec.yml'))
+
+    lock = parse_source_files(files, platform, dev_deps, extras)
+    lock = aggregate_lock_specs(lock)
+    env_spec = {
+        'channels': lock.channels,
+        'dependencies': lock.specs,
+    }
+    if name:
+        env_spec['name'] = name
+
+    if output:
+        print('writing environment to', output, file=sys.stderr)
+        out = Path(output)
+        with out.open('w') as f:
+            yaml.dump(env_spec, f)
+    else:
+        yaml.dump(env_spec, sys.stdout)
+
+
 @task
 def conda_platform(c, gh_output=False):
     plat = env.conda_platform()
@@ -48,3 +87,20 @@ def conda_platform(c, gh_output=False):
         print('::set-output name=conda-platform::' + plat)
     else:
         print(plat)
+
+
+@task
+def update_bibtex(c):
+    "Update the BibTeX file"
+    res = requests.get(BIBTEX_URL)
+    print('updating file', BIBTEX_FILE)
+    BIBTEX_FILE.write_text(res.text, encoding='utf-8')
+
+
+@task
+def fetch_data(c, data='ml-100k', data_dir=DATA_DIR):
+    "Fetch a data set."
+    from . import datasets
+
+    if data.startswith('ml-'):
+        datasets.fetch_ml(Path(data_dir), data)
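
PATCH 4 trims the MKL mini-spec that dev_lock and env_file pull in through the --blas option, and PATCH 5 vendors those tasks into the consuming project's build. To check what the aggregation step collects from pyproject.toml plus such a spec file, one can mirror the conda-lock calls made inside env_file; the file paths and platform string below are illustrative assumptions.

    # Hypothetical dry run of the spec aggregation done by env_file/dev_lock.
    # It repeats the parse_source_files/aggregate_lock_specs calls from the task;
    # the paths assume the lkbuild spec files are vendored under lkbuild/.
    from pathlib import Path

    from conda_lock.conda_lock import parse_source_files, aggregate_lock_specs

    files = [
        Path('pyproject.toml'),
        Path('lkbuild/mkl-spec.yml'),  # the file PATCH 4 edits, chosen by --blas mkl
    ]
    specs = parse_source_files(files, 'linux-64', True, [])  # platform, dev deps, extras
    lock = aggregate_lock_specs(specs)
    print(lock.channels)  # channels gathered from the source files
    print(lock.specs)     # unresolved constraints, e.g. 'libblas=*=*mkl'
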
From e3d185cd5a78b5ebbf695ced1ad859c2640da32a Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Sat, 22 Jan 2022 11:19:02 -0700
Subject: [PATCH 6/6] add datasets.py

---
 datasets.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 datasets.py

diff --git a/datasets.py b/datasets.py
new file mode 100644
index 000000000..4e7b40b22
--- /dev/null
+++ b/datasets.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+import requests
+from zipfile import ZipFile
+
+ML_LOC = "http://files.grouplens.org/datasets/movielens/"
+ML_DATASETS = {
+    'ml-100k': 'ml-100k/u.data',
+    'ml-1m': 'ml-1m/ratings.dat',
+    'ml-10m': 'ml-10M100K/ratings.dat',
+    'ml-20m': 'ml-20m/ratings.csv',
+    'ml-25m': 'ml-25m/ratings.csv',
+}
+
+
+def fetch_ml(dir: Path, ds: str):
+    zipname = f'{ds}.zip'
+    zipfile = dir / zipname
+    zipurl = ML_LOC + zipname
+
+    test_file = dir / ML_DATASETS[ds]
+    if test_file.exists():
+        print(test_file, 'already exists')
+        return
+
+    print('downloading data set', ds)
+    with zipfile.open('wb') as zf:
+        res = requests.get(zipurl, stream=True)
+        for block in res.iter_content(None):
+            zf.write(block)
+
+    print('unpacking data set')
+    with ZipFile(zipfile, 'r') as zf:
+        zf.extractall(dir)
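
The datasets module added here (and already introduced in PATCH 3) exposes fetch_ml, which the fetch_data task wraps. A minimal direct use is sketched below; the import path lkbuild.datasets is an assumption about package layout, and note that fetch_ml writes the zip into the target directory but does not create that directory itself.

    # Hypothetical direct call to fetch_ml, bypassing the invoke task.
    from pathlib import Path

    from lkbuild.datasets import fetch_ml

    data_dir = Path('data')
    data_dir.mkdir(parents=True, exist_ok=True)  # fetch_ml expects the directory to exist

    # Downloads data/ml-100k.zip from files.grouplens.org and unpacks it;
    # the download is skipped when data/ml-100k/u.data is already present.
    fetch_ml(data_dir, 'ml-100k')
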