From 0794916a74402b91ac5ec6298e24570b112ae833 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Fri, 22 Oct 2021 15:59:42 -0600
Subject: [PATCH 1/6] support for unresolved lockfiles

---
 tasks.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tasks.py b/tasks.py
index a88780261..33cc010a8 100644
--- a/tasks.py
+++ b/tasks.py
@@ -3,8 +3,10 @@
 """
 
 import sys
+from pathlib import Path
 from invoke import task
 from . import env
+import yaml
 
 __ALL__ = [
     'dev_lock',
     'conda_platform'
 ]
@@ -39,6 +41,39 @@ def dev_lock(c, platform=None, extras=None, version=None, blas=None, env_file=Fa
     c.run(cmd)
 
 
+@task(iterable=['extras'])
+def env_file(c, platform=None, extras=None, version=None, blas=None, dev_deps=True,
+             output=None, name=None):
+    "Create an unresolved environment file"
+    from conda_lock.conda_lock import parse_source_files, aggregate_lock_specs
+
+    if not platform:
+        platform = env.conda_platform()
+
+    files = [Path('pyproject.toml')]
+    if version:
+        files.append(Path(f'lkbuild/python-{version}-spec.yml'))
+    if blas:
+        files.append(Path(f'lkbuild/{blas}-spec.yml'))
+
+    lock = parse_source_files(files, platform, dev_deps, extras)
+    lock = aggregate_lock_specs(lock)
+    env_spec = {
+        'channels': lock.channels,
+        'dependencies': lock.specs,
+    }
+    if name:
+        env_spec['name'] = name
+
+    if output:
+        print('writing environment to', output, file=sys.stderr)
+        out = Path(output)
+        with out.open('w') as f:
+            yaml.dump(env_spec, f)
+    else:
+        yaml.dump(env_spec, sys.stdout)
+
+
 @task
 def conda_platform(c, gh_output=False):
     plat = env.conda_platform()

From 2b2f14b2c3ab347f37140cb3d780966ceb9b006f Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Mon, 1 Nov 2021 11:51:22 -0600
Subject: [PATCH 2/6] Add BibTeX update task to lkbuild

---
 tasks.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tasks.py b/tasks.py
index 33cc010a8..4c37490c5 100644
--- a/tasks.py
+++ b/tasks.py
@@ -7,12 +7,16 @@
 from invoke import task
 from . import env
 import yaml
+import requests
 
 __ALL__ = [
     'dev_lock',
     'conda_platform'
 ]
 
+BIBTEX_URL = 'https://paperpile.com/eb/YdOlWmnlit'
+BIBTEX_FILE = Path('docs/lenskit.bib')
+
 
 @task(iterable=['extras'])
 def dev_lock(c, platform=None, extras=None, version=None, blas=None, env_file=False):
@@ -81,3 +85,11 @@ def conda_platform(c, gh_output=False):
         print('::set-output name=conda-platform::' + plat)
     else:
         print(plat)
+
+
+@task
+def update_bibtex(c):
+    "Update the BibTeX file"
+    res = requests.get(BIBTEX_URL)
+    print('updating file', BIBTEX_FILE)
+    BIBTEX_FILE.write_text(res.text, encoding='utf-8')
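
One way to smoke-test the env_file task added above is to call it programmatically instead of through the invoke CLI. The sketch below is an assumption-laden example, not part of the patch: it presumes the module is importable as lkbuild.tasks and that pyproject.toml, lkbuild/python-3.9-spec.yml, and lkbuild/mkl-spec.yml exist in the working directory.

    # Hypothetical smoke test for the env_file task (PATCH 1); the module path
    # and spec-file names are assumptions, not defined by these patches.
    from invoke import Context

    from lkbuild import tasks

    # invoke Task objects are callable; the first argument must be a Context.
    tasks.env_file(Context(), extras=[], version='3.9', blas='mkl',
                   output='dev-env.yml', name='lkpy-dev')
    # dev-env.yml now contains the unresolved spec (channels plus constraints),
    # ready for: conda env create -f dev-env.yml
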
From e69810bfd690a917e538c039dbb16f0a229248b0 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Wed, 15 Dec 2021 11:56:25 -0700
Subject: [PATCH 3/6] add data-fetching logic

---
 datasets.py | 33 +++++++++++++++++++++++++++++++++
 tasks.py    | 10 ++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 datasets.py

diff --git a/datasets.py b/datasets.py
new file mode 100644
index 000000000..4e7b40b22
--- /dev/null
+++ b/datasets.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+import requests
+from zipfile import ZipFile
+
+ML_LOC = "http://files.grouplens.org/datasets/movielens/"
+ML_DATASETS = {
+    'ml-100k': 'ml-100k/u.data',
+    'ml-1m': 'ml-1m/ratings.dat',
+    'ml-10m': 'ml-10M100K/ratings.dat',
+    'ml-20m': 'ml-20m/ratings.csv',
+    'ml-25m': 'ml-25m/ratings.csv',
+}
+
+
+def fetch_ml(dir: Path, ds: str):
+    zipname = f'{ds}.zip'
+    zipfile = dir / zipname
+    zipurl = ML_LOC + zipname
+
+    test_file = dir / ML_DATASETS[ds]
+    if test_file.exists():
+        print(test_file, 'already exists')
+        return
+
+    print('downloading data set', ds)
+    with zipfile.open('wb') as zf:
+        res = requests.get(zipurl, stream=True)
+        for block in res.iter_content(None):
+            zf.write(block)
+
+    print('unpacking data set')
+    with ZipFile(zipfile, 'r') as zf:
+        zf.extractall(dir)
diff --git a/tasks.py b/tasks.py
index 4c37490c5..63f3abd7f 100644
--- a/tasks.py
+++ b/tasks.py
@@ -14,6 +14,7 @@
     'conda_platform'
 ]
 
+DATA_DIR = Path('data')
 BIBTEX_URL = 'https://paperpile.com/eb/YdOlWmnlit'
 BIBTEX_FILE = Path('docs/lenskit.bib')
 
@@ -93,3 +94,12 @@ def update_bibtex(c):
     res = requests.get(BIBTEX_URL)
     print('updating file', BIBTEX_FILE)
     BIBTEX_FILE.write_text(res.text, encoding='utf-8')
+
+
+@task
+def fetch_data(c, data='ml-100k', data_dir=DATA_DIR):
+    "Fetch a data set."
+    from . import datasets
+
+    if data.startswith('ml-'):
+        datasets.fetch_ml(Path(data_dir), data)

From 84641a707ce84f9c39457a44448985d5c5339b89 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Sat, 22 Jan 2022 10:33:06 -0700
Subject: [PATCH 4/6] drop version from MKL dep

---
 mkl-spec.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mkl-spec.yml b/mkl-spec.yml
index d90293dea..0b52692fb 100644
--- a/mkl-spec.yml
+++ b/mkl-spec.yml
@@ -1,4 +1,4 @@
 # environment mini-spec for ensuring MKL
 dependencies:
   - libblas=*=*mkl
-  - mkl=2020
+  - tbb

From b124c816f4c462cf33be49671ec32910861365a7 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Sat, 22 Jan 2022 10:45:40 -0700
Subject: [PATCH 5/6] Add LK tools to build

---
 tasks.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/tasks.py b/tasks.py
index 626cdf352..d283066f6 100644
--- a/tasks.py
+++ b/tasks.py
@@ -3,14 +3,20 @@
 """
 
 import sys
+from pathlib import Path
 from invoke import task
 from . import env
+import yaml
+import requests
 
 __ALL__ = [
     'dev_lock',
     'conda_platform'
 ]
 
+DATA_DIR = Path('data')
+BIBTEX_URL = 'https://paperpile.com/eb/YdOlWmnlit'
+BIBTEX_FILE = Path('docs/lenskit.bib')
 
 @task(iterable=['extras', 'mixins'])
 def dev_lock(c, platform=None, extras=None, version=None, blas=None, mixins=None, env_file=False):
@@ -41,6 +47,39 @@ def dev_lock(c, platform=None, extras=None, version=None, blas=None, mixins=None
     c.run(cmd)
 
 
+@task(iterable=['extras'])
+def env_file(c, platform=None, extras=None, version=None, blas=None, dev_deps=True,
+             output=None, name=None):
+    "Create an unresolved environment file"
+    from conda_lock.conda_lock import parse_source_files, aggregate_lock_specs
+
+    if not platform:
+        platform = env.conda_platform()
+
+    files = [Path('pyproject.toml')]
+    if version:
+        files.append(Path(f'lkbuild/python-{version}-spec.yml'))
+    if blas:
+        files.append(Path(f'lkbuild/{blas}-spec.yml'))
+
+    lock = parse_source_files(files, platform, dev_deps, extras)
+    lock = aggregate_lock_specs(lock)
+    env_spec = {
+        'channels': lock.channels,
+        'dependencies': lock.specs,
+    }
+    if name:
+        env_spec['name'] = name
+
+    if output:
+        print('writing environment to', output, file=sys.stderr)
+        out = Path(output)
+        with out.open('w') as f:
+            yaml.dump(env_spec, f)
+    else:
+        yaml.dump(env_spec, sys.stdout)
+
+
 @task
 def conda_platform(c, gh_output=False):
     plat = env.conda_platform()
@@ -48,3 +87,20 @@ def conda_platform(c, gh_output=False):
         print('::set-output name=conda-platform::' + plat)
     else:
         print(plat)
+
+
+@task
+def update_bibtex(c):
+    "Update the BibTeX file"
+    res = requests.get(BIBTEX_URL)
+    print('updating file', BIBTEX_FILE)
+    BIBTEX_FILE.write_text(res.text, encoding='utf-8')
+
+
+@task
+def fetch_data(c, data='ml-100k', data_dir=DATA_DIR):
+    "Fetch a data set."
+    from . import datasets
+
+    if data.startswith('ml-'):
+        datasets.fetch_ml(Path(data_dir), data)
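
PATCH 4 trims the MKL mini-spec that dev_lock and env_file pull in through the --blas option, and PATCH 5 vendors those tasks into the consuming project's build. To check what the aggregation step collects from pyproject.toml plus such a spec file, one can mirror the conda-lock calls made inside env_file; the file paths and platform string below are illustrative assumptions.

    # Hypothetical dry run of the spec aggregation done by env_file/dev_lock.
    # It repeats the parse_source_files/aggregate_lock_specs calls from the task;
    # the paths assume the lkbuild spec files are vendored under lkbuild/.
    from pathlib import Path

    from conda_lock.conda_lock import parse_source_files, aggregate_lock_specs

    files = [
        Path('pyproject.toml'),
        Path('lkbuild/mkl-spec.yml'),  # the file PATCH 4 edits, chosen by --blas mkl
    ]
    specs = parse_source_files(files, 'linux-64', True, [])  # platform, dev deps, extras
    lock = aggregate_lock_specs(specs)
    print(lock.channels)  # channels gathered from the source files
    print(lock.specs)     # unresolved constraints, e.g. 'libblas=*=*mkl'
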
From e3d185cd5a78b5ebbf695ced1ad859c2640da32a Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Sat, 22 Jan 2022 11:19:02 -0700
Subject: [PATCH 6/6] add datasets.py

---
 datasets.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 datasets.py

diff --git a/datasets.py b/datasets.py
new file mode 100644
index 000000000..4e7b40b22
--- /dev/null
+++ b/datasets.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+import requests
+from zipfile import ZipFile
+
+ML_LOC = "http://files.grouplens.org/datasets/movielens/"
+ML_DATASETS = {
+    'ml-100k': 'ml-100k/u.data',
+    'ml-1m': 'ml-1m/ratings.dat',
+    'ml-10m': 'ml-10M100K/ratings.dat',
+    'ml-20m': 'ml-20m/ratings.csv',
+    'ml-25m': 'ml-25m/ratings.csv',
+}
+
+
+def fetch_ml(dir: Path, ds: str):
+    zipname = f'{ds}.zip'
+    zipfile = dir / zipname
+    zipurl = ML_LOC + zipname
+
+    test_file = dir / ML_DATASETS[ds]
+    if test_file.exists():
+        print(test_file, 'already exists')
+        return
+
+    print('downloading data set', ds)
+    with zipfile.open('wb') as zf:
+        res = requests.get(zipurl, stream=True)
+        for block in res.iter_content(None):
+            zf.write(block)
+
+    print('unpacking data set')
+    with ZipFile(zipfile, 'r') as zf:
+        zf.extractall(dir)
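
The datasets module added here (and already introduced in PATCH 3) exposes fetch_ml, which the fetch_data task wraps. A minimal direct use is sketched below; the import path lkbuild.datasets is an assumption about package layout, and note that fetch_ml writes the zip into the target directory but does not create that directory itself.

    # Hypothetical direct call to fetch_ml, bypassing the invoke task.
    from pathlib import Path

    from lkbuild.datasets import fetch_ml

    data_dir = Path('data')
    data_dir.mkdir(parents=True, exist_ok=True)  # fetch_ml expects the directory to exist

    # Downloads data/ml-100k.zip from files.grouplens.org and unpacks it;
    # the download is skipped when data/ml-100k/u.data is already present.
    fetch_ml(data_dir, 'ml-100k')
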