diff --git a/relbench/datasets/__init__.py b/relbench/datasets/__init__.py index 06154cf7..6c9011fe 100644 --- a/relbench/datasets/__init__.py +++ b/relbench/datasets/__init__.py @@ -74,7 +74,14 @@ def get_dataset(name: str, download=False) -> Dataset: Returns: Dataset: The dataset object. - If `download` is True, the dataset will be downloaded into the cache. + If `download` is True, the database comprising the dataset will be + downloaded into the cache from the RelBench server. If you use + `download=False` the first time, the database will be processed from the + raw files of the original source. + + Once the database is cached, either because of download or processing from + raw files, the cache will be used. `download=True` will verify that the + cached database matches the RelBench version even in this case. """ if download: diff --git a/relbench/tasks/__init__.py b/relbench/tasks/__init__.py index 1c94167a..3e51a4cf 100644 --- a/relbench/tasks/__init__.py +++ b/relbench/tasks/__init__.py @@ -79,7 +79,14 @@ def get_task(dataset_name: str, task_name: str, download=False) -> BaseTask: Returns: BaseTask: The task object. - If `download` is True, the task will be downloaded into the cache. + If `download` is True, the task tables (train, val, test) comprising the + task will be downloaded into the cache from the RelBench server. If you use + `download=False` the first time, the task tables will be computed from + scratch using the database. + + Once the task tables are cached, either because of download or computing from + scratch, the cache will be used. `download=True` will verify that the + cached task tables match the RelBench version even in this case. """ if download: