-
-
Notifications
You must be signed in to change notification settings - Fork 120
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #481 from WenjieDu/dev
Fix missing load_specific_dataset(), update testing_daily workflow, release v0.7.1
- Loading branch information
Showing
7 changed files
with
99 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,7 +48,7 @@ jobs: | |
- name: Build package | ||
run: | | ||
python -m build --no-isolation | ||
python -m build | ||
- name: Publish the new package to PyPI | ||
uses: pypa/[email protected] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
""" | ||
Functions to load supported open-source time-series datasets. | ||
""" | ||
|
||
# Created by Wenjie Du <[email protected]> | ||
# License: BSD-3-Clause | ||
|
||
|
||
from benchpots.datasets import preprocess_physionet2012 | ||
|
||
from ..utils.logging import logger | ||
|
||
# Preprocessing functions of the supported datasets, keyed by dataset name.
# This mapping is the single source of truth: register a new dataset here only.
PREPROCESSING_FUNC = {
    "physionet_2012": preprocess_physionet2012,
}

# Names of the currently supported datasets, derived from the mapping above so
# the two can never drift apart (dict iteration preserves insertion order).
SUPPORTED_DATASETS = list(PREPROCESSING_FUNC)
|
||
|
||
def list_supported_datasets() -> list:
    """List the names of the datasets PyPOTS natively supports so far.

    Returns
    -------
    SUPPORTED_DATASETS :
        The module-level list holding every supported dataset name.
        The list object itself is returned, not a copy.

    """
    supported = SUPPORTED_DATASETS
    return supported
|
||
|
||
def load_specific_dataset(dataset_name: str, use_cache: bool = True) -> dict:
    """Load and preprocess a dataset natively supported by PyPOTS.

    Unlike tsdb.load_dataset(), which only returns raw data,
    load_specific_dataset also applies preprocessing operations,
    such as truncating time series to generate samples with the same length.

    Parameters
    ----------
    dataset_name :
        The name of the dataset to be loaded. It must be supported, i.e. be in SUPPORTED_DATASETS.

    use_cache :
        Kept for backward compatibility. It is not forwarded to the preprocessing
        function by this implementation, so it currently has no effect here.

    Returns
    -------
    data :
        A dict containing the preprocessed dataset.
        Users only need to continue the preprocessing steps to generate the data they want,
        e.g. standardizing and splitting.

    Raises
    ------
    AssertionError
        If ``dataset_name`` is not in SUPPORTED_DATASETS.

    """
    logger.info(
        f"Loading the dataset {dataset_name} with TSDB (https://github.com/WenjieDu/Time_Series_Data_Beans)..."
    )

    # Raise explicitly rather than using an `assert` statement: asserts are
    # stripped under `python -O`, which would let an unsupported name fall
    # through to a bare KeyError below. AssertionError is kept as the exception
    # type so existing callers that catch it keep working.
    if dataset_name not in SUPPORTED_DATASETS:
        raise AssertionError(
            f"Dataset {dataset_name} is not supported. "
            f"If you believe this dataset is valuable to be supported by PyPOTS, "
            f"please create an issue on GitHub "
            f"https://github.com/WenjieDu/PyPOTS/issues"
        )

    logger.info(f"Starting preprocessing {dataset_name}...")
    # NOTE(review): the positional arguments ("all", 0.1) are presumably the
    # subset selector and the missing rate expected by the BenchPOTS
    # preprocessing function -- confirm against the BenchPOTS API.
    data = PREPROCESSING_FUNC[dataset_name]("all", 0.1)

    logger.warning(
        "⚠️ load_specific_dataset() will be deprecated in the near future. Data preprocessing functions "
        "are moved to BenchPOTS, which now supports processing 170+ public time-series datasets."
    )
    return data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.