"""BasicTS configuration for training BigST on the PEMS08 dataset.

This config enables the long-history variant (``use_long=True``): the model
wraps a pre-trained ``BigSTPreprocess`` network whose checkpoint location is
given by ``PREPROCESSED_FILE``.
"""
import os
import sys

import torch
from easydict import EasyDict

sys.path.append(os.path.abspath(__file__ + '/../../..'))

from basicts.metrics import masked_mae, masked_mape, masked_rmse
from basicts.data import TimeSeriesForecastingDataset
from basicts.runners import SimpleTimeSeriesForecastingRunner
from basicts.scaler import ZScoreScaler
from basicts.utils import get_regular_settings, load_adj

from .arch import BigST
from .loss import bigst_loss

############################## Hot Parameters ##############################
# Dataset & Metrics configuration
DATA_NAME = 'PEMS08'  # Dataset name
regular_settings = get_regular_settings(DATA_NAME)
INPUT_LEN = 2016  # Length of input sequence (overrides regular_settings['INPUT_LEN'])
OUTPUT_LEN = 12  # Length of output sequence (overrides regular_settings['OUTPUT_LEN'])
TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO']  # Train/Validation/Test split ratios
NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL']  # Whether to normalize each channel of the data
RESCALE = regular_settings['RESCALE']  # Whether to rescale the data
NULL_VAL = regular_settings['NULL_VAL']  # Null value in the data
# Model architecture and parameters
# Checkpoint written by the preprocessing run. Built with os.path.join so the
# path also resolves on non-Windows systems (hard-coded backslashes would be
# treated as part of a single file name there).
PREPROCESSED_FILE = os.path.join(
    "checkpoints",
    "BigSTPreprocess",
    "PEMS08_100_2016_12",
    "db8308a2c87de35e5f3db6177c5714ff",
    "BigSTPreprocess_best_val_MAE.pt",
)
MODEL_ARCH = BigST

adj_mx, _ = load_adj("datasets/" + DATA_NAME +
                     "/adj_mx.pkl", "doubletransition")
MODEL_PARAM = {
    "bigst_args": {
        "num_nodes": 170,
        "seq_num": 12,
        "in_dim": 3,
        "out_dim": OUTPUT_LEN,  # the original source code hard-coded this to 12
        "hid_dim": 32,
        "tau": 0.25,
        "random_feature_dim": 64,
        "node_emb_dim": 32,
        "time_emb_dim": 32,
        "use_residual": True,
        "use_bn": True,
        "use_long": True,
        "use_spatial": True,
        "dropout": 0.3,
        "supports": [torch.tensor(i) for i in adj_mx],
        "time_of_day_size": 288,
        "day_of_week_size": 7,
    },
    "preprocess_path": PREPROCESSED_FILE,
    # Must describe the same network that produced PREPROCESSED_FILE.
    "preprocess_args": {
        "num_nodes": 170,
        "in_dim": 3,
        "dropout": 0.3,
        "input_length": INPUT_LEN,
        "output_length": OUTPUT_LEN,
        "nhid": 32,
        "tiny_batch_size": 64,
    },
}

NUM_EPOCHS = 100

############################## General Configuration ##############################
CFG = EasyDict()
# General settings
CFG.DESCRIPTION = 'An Example Config'
CFG.GPU_NUM = 1  # Number of GPUs to use (0 for CPU mode)
# Runner
CFG.RUNNER = SimpleTimeSeriesForecastingRunner

############################## Environment Configuration ##############################

CFG.ENV = EasyDict()  # Environment settings. Default: None
CFG.ENV.SEED = 0  # Random seed. Default: None

############################## Dataset Configuration ##############################
CFG.DATASET = EasyDict()
# Dataset settings
CFG.DATASET.NAME = DATA_NAME
CFG.DATASET.TYPE = TimeSeriesForecastingDataset
CFG.DATASET.PARAM = EasyDict({
    'dataset_name': DATA_NAME,
    'train_val_test_ratio': TRAIN_VAL_TEST_RATIO,
    'input_len': INPUT_LEN,
    'output_len': OUTPUT_LEN,
    # 'mode' is automatically set by the runner
})

############################## Scaler Configuration ##############################
CFG.SCALER = EasyDict()
# Scaler settings
CFG.SCALER.TYPE = ZScoreScaler  # Scaler class
CFG.SCALER.PARAM = EasyDict({
    'dataset_name': DATA_NAME,
    'train_ratio': TRAIN_VAL_TEST_RATIO[0],
    'norm_each_channel': NORM_EACH_CHANNEL,
    'rescale': RESCALE,
})

############################## Model Configuration ##############################
CFG.MODEL = EasyDict()
# Model settings
CFG.MODEL.NAME = MODEL_ARCH.__name__
CFG.MODEL.ARCH = MODEL_ARCH
CFG.MODEL.PARAM = MODEL_PARAM
CFG.MODEL.FORWARD_FEATURES = [0, 1, 2]
CFG.MODEL.TARGET_FEATURES = [0]

############################## Metrics Configuration ##############################

CFG.METRICS = EasyDict()
# Metrics settings
CFG.METRICS.FUNCS = EasyDict({
    'MAE': masked_mae,
    'MAPE': masked_mape,
    'RMSE': masked_rmse,
})
CFG.METRICS.TARGET = 'MAE'
CFG.METRICS.NULL_VAL = NULL_VAL

############################## Training Configuration ##############################
CFG.TRAIN = EasyDict()
CFG.TRAIN.NUM_EPOCHS = NUM_EPOCHS
CFG.TRAIN.CKPT_SAVE_DIR = os.path.join(
    'checkpoints',
    MODEL_ARCH.__name__,
    '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)])
)

# The BigST-specific loss is only selected when the spatial branch is enabled.
CFG.TRAIN.LOSS = bigst_loss if MODEL_PARAM['bigst_args']['use_spatial'] else masked_mae
# Optimizer settings
CFG.TRAIN.OPTIM = EasyDict()
CFG.TRAIN.OPTIM.TYPE = "AdamW"
CFG.TRAIN.OPTIM.PARAM = {
    "lr": 0.002,
    "weight_decay": 0.0001,
}
# Learning rate scheduler settings
CFG.TRAIN.LR_SCHEDULER = EasyDict()
CFG.TRAIN.LR_SCHEDULER.TYPE = "MultiStepLR"
CFG.TRAIN.LR_SCHEDULER.PARAM = {
    "milestones": [1, 50],
    "gamma": 0.5
}
# Train data loader settings
CFG.TRAIN.DATA = EasyDict()
CFG.TRAIN.DATA.BATCH_SIZE = 64
CFG.TRAIN.DATA.SHUFFLE = True
# Gradient clipping settings
CFG.TRAIN.CLIP_GRAD_PARAM = {
    "max_norm": 5.0
}

############################## Validation Configuration ##############################
CFG.VAL = EasyDict()
CFG.VAL.INTERVAL = 1
CFG.VAL.DATA = EasyDict()
CFG.VAL.DATA.BATCH_SIZE = 64

############################## Test Configuration ##############################
CFG.TEST = EasyDict()
CFG.TEST.INTERVAL = 1
CFG.TEST.DATA = EasyDict()
CFG.TEST.DATA.BATCH_SIZE = 64

############################## Evaluation Configuration ##############################
CFG.EVAL = EasyDict()

# Evaluation parameters
CFG.EVAL.HORIZONS = [3, 6, 12]  # Prediction horizons for evaluation. Default: []
CFG.EVAL.USE_GPU = True  # Whether to use GPU for evaluation.
Default: True + + diff --git a/baselines/BigST/PEMS04.py b/baselines/BigST/PreprocessPEMS08.py similarity index 84% rename from baselines/BigST/PEMS04.py rename to baselines/BigST/PreprocessPEMS08.py index 6dcc698a..39d7f4b9 100644 --- a/baselines/BigST/PEMS04.py +++ b/baselines/BigST/PreprocessPEMS08.py @@ -10,41 +10,32 @@ from basicts.scaler import ZScoreScaler from basicts.utils import get_regular_settings, load_adj -from .arch import BigST -from .loss import bigst_loss +from .arch import BigSTPreprocess +from .runner import BigSTPreprocessRunner ############################## Hot Parameters ############################## # Dataset & Metrics configuration -DATA_NAME = 'PEMS04' # Dataset name +DATA_NAME = 'PEMS08' # Dataset name regular_settings = get_regular_settings(DATA_NAME) -INPUT_LEN = regular_settings['INPUT_LEN'] # Length of input sequence -OUTPUT_LEN = regular_settings['OUTPUT_LEN'] # Length of output sequence +INPUT_LEN = 2016 +OUTPUT_LEN = 12 TRAIN_VAL_TEST_RATIO = regular_settings['TRAIN_VAL_TEST_RATIO'] # Train/Validation/Test split ratios NORM_EACH_CHANNEL = regular_settings['NORM_EACH_CHANNEL'] # Whether to normalize each channel of the data RESCALE = regular_settings['RESCALE'] # Whether to rescale the data NULL_VAL = regular_settings['NULL_VAL'] # Null value in the data # Model architecture and parameters -MODEL_ARCH = BigST +MODEL_ARCH = BigSTPreprocess adj_mx, _ = load_adj("datasets/" + DATA_NAME + "/adj_mx.pkl", "doubletransition") MODEL_PARAM = { - "num_nodes": 307, - "seq_num": INPUT_LEN, + "num_nodes": 170, "in_dim": 3, - "out_dim": OUTPUT_LEN, - "hid_dim": 32, - "tau" : 0.25, - "random_feature_dim": 64, - "node_emb_dim": 32, - "time_emb_dim": 32, - "use_residual": True, - "use_bn": True, - "use_spatial": True, - "use_long": False, "dropout": 0.3, - "supports": [torch.tensor(i) for i in adj_mx], - "time_of_day_size": 288, - "day_of_week_size": 7, + "input_length": INPUT_LEN, + "output_length": OUTPUT_LEN, + "nhid": 32, + "tiny_batch_size": 
64, + } NUM_EPOCHS = 100 @@ -55,7 +46,7 @@ CFG.DESCRIPTION = 'An Example Config' CFG.GPU_NUM = 1 # Number of GPUs to use (0 for CPU mode) # Runner -CFG.RUNNER = SimpleTimeSeriesForecastingRunner +CFG.RUNNER = BigSTPreprocessRunner ############################## Environment Configuration ############################## @@ -115,7 +106,7 @@ MODEL_ARCH.__name__, '_'.join([DATA_NAME, str(CFG.TRAIN.NUM_EPOCHS), str(INPUT_LEN), str(OUTPUT_LEN)]) ) -CFG.TRAIN.LOSS = bigst_loss +CFG.TRAIN.LOSS = masked_mae # Optimizer settings CFG.TRAIN.OPTIM = EasyDict() CFG.TRAIN.OPTIM.TYPE = "AdamW" @@ -132,7 +123,7 @@ } # Train data loader settings CFG.TRAIN.DATA = EasyDict() -CFG.TRAIN.DATA.BATCH_SIZE = 64 +CFG.TRAIN.DATA.BATCH_SIZE = 1 CFG.TRAIN.DATA.SHUFFLE = True # Gradient clipping settings CFG.TRAIN.CLIP_GRAD_PARAM = { @@ -143,13 +134,13 @@ CFG.VAL = EasyDict() CFG.VAL.INTERVAL = 1 CFG.VAL.DATA = EasyDict() -CFG.VAL.DATA.BATCH_SIZE = 64 +CFG.VAL.DATA.BATCH_SIZE = 1 ############################## Test Configuration ############################## CFG.TEST = EasyDict() CFG.TEST.INTERVAL = 1 CFG.TEST.DATA = EasyDict() -CFG.TEST.DATA.BATCH_SIZE = 64 +CFG.TEST.DATA.BATCH_SIZE = 1 ############################## Evaluation Configuration ############################## diff --git a/baselines/BigST/arch/__init__.py b/baselines/BigST/arch/__init__.py index 7cb17069..e2d419fd 100644 --- a/baselines/BigST/arch/__init__.py +++ b/baselines/BigST/arch/__init__.py @@ -1,3 +1,5 @@ from .bigst_arch import BigST +from .preprocess import BigSTPreprocess -__all__ = ["BigST"] + +__all__ = ["BigST", "BigSTPreprocess"] diff --git a/baselines/BigST/arch/bigst_arch.py b/baselines/BigST/arch/bigst_arch.py index 5e8c6034..dd3d0342 100644 --- a/baselines/BigST/arch/bigst_arch.py +++ b/baselines/BigST/arch/bigst_arch.py @@ -1,3 +1,4 @@ +import os import math import torch import torch.nn as nn @@ -5,6 +6,19 @@ from .linear_conv import * from torch.autograd import Variable import pdb +from .preprocess import 
def sample_period(x, time_num, period_len=12):
    """Average the leading window of every period of the history.

    Starting at step 0 and advancing ``time_num`` steps at a time, a window of
    ``period_len`` consecutive steps of feature channel 0 is cut from ``x``;
    the windows are then averaged element-wise.

    Args:
        x: Tensor whose last two axes are (time, feature); the caller in this
            file passes a (B, N, T, F) tensor.
        time_num: Stride between window starts (the period length in steps).
        period_len: Number of steps per window. Defaults to 12, the value
            that used to be hard-coded.

    Returns:
        Tensor with the feature axis removed and the time axis reduced to the
        window length, e.g. (B, N, period_len).
    """
    history_length = x.shape[-2]
    windows = [x[:, :, start:start + period_len, 0]
               for start in range(0, history_length, time_num)]
    # A trailing window that runs past the end of the history is shorter than
    # the others and would make torch.stack fail, so only complete windows are
    # averaged. If no window is complete the list is left untouched, which
    # keeps the previous behaviour for very short histories.
    complete = [w for w in windows if w.shape[-1] == period_len]
    if complete:
        windows = complete
    return torch.stack(windows).mean(dim=0)


class BigST(nn.Module):
    """
    Paper: BigST: Linear Complexity Spatio-Temporal Graph Neural Network for Traffic Forecasting on Large-Scale Road Networks
    Link: https://dl.acm.org/doi/10.14778/3641204.3641217
    Official Code: https://github.com/usail-hkust/BigST?tab=readme-ov-file
    Venue: VLDB 2024
    Task: Spatial-Temporal Forecasting

    Wrapper that feeds the last ``seq_num`` steps of the history to the core
    ``Model`` and, when ``use_long`` is set, additionally supplies long-history
    features produced by a frozen, pre-trained ``BigSTPreprocess`` network.

    Args:
        bigst_args: Keyword arguments forwarded to ``Model``.
        preprocess_path: Checkpoint of the pre-trained feature extractor
            (only read when ``bigst_args['use_long']`` is true).
        preprocess_args: Keyword arguments used to build ``BigSTPreprocess``.
    """

    def __init__(self, bigst_args, preprocess_path, preprocess_args):
        super(BigST, self).__init__()

        self.use_long = bigst_args['use_long']
        self.in_dim = bigst_args['in_dim']
        self.out_dim = bigst_args['out_dim']
        # Length of the short input window; Model sizes its input embedding
        # as seq_num * in_dim, so the slice taken in forward() must use it.
        self.seq_num = bigst_args['seq_num']
        self.time_num = bigst_args['time_of_day_size']
        self.bigst = Model(**bigst_args)

        if self.use_long:
            # Imported here so the extractor module is only needed when used.
            from .preprocess import BigSTPreprocess
            self.feat_extractor = BigSTPreprocess(**preprocess_args)
            self.load_pre_trained_model(preprocess_path)

    def load_pre_trained_model(self, preprocess_path):
        """Load the pre-trained feature extractor and freeze it.

        Args:
            preprocess_path: Path to a checkpoint holding the extractor weights
                under the ``"model_state_dict"`` key.
        """
        # map_location="cpu" lets a checkpoint saved on a GPU be opened on any
        # machine; load_state_dict then copies into the module's own tensors.
        checkpoint_dict = torch.load(preprocess_path, map_location="cpu")
        self.feat_extractor.load_state_dict(checkpoint_dict["model_state_dict"])
        # freeze parameters
        for param in self.feat_extractor.parameters():
            param.requires_grad = False

        self.feat_extractor.eval()

    def train(self, mode: bool = True):
        """Set the training mode while keeping the frozen extractor in eval mode.

        ``nn.Module.train`` applies the mode to every sub-module, which would
        otherwise switch the frozen feature extractor back to training mode.
        """
        super().train(mode)
        if self.use_long:
            self.feat_extractor.eval()
        return self

    def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> dict:
        """Run the core model, optionally with long-history features.

        Args:
            history_data: Input history; axes 1 and 2 are swapped so that it
                becomes (B, N, T, D) as noted in the original code.
            future_data, batch_seen, epoch, train: Passed through unchanged to
                the feature extractor.

        Returns:
            The dictionary produced by ``Model.forward``.
        """
        history_data = history_data.transpose(1, 2)  # (B, N, T, D)
        x = history_data[:, :, -self.seq_num:]  # last seq_num steps: (B, N, seq_num, D)

        if not self.use_long:
            return self.bigst(x)

        # The extractor is called one sample at a time, as in the original.
        feat = []
        with torch.no_grad():
            for i in range(history_data.shape[0]):
                feat_sample = self.feat_extractor(
                    history_data[i:i + 1], future_data, batch_seen, epoch, train)
                feat.append(feat_sample['feat'])

        feat = torch.cat(feat, dim=0)
        # The window length must equal seq_num because Model's regression
        # layer reserves exactly seq_num channels for these periodic features.
        feat_period = sample_period(history_data, self.time_num, self.seq_num)
        feat = torch.cat([feat, feat_period], dim=2)

        return self.bigst(x, feat)


class Model(nn.Module):
    """Core BigST network: input/node/time embeddings followed by linearized
    spatial convolutions and a 1x1-convolution regression head.

    ``forward`` concatenates the input, node, time-of-day and day-of-week
    embeddings and feeds them to layers built for ``hid_dim*4`` channels, so
    ``node_emb_dim`` and ``time_emb_dim`` are expected to equal ``hid_dim``.
    ``edge_indices`` is accepted but not used by this class.
    """

    def __init__(self, seq_num, in_dim, out_dim, hid_dim, num_nodes, tau, random_feature_dim, node_emb_dim, time_emb_dim, \
                 use_residual, use_bn, use_spatial, use_long, dropout, time_of_day_size, day_of_week_size, supports=None, edge_indices=None):
        super(Model, self).__init__()

        self.tau = tau
        self.layer_num = 3
        self.in_dim = in_dim
        self.random_feature_dim = random_feature_dim

        self.use_residual = use_residual
        self.use_bn = use_bn
        self.use_spatial = use_spatial
        self.use_long = use_long

        self.dropout = dropout
        self.activation = nn.ReLU()
        self.supports = supports

        self.time_num = time_of_day_size
        self.week_num = day_of_week_size

        # node embedding layer
        self.node_emb_layer = nn.Parameter(torch.empty(num_nodes, node_emb_dim))
        nn.init.xavier_uniform_(self.node_emb_layer)

        # time embedding layer
        self.time_emb_layer = nn.Parameter(torch.empty(self.time_num, time_emb_dim))
        nn.init.xavier_uniform_(self.time_emb_layer)
        self.week_emb_layer = nn.Parameter(torch.empty(self.week_num, time_emb_dim))
        nn.init.xavier_uniform_(self.week_emb_layer)

        # embedding layer
        self.input_emb_layer = nn.Conv2d(seq_num*in_dim, hid_dim, kernel_size=(1, 1), bias=True)

        self.W_1 = nn.Conv2d(node_emb_dim+time_emb_dim*2, hid_dim, kernel_size=(1, 1), bias=True)
        self.W_2 = nn.Conv2d(node_emb_dim+time_emb_dim*2, hid_dim, kernel_size=(1, 1), bias=True)

        self.linear_conv = nn.ModuleList()
        self.bn = nn.ModuleList()

        self.supports_len = 0
        if supports is not None:
            self.supports_len += len(supports)

        for _ in range(self.layer_num):
            self.linear_conv.append(linearized_conv(hid_dim*4, hid_dim*4, self.dropout, self.tau, self.random_feature_dim))
            self.bn.append(nn.LayerNorm(hid_dim*4))

        # Head input: the pre-convolution and post-convolution features
        # (hid_dim*4 each); with use_long, hid_dim + seq_num extra channels
        # are reserved for the long-history features.
        if self.use_long:
            self.regression_layer = nn.Conv2d(hid_dim*4*2+hid_dim+seq_num, out_dim, kernel_size=(1, 1), bias=True)
        else:
            self.regression_layer = nn.Conv2d(hid_dim*4*2, out_dim, kernel_size=(1, 1), bias=True)

    def forward(self, x, feat=None):
        """Compute the prediction and the node vectors of the last conv layer.

        Args:
            x: (B, N, T, D) input; feature 1 and feature 2 of the last step
                are used as time-of-day and day-of-week indices.
            feat: (B, N, F) long-history features; required when the model was
                built with ``use_long=True``.

        Returns:
            dict with keys ``prediction`` (B, out_dim, N, 1), ``node_vec1``,
            ``node_vec2``, ``supports`` and ``use_spatial``.

        Raises:
            ValueError: if ``use_long`` is set but ``feat`` is not given.
        """
        if self.use_long and feat is None:
            raise ValueError("feat must be provided when use_long is True")

        # x: (B, N, T, D)
        B, N, T, D = x.size()

        # .long() keeps the indices on the same device as x.
        # NOTE(review): the cast truncates; presumably feature 1 is a
        # time-of-day fraction in [0, 1) -- confirm rounding is not needed.
        time_idx = (x[:, :, -1, 1] * self.time_num).long()
        week_idx = x[:, :, -1, 2].long()
        time_emb = self.time_emb_layer[time_idx]  # (B, N, time_emb_dim)
        week_emb = self.week_emb_layer[week_idx]  # (B, N, time_emb_dim)

        # input embedding
        x = x.contiguous().view(B, N, -1).transpose(1, 2).unsqueeze(-1)  # (B, T*D, N, 1)
        input_emb = self.input_emb_layer(x)  # (B, hid_dim, N, 1)

        # node embeddings
        node_emb = self.node_emb_layer.unsqueeze(0).expand(B, -1, -1).transpose(1, 2).unsqueeze(-1)  # (B, node_emb_dim, N, 1)

        # time embeddings
        time_emb = time_emb.transpose(1, 2).unsqueeze(-1)  # (B, time_emb_dim, N, 1)
        week_emb = week_emb.transpose(1, 2).unsqueeze(-1)  # (B, time_emb_dim, N, 1)

        x_g = torch.cat([node_emb, time_emb, week_emb], dim=1)  # (B, node_emb_dim+2*time_emb_dim, N, 1)
        x = torch.cat([input_emb, node_emb, time_emb, week_emb], dim=1)  # (B, hid_dim+node_emb_dim+2*time_emb_dim, N, 1)

        # linearized spatial convolution
        x_pool = [x]
        node_vec1 = self.W_1(x_g).permute(0, 2, 3, 1)  # (B, N, 1, hid_dim)
        node_vec2 = self.W_2(x_g).permute(0, 2, 3, 1)  # (B, N, 1, hid_dim)
        for i in range(self.layer_num):
            if self.use_residual:
                residual = x
            x, node_vec1_prime, node_vec2_prime = self.linear_conv[i](x, node_vec1, node_vec2)

            if self.use_residual:
                x = x+residual

            if self.use_bn:
                # LayerNorm normalizes the last axis, so channels go last.
                x = x.permute(0, 2, 3, 1)  # (B, N, 1, C)
                x = self.bn[i](x)
                x = x.permute(0, 3, 1, 2)  # (B, C, N, 1)

        # Only the output of the final layer is pooled with the input
        # features, matching the hid_dim*4*2 channels of the regression head.
        x_pool.append(x)
        x = torch.cat(x_pool, dim=1)

        x = self.activation(x)

        if self.use_long:
            feat = feat.permute(0, 2, 1).unsqueeze(-1)  # (B, F, N, 1)
            x = torch.cat([x, feat], dim=1)
        x = self.regression_layer(x)  # (B, out_dim, N, 1)
        x = x.squeeze(-1).permute(0, 2, 1)  # (B, N, out_dim)

        return {"prediction": x.transpose(1, 2).unsqueeze(-1),
                "node_vec1": node_vec1_prime,
                "node_vec2": node_vec2_prime,
                "supports": self.supports,
                'use_spatial': self.use_spatial}
https://dl.acm.org/doi/10.14778/3641204.3641217 + Official Code: https://github.com/usail-hkust/BigST?tab=readme-ov-file + Venue: VLDB 2024 + Task: Spatial-Temporal Forecasting + """ + def __init__(self, input_length, output_length, in_dim, num_nodes, nhid, tiny_batch_size, dropout=0.3): + super(BigSTPreprocess, self).__init__() self.tau = 1.0 self.layer_num = 3 self.random_feature_dim = nhid*2 @@ -175,7 +185,10 @@ def __init__(self, input_length, output_length, in_dim, num_nodes, nhid, dropout self.regression_layer = nn.Linear(nhid, output_length) - def forward(self, x): + self.tiny_batch_size = tiny_batch_size + + def forward(self, history_data: torch.Tensor, future_data: torch.Tensor, batch_seen: int, epoch: int, train: bool, **kwargs) -> torch.Tensor: + x = history_data # input: (1, 9638, 2016, 3) (B, N, T, D) B, N, T, D = x.size() pe = self.temporal_embedding.unsqueeze(0).expand(B*N, -1, -1) # (B*N, T/12, nhid) @@ -203,4 +216,4 @@ def forward(self, x): # x = torch.sum(x, dim=1) # (B*N, nhid) feat = x.view(B, N, -1) # (B, N, nhid) x = self.regression_layer(feat) # (B, N, output_length) - return x, feat + return {'prediction': x.transpose(1,2).unsqueeze(-1), 'feat':feat} \ No newline at end of file diff --git a/baselines/BigST/arch/preprocess/metrics.py b/baselines/BigST/arch/preprocess/metrics.py deleted file mode 100644 index aac0af60..00000000 --- a/baselines/BigST/arch/preprocess/metrics.py +++ /dev/null @@ -1,53 +0,0 @@ -import torch -import numpy as np - -def masked_mse(preds, labels, null_val=np.nan): - if np.isnan(null_val): - mask = ~torch.isnan(labels) - else: - mask = (labels!=null_val) - mask = mask.float() - mask /= torch.mean((mask)) - mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = (preds-labels)**2 - loss = loss * mask - loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) - return torch.mean(loss) - -def masked_rmse(preds, labels, null_val=np.nan): - return torch.sqrt(masked_mse(preds=preds, 
labels=labels, null_val=null_val)) - -def masked_mae(preds, labels, null_val=np.nan): - if np.isnan(null_val): - mask = ~torch.isnan(labels) - else: - mask = (labels!=null_val) - mask = mask.float() - mask /= torch.mean((mask)) - mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = torch.abs(preds-labels) - loss = loss * mask - loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) - return torch.mean(loss) - - -def masked_mape(preds, labels, null_val=np.nan): - labels = torch.where(labels<0.01, torch.zeros_like(labels), labels) - if np.isnan(null_val): - mask = ~torch.isnan(labels) - else: - mask = (labels!=null_val) - mask = mask.float() - mask /= torch.mean((mask)) - mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = torch.abs(preds-labels)/labels - loss = loss * mask - loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) - return torch.mean(loss) - - -def metric(pred, real): - mae = masked_mae(pred,real,0.0).item() - mape = masked_mape(pred,real,0.0).item() - rmse = masked_rmse(pred,real,0.0).item() - return mae,mape,rmse \ No newline at end of file diff --git a/baselines/BigST/arch/preprocess/pipeline.py b/baselines/BigST/arch/preprocess/pipeline.py deleted file mode 100644 index 46499b73..00000000 --- a/baselines/BigST/arch/preprocess/pipeline.py +++ /dev/null @@ -1,38 +0,0 @@ -import torch.optim as optim -from model import * -import metrics - -class train_pipeline(): - def __init__(self, scaler, input_length, output_length, in_dim, num_nodes, nhid, dropout, lrate, wdecay, device): - self.model = linear_transformer(input_length, output_length, in_dim, num_nodes, nhid, dropout) - self.model.to(device) - self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay) - self.loss = metrics.masked_mae - self.scaler = scaler - self.clip = 5 - - def train(self, input, real_val): - self.model.train() - self.optimizer.zero_grad() - output, _ = self.model(input) - real = 
self.scaler.inverse_transform(real_val) - predict = self.scaler.inverse_transform(output) - - loss = self.loss(predict, real, 0.0) - loss.backward() - if self.clip is not None: - torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip) - self.optimizer.step() - mape = metrics.masked_mape(predict,real,0.0).item() - rmse = metrics.masked_rmse(predict,real,0.0).item() - return loss.item(), mape, rmse - - def eval(self, input, real_val): - self.model.eval() - output, _ = self.model(input) - real = self.scaler.inverse_transform(real_val) - predict = self.scaler.inverse_transform(output) - loss = self.loss(predict, real, 0.0) - mape = metrics.masked_mape(predict,real,0.0).item() - rmse = metrics.masked_rmse(predict,real,0.0).item() - return loss.item(), mape, rmse diff --git a/baselines/BigST/arch/preprocess/preprocess.py b/baselines/BigST/arch/preprocess/preprocess.py deleted file mode 100644 index feb795e3..00000000 --- a/baselines/BigST/arch/preprocess/preprocess.py +++ /dev/null @@ -1,127 +0,0 @@ -import torch -import numpy as np -import argparse -import time -import util -from pipeline import train_pipeline - -parser = argparse.ArgumentParser() -parser.add_argument('--device',type=str,default='cuda:0',help='') -parser.add_argument('--data',type=str,default='/data/pems_data/pems_vldb/long_term',help='data path') -parser.add_argument('--input_length',type=int,default=2016,help='') -parser.add_argument('--output_length',type=int,default=12,help='') -parser.add_argument('--nhid',type=int,default=32,help='') -parser.add_argument('--in_dim',type=int,default=3,help='inputs dimension') -parser.add_argument('--num_nodes',type=int,default=9638,help='number of nodes') -parser.add_argument('--batch_size',type=int,default=1,help='batch size') -parser.add_argument('--tiny_batch_size',type=int,default=256,help='tiny batch size') -parser.add_argument('--learning_rate',type=float,default=0.001,help='learning rate') 
-parser.add_argument('--dropout',type=float,default=0.3,help='dropout rate') -parser.add_argument('--weight_decay',type=float,default=0.0001,help='weight decay rate') -parser.add_argument('--epochs',type=int,default=100,help='') -parser.add_argument('--print_every',type=int,default=1,help='') -#parser.add_argument('--seed',type=int,default=99,help='random seed') -parser.add_argument('--save',type=str,default='checkpoint/',help='save path') -parser.add_argument('--expid',type=int,default=1,help='experiment id') - -args = parser.parse_args() - -def main(): - # set seed - # torch.manual_seed(args.seed) - # np.random.seed(args.seed) - # load data - device = torch.device(args.device) - dataloader = util.load_dataset(args.data, args.batch_size, args.batch_size, args.batch_size, - args.input_length, args.output_length) - scaler = dataloader['scaler'] - tiny_batch_size = args.tiny_batch_size - - print(args) - - trainer = train_pipeline(scaler, args.input_length, args.output_length, args.in_dim, args.num_nodes, - args.nhid, args.dropout, args.learning_rate, args.weight_decay, device) - - print("start training...",flush=True) - his_loss =[] - train_time = [] - val_time = [] - - for i in range(1, args.epochs+1): - # train - train_loss = [] - train_mape = [] - train_rmse = [] - t1 = time.time() - dataloader['train_loader'].shuffle() - for iter, (x, y) in enumerate(dataloader['train_loader'].get_iterator()): - B, T, N, F = x.shape - batch_num = int(B * N / tiny_batch_size) - idx_perm = np.random.permutation([i for i in range(B*N)]) - for j in range(batch_num): - if j==batch_num-1: - x_ = x[:, :, idx_perm[(j+1)*tiny_batch_size:], :] - y_ = y[:, :, idx_perm[(j+1)*tiny_batch_size:], :] - else: - x_ = x[:, :, idx_perm[j*tiny_batch_size:(j+1)*tiny_batch_size], :] - y_ = y[:, :, idx_perm[j*tiny_batch_size:(j+1)*tiny_batch_size], :] - - trainx = torch.Tensor(x_).to(device) # (B, T, N, F) - trainx = trainx.transpose(1, 2) # (B, N, T, F) - trainy = torch.Tensor(y_).to(device) # (B, T, 
N, F) - trainy = trainy.transpose(1, 2) # (B, N, T, F) - metrics = trainer.train(trainx, trainy[:,:,:,0]) - train_loss.append(metrics[0]) - train_mape.append(metrics[1]) - train_rmse.append(metrics[2]) - t2 = time.time() - train_time.append(t2-t1) - - if iter % args.print_every == 0: - log = 'Iter: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}' - print(log.format(iter, train_loss[-1], train_mape[-1], train_rmse[-1]),flush=True) - # Save the model parameters for subsequent preprocessing - torch.save(trainer.model.state_dict(), args.save+"linear_transformer.pth") - - # validation - valid_loss = [] - valid_mape = [] - valid_rmse = [] - - s1 = time.time() - for iter, (x, y) in enumerate(dataloader['val_loader'].get_iterator()): - B, T, N, F = x.shape - batch_num = int(B*N/tiny_batch_size) - for k in range(batch_num): - if k==batch_num-1: - x_ = x[:, :, (k+1)*tiny_batch_size:, :] - y_ = y[:, :, (k+1)*tiny_batch_size:, :] - else: - x_ = x[:, :, k*tiny_batch_size:(k+1)*tiny_batch_size, :] - y_ = y[:, :, k*tiny_batch_size:(k+1)*tiny_batch_size, :] - testx = torch.Tensor(x).to(device) - testx = testx.transpose(1, 2) - testy = torch.Tensor(y).to(device) - testy = testy.transpose(1, 2) - metrics = trainer.eval(testx, testy[:,:,:,0]) - valid_loss.append(metrics[0]) - valid_mape.append(metrics[1]) - valid_rmse.append(metrics[2]) - s2 = time.time() - mvalid_loss = np.mean(valid_loss) - mvalid_mape = np.mean(valid_mape) - mvalid_rmse = np.mean(valid_rmse) - log = 'Epoch: {:03d}, Validation Inference Time: {:.4f} secs' - print(log.format(i,(s2-s1))) - log = 'Valid MAE: {:.4f}, Valid MAPE: {:.4f}, Valid RMSE: {:.4f}' - print(log.format(mvalid_loss, mvalid_mape, mvalid_rmse), flush=True) - val_time.append(s2-s1) - - print("Average Training Time: {:.4f} secs/epoch".format(np.mean(train_time))) - print("Average Inference Time: {:.4f} secs".format(np.mean(val_time))) - -if __name__ == "__main__": - t1 = time.time() - main() - t2 = time.time() - print("Total time 
spent: {:.4f}".format(t2-t1)) diff --git a/baselines/BigST/arch/preprocess/util.py b/baselines/BigST/arch/preprocess/util.py deleted file mode 100644 index 81bf2cd7..00000000 --- a/baselines/BigST/arch/preprocess/util.py +++ /dev/null @@ -1,147 +0,0 @@ -import pickle -import numpy as np -import os -import scipy.sparse as sp -import torch -from scipy.sparse import linalg - -class DataLoader(object): - def __init__(self, data, batch_size, input_length, output_length): - self.seq_length_x = input_length - self.seq_length_y = output_length - self.y_start = 1 - self.batch_size = batch_size - self.current_ind = 0 - self.x_offsets = np.sort(np.concatenate((np.arange(-(self.seq_length_x - 1), 1, 1),))) - self.y_offsets = np.sort(np.arange(self.y_start, (self.seq_length_y + 1), 1)) - self.min_t = abs(min(self.x_offsets)) - self.max_t = abs(data.shape[0] - abs(max(self.y_offsets))) - mod = (self.max_t-self.min_t) % batch_size - if mod != 0: - self.data = data[:-mod] - else: - self.data = data - self.max_t = abs(self.data.shape[0] - abs(max(self.y_offsets))) - self.permutation = [i for i in range(self.min_t, self.max_t)] - - def shuffle(self): - self.permutation = np.random.permutation([i for i in range(self.min_t, self.max_t)]) - - def get_iterator(self): - self.current_ind = 0 - - def _wrapper(): - while self.current_ind < len(self.permutation): - if self.batch_size > 1: - x_batch = [] - y_batch = [] - for i in range(self.batch_size): - x_i = self.data[self.permutation[self.current_ind+i] + self.x_offsets, ...] - y_i = self.data[self.permutation[self.current_ind+i] + self.y_offsets, ...] - x_batch.append(x_i) - y_batch.append(y_i) - - x_batch = np.stack(x_batch, axis=0) - y_batch = np.stack(y_batch, axis=0) - else: - x_batch = self.data[self.permutation[self.current_ind] + self.x_offsets, ...] - y_batch = self.data[self.permutation[self.current_ind] + self.y_offsets, ...] 
- x_batch = np.expand_dims(x_batch, axis=0) - y_batch = np.expand_dims(y_batch, axis=0) - yield (x_batch, y_batch) - self.current_ind += self.batch_size - - return _wrapper() - -class StandardScaler(): - """ - Standard the input - """ - - def __init__(self, mean, std): - self.mean = mean - self.std = std - - def transform(self, data): - return (data - self.mean) / self.std - - def inverse_transform(self, data): - return (data * self.std) + self.mean - -def sym_adj(adj): - """Symmetrically normalize adjacency matrix.""" - adj = sp.coo_matrix(adj) - rowsum = np.array(adj.sum(1)) - d_inv_sqrt = np.power(rowsum, -0.5).flatten() - d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. - d_mat_inv_sqrt = sp.diags(d_inv_sqrt) - return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).astype(np.float32).todense() - -def asym_adj(adj): - adj = sp.coo_matrix(adj) - rowsum = np.array(adj.sum(1)).flatten() - d_inv = np.power(rowsum, -1).flatten() - d_inv[np.isinf(d_inv)] = 0. - d_mat= sp.diags(d_inv) - return d_mat.dot(adj).astype(np.float32).todense() - -def calculate_normalized_laplacian(adj): - """ - # L = D^-1/2 (D-A) D^-1/2 = I - D^-1/2 A D^-1/2 - # D = diag(A 1) - :param adj: - :return: - """ - adj = sp.coo_matrix(adj) - d = np.array(adj.sum(1)) - d_inv_sqrt = np.power(d, -0.5).flatten() - d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 
- d_mat_inv_sqrt = sp.diags(d_inv_sqrt) - normalized_laplacian = sp.eye(adj.shape[0]) - adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() - return normalized_laplacian - -def calculate_scaled_laplacian(adj_mx, lambda_max=2, undirected=True): - if undirected: - adj_mx = np.maximum.reduce([adj_mx, adj_mx.T]) - L = calculate_normalized_laplacian(adj_mx) - if lambda_max is None: - lambda_max, _ = linalg.eigsh(L, 1, which='LM') - lambda_max = lambda_max[0] - L = sp.csr_matrix(L) - M, _ = L.shape - I = sp.identity(M, format='csr', dtype=L.dtype) - L = (2 / lambda_max * L) - I - return L.astype(np.float32).todense() - -def load_pickle(pickle_file): - try: - with open(pickle_file, 'rb') as f: - pickle_data = pickle.load(f) - except UnicodeDecodeError as e: - with open(pickle_file, 'rb') as f: - pickle_data = pickle.load(f, encoding='latin1') - except Exception as e: - print('Unable to load data ', pickle_file, ':', e) - raise - return pickle_data - -def load_adj(adj_filename, adjtype): - adj_mx = np.load(adj_filename) - print('adj_mx: ', adj_mx.shape) - adj = [asym_adj(adj_mx)] - return adj - -def load_dataset(dataset_dir, batch_size, valid_batch_size, test_batch_size, input_length, output_length): - data = {} - for category in ['train', 'val', 'test']: - data[category] = np.load(os.path.join(dataset_dir, category + '.npy')) - print('*'*10, category, data[category].shape, '*'*10) - scaler = StandardScaler(mean=data['train'][..., 0].mean(), std=data['train'][..., 0].std()) - # Data format - for category in ['train', 'val', 'test']: - data[category][..., 0] = scaler.transform(data[category][..., 0]) - data['train_loader'] = DataLoader(data['train'], batch_size, input_length, output_length) - data['val_loader'] = DataLoader(data['val'], valid_batch_size, input_length, output_length) - data['test_loader'] = DataLoader(data['test'], test_batch_size, input_length, output_length) - data['scaler'] = scaler - return data diff --git a/baselines/BigST/runner/__init__.py 
# Patch residue that shared this line:
#   baselines/BigST/runner/__init__.py (new file) re-exports the runner with
#   `from .bigstpreprocess_runner import BigSTPreprocessRunner`.
# Everything below is baselines/BigST/runner/bigstpreprocess_runner.py (new file).
from typing import Tuple, Union, Dict
import torch
import numpy as np
import wandb
import pdb
import os

from basicts.runners import SimpleTimeSeriesForecastingRunner


class BigSTPreprocessRunner(SimpleTimeSeriesForecastingRunner):
    """Forecasting runner that keeps only a random "tiny batch" of nodes per step.

    The parent runner's ``preprocessing`` is applied first; afterwards a random
    subset of at most ``tiny_batch_size`` indices along axis 2 is kept in both
    the inputs and the target.
    """

    def __init__(self, cfg: dict):
        """Build the runner and read the tiny-batch size from the model params.

        Args:
            cfg (dict): Runner configuration. ``cfg.MODEL.PARAM`` must contain
                the key ``'tiny_batch_size'``.
        """
        super().__init__(cfg)

        # Upper bound on how many axis-2 entries are kept per call.
        self.tiny_batch_size = cfg.MODEL.PARAM['tiny_batch_size']

    def preprocessing(self, input_data: Dict) -> Dict:
        """Preprocess data and sub-sample one random tiny batch along axis 2.

        The previous implementation permuted ``range(B * N)`` but used the
        result to index axis 2 (size ``N``), which can go out of range as soon
        as ``B > 1``. It also left the selection unassigned when
        ``B * N < tiny_batch_size``, and otherwise kept only the leftover tail
        after the last full chunk, which is empty when ``B * N`` is a multiple
        of ``tiny_batch_size``. Here a single random selection of valid
        indices is drawn instead.

        Args:
            input_data (Dict): Dictionary containing data to be processed.
                After the parent's preprocessing it must provide 4-D
                ``'inputs'`` and ``'target'`` entries.

        Returns:
            Dict: The same dictionary, with ``'inputs'`` restricted to the
            sampled axis-2 indices and then transposed on axes 1 and 2, and
            ``'target'`` restricted to the same indices (not transposed,
            as in the original code).
        """

        input_data = super().preprocessing(input_data)

        x = input_data['inputs']
        y = input_data['target']

        # The original unpacked the shape as (B, T, N, F); axis 2 is presumably
        # the node axis -- TODO confirm against the dataset layout.
        num_nodes = x.shape[2]

        # Never ask for more entries than exist, so small graphs still work.
        num_selected = min(self.tiny_batch_size, num_nodes)

        # Sample without replacement from the valid index range of axis 2.
        node_idx = np.random.permutation(num_nodes)[:num_selected]

        input_data['inputs'] = x[:, :, node_idx, :].transpose(1, 2)
        input_data['target'] = y[:, :, node_idx, :]
        return input_data