From bd2002f469d6c9e1501e0c6163c2969d11dbbab8 Mon Sep 17 00:00:00 2001 From: TrellixVulnTeam Date: Sat, 15 Oct 2022 09:29:08 +0000 Subject: [PATCH] Adding tarfile member sanitization to extractall() --- .../torchvision/datasets/sbu.py | 21 ++++++- .../torchvision/datasets/utils.py | 42 ++++++++++++- .../torchvision/datasets/voc.py | 21 ++++++- .../torchvision/datasets/sbu.py | 21 ++++++- .../torchvision/datasets/utils.py | 42 ++++++++++++- .../torchvision/datasets/voc.py | 21 ++++++- .../core/data/dataloader/utils.py | 21 ++++++- .../core/data/downloader/pascal_voc.py | 42 ++++++++++++- .../Bert-Squad_ID0470_for_PyTorch/modeling.py | 21 ++++++- .../model/modeling.py | 21 ++++++- .../module/prepare_imagenet.py | 42 ++++++++++++- .../tools/convert_datasets/stare.py | 63 ++++++++++++++++++- .../ENet/core/data/dataloader/utils.py | 21 ++++++- .../ENet/core/data/downloader/pascal_voc.py | 42 ++++++++++++- .../FCN8s/tools/convert_datasets/stare.py | 63 ++++++++++++++++++- .../segmentron/data/dataloader/utils.py | 21 ++++++- .../segmentron/data/downloader/pascal_voc.py | 42 ++++++++++++- .../PSPNet/tools/convert_datasets/stare.py | 63 ++++++++++++++++++- .../code/pytorch_pretrained_bert/modeling.py | 21 ++++++- .../pretraining/fairseq/models/hf_bert.py | 21 ++++++- .../pretraining/fairseq/models/pair_bert.py | 21 ++++++- .../BertBase_ID0490_for_PyTorch/modeling.py | 21 ++++++- .../datasets/voc.py | 21 ++++++- .../datasets/voc.py | 21 ++++++- .../dev/nlp/BERT_base_for_PyTorch/modeling.py | 21 ++++++- .../code/pytorch_pretrained_bert/modeling.py | 21 ++++++- .../pretraining/fairseq/models/hf_bert.py | 21 ++++++- .../pretraining/fairseq/models/pair_bert.py | 21 ++++++- 28 files changed, 800 insertions(+), 40 deletions(-) diff --git a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/sbu.py b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/sbu.py index 411f64591..154c24fa5 100644 --- a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/sbu.py +++ b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/sbu.py @@ -126,7 +126,26 @@ def download(self): # Extract file with tarfile.open(os.path.join(self.root, self.filename), 'r:gz') as tar: - tar.extractall(path=self.root) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=self.root) # Download individual photos with open(os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_urls.txt')) as fh: diff --git a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/utils.py b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/utils.py index 83d0a4d8c..11d8c4ad2 100644 --- a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/utils.py +++ b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/utils.py @@ -251,10 +251,48 @@ def extract_archive(from_path, to_path=None, remove_finished=False): if _is_tar(from_path): with tarfile.open(from_path, 'r') as tar: - tar.extractall(path=to_path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=to_path) elif _is_targz(from_path): with tarfile.open(from_path, 'r:gz') as tar: - tar.extractall(path=to_path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=to_path) elif _is_gzip(from_path): to_path = os.path.join(to_path, os.path.splitext(os.path.basename(from_path))[0]) with open(to_path, "wb") as out_f, gzip.GzipFile(from_path) as zip_f: diff --git a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/voc.py b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/voc.py index 3d7a8eeb8..0f5044b0d 100644 --- a/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/voc.py +++ b/PyTorch/built-in/cv/classification/DenseNet169_ID0454_for_PyTorch/torchvision/datasets/voc.py @@ -261,4 +261,23 @@ def parse_voc_xml(self, node): def download_extract(url, root, filename, md5): download_url(url, root, filename, md5) with tarfile.open(os.path.join(root, filename), "r") as tar: - tar.extractall(path=root) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=root) diff --git a/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/sbu.py b/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/sbu.py index 411f64591..154c24fa5 100644 --- a/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/sbu.py +++ b/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/sbu.py @@ -126,7 +126,26 @@ def download(self): # Extract file with tarfile.open(os.path.join(self.root, self.filename), 'r:gz') as tar: - tar.extractall(path=self.root) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=self.root) # Download individual photos with open(os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_urls.txt')) as fh: diff --git a/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/utils.py b/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/utils.py index 83d0a4d8c..11d8c4ad2 100644 --- a/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/utils.py +++ b/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/utils.py @@ -251,10 +251,48 @@ def extract_archive(from_path, to_path=None, remove_finished=False): if _is_tar(from_path): with tarfile.open(from_path, 'r') as tar: - tar.extractall(path=to_path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=to_path) elif _is_targz(from_path): with tarfile.open(from_path, 'r:gz') as tar: - tar.extractall(path=to_path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=to_path) elif _is_gzip(from_path): to_path = os.path.join(to_path, os.path.splitext(os.path.basename(from_path))[0]) with open(to_path, "wb") as out_f, gzip.GzipFile(from_path) as zip_f: diff --git a/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/voc.py b/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/voc.py index 3d7a8eeb8..0f5044b0d 100644 --- a/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/voc.py +++ b/PyTorch/built-in/cv/classification/Googlenet_ID0447_for_PyTorch/torchvision/datasets/voc.py @@ -261,4 +261,23 @@ def parse_voc_xml(self, node): def download_extract(url, root, filename, md5): download_url(url, root, filename, md5) with tarfile.open(os.path.join(root, filename), "r") as tar: - tar.extractall(path=root) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=root) diff --git a/PyTorch/built-in/cv/semantic_segmentation/DynamicUNet_for_Pytorch/awesome-semantic-segmentation-pytorch/core/data/dataloader/utils.py b/PyTorch/built-in/cv/semantic_segmentation/DynamicUNet_for_Pytorch/awesome-semantic-segmentation-pytorch/core/data/dataloader/utils.py index c0bd1ad4b..f14abc00c 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/DynamicUNet_for_Pytorch/awesome-semantic-segmentation-pytorch/core/data/dataloader/utils.py +++ b/PyTorch/built-in/cv/semantic_segmentation/DynamicUNet_for_Pytorch/awesome-semantic-segmentation-pytorch/core/data/dataloader/utils.py @@ -66,4 +66,23 @@ def download_url(url, root, filename=None, md5=None): def download_extract(url, root, filename, md5): download_url(url, root, filename, md5) with tarfile.open(os.path.join(root, filename), "r") as tar: - tar.extractall(path=root) \ No newline at end of file + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=root) \ No newline at end of file diff --git a/PyTorch/built-in/cv/semantic_segmentation/DynamicUNet_for_Pytorch/awesome-semantic-segmentation-pytorch/core/data/downloader/pascal_voc.py b/PyTorch/built-in/cv/semantic_segmentation/DynamicUNet_for_Pytorch/awesome-semantic-segmentation-pytorch/core/data/downloader/pascal_voc.py index 849c95bbb..012b3d9c6 100644 --- a/PyTorch/built-in/cv/semantic_segmentation/DynamicUNet_for_Pytorch/awesome-semantic-segmentation-pytorch/core/data/downloader/pascal_voc.py +++ b/PyTorch/built-in/cv/semantic_segmentation/DynamicUNet_for_Pytorch/awesome-semantic-segmentation-pytorch/core/data/downloader/pascal_voc.py @@ -44,7 +44,26 @@ def download_voc(path, overwrite=False): filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract with tarfile.open(filename) as tar: - tar.extractall(path=path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=path) ##################################################################################### @@ -59,7 +78,26 @@ def download_aug(path, overwrite=False): filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract with tarfile.open(filename) as tar: - tar.extractall(path=path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=path) shutil.move(os.path.join(path, 'benchmark_RELEASE'), os.path.join(path, 'VOCaug')) filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] diff --git a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/modeling.py b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/modeling.py index d76df7a38..49a094b46 100644 --- a/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/modeling.py +++ b/PyTorch/built-in/nlp/Bert-Squad_ID0470_for_PyTorch/modeling.py @@ -718,7 +718,26 @@ def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_d logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/model/modeling.py b/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/model/modeling.py index d5f8f5a4d..9e05b1f62 100644 --- a/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/model/modeling.py +++ b/PyTorch/built-in/nlp/CPM_Finetune_for_PyTorch/model/modeling.py @@ -735,7 +735,26 @@ def from_pretrained(cls, pretrained_model_name, state_dict=None, cache_dir=None, logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/contrib/cv/classification/ResNeSt50_for_PyTorch/module/prepare_imagenet.py b/PyTorch/contrib/cv/classification/ResNeSt50_for_PyTorch/module/prepare_imagenet.py index 6587d773d..dfcfc65da 100644 --- a/PyTorch/contrib/cv/classification/ResNeSt50_for_PyTorch/module/prepare_imagenet.py +++ b/PyTorch/contrib/cv/classification/ResNeSt50_for_PyTorch/module/prepare_imagenet.py @@ -71,7 +71,26 @@ def extract_train(tar_fname, target_dir, with_rec=False, num_thread=1): class_dir = os.path.splitext(class_fname)[0] os.mkdir(class_dir) with tarfile.open(class_fname) as f: - f.extractall(class_dir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, class_dir) os.remove(class_fname) pbar.update(1) pbar.close() @@ -80,7 +99,26 @@ def extract_val(tar_fname, target_dir, with_rec=False, num_thread=1): mkdir(target_dir) print('Extracting ' + tar_fname) with tarfile.open(tar_fname) as tar: - tar.extractall(target_dir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, target_dir) # build rec file before images are moved into subfolders # move images to proper subfolders subprocess.call(["wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash"], diff --git a/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/tools/convert_datasets/stare.py b/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/tools/convert_datasets/stare.py index 3ac28aa6f..7baa4ed16 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/tools/convert_datasets/stare.py +++ b/PyTorch/contrib/cv/semantic_segmentation/DeeplabV3_for_Pytorch/tools/convert_datasets/stare.py @@ -71,7 +71,26 @@ def main(): print('Extracting stare-images.tar...') with tarfile.open(image_path) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( @@ -106,7 +125,26 @@ def main(): print('Extracting labels-ah.tar...') with tarfile.open(labels_ah) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( @@ -145,7 +183,26 @@ def main(): print('Extracting labels-vk.tar...') with tarfile.open(labels_vk) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/dataloader/utils.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/dataloader/utils.py index f7bad51db..50dd65462 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/dataloader/utils.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/dataloader/utils.py @@ -80,4 +80,23 @@ def download_url(url, root, filename=None, md5=None): def download_extract(url, root, filename, md5): download_url(url, root, filename, md5) with tarfile.open(os.path.join(root, filename), "r") as tar: - tar.extractall(path=root) \ No newline at end of file + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=root) \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/downloader/pascal_voc.py b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/downloader/pascal_voc.py index 9253738e2..3d44972fc 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/downloader/pascal_voc.py +++ b/PyTorch/contrib/cv/semantic_segmentation/ENet/core/data/downloader/pascal_voc.py @@ -58,7 +58,26 @@ def download_voc(path, overwrite=False): filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract with tarfile.open(filename) as tar: - tar.extractall(path=path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=path) ##################################################################################### @@ -73,7 +92,26 @@ def download_aug(path, overwrite=False): filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract with tarfile.open(filename) as tar: - tar.extractall(path=path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=path) shutil.move(os.path.join(path, 'benchmark_RELEASE'), os.path.join(path, 'VOCaug')) filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] diff --git a/PyTorch/contrib/cv/semantic_segmentation/FCN8s/tools/convert_datasets/stare.py b/PyTorch/contrib/cv/semantic_segmentation/FCN8s/tools/convert_datasets/stare.py index 6238d62f6..58de7ca8c 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/FCN8s/tools/convert_datasets/stare.py +++ b/PyTorch/contrib/cv/semantic_segmentation/FCN8s/tools/convert_datasets/stare.py @@ -55,7 +55,26 @@ def main(): print('Extracting stare-images.tar...') with tarfile.open(image_path) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( @@ -90,7 +109,26 @@ def main(): print('Extracting labels-ah.tar...') with tarfile.open(labels_ah) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( @@ -129,7 +167,26 @@ def main(): print('Extracting labels-vk.tar...') with tarfile.open(labels_vk) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( diff --git a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/data/dataloader/utils.py b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/data/dataloader/utils.py index 5939e5a7e..1e8f1d435 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/data/dataloader/utils.py +++ b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/data/dataloader/utils.py @@ -79,4 +79,23 @@ def download_url(url, root, filename=None, md5=None): def download_extract(url, root, filename, md5): download_url(url, root, filename, md5) with tarfile.open(os.path.join(root, filename), "r") as tar: - tar.extractall(path=root) \ No newline at end of file + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=root) \ No newline at end of file diff --git a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/data/downloader/pascal_voc.py b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/data/downloader/pascal_voc.py index 0072ce90d..4e6376eeb 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/data/downloader/pascal_voc.py +++ b/PyTorch/contrib/cv/semantic_segmentation/FastSCNN/segmentron/data/downloader/pascal_voc.py @@ -55,7 +55,26 @@ def download_voc(path, overwrite=False): filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract with tarfile.open(filename) as tar: - tar.extractall(path=path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=path) ##################################################################################### @@ -70,7 +89,26 @@ def download_aug(path, overwrite=False): filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) # extract with tarfile.open(filename) as tar: - tar.extractall(path=path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=path) shutil.move(os.path.join(path, 'benchmark_RELEASE'), os.path.join(path, 'VOCaug')) filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] diff --git a/PyTorch/contrib/cv/semantic_segmentation/PSPNet/tools/convert_datasets/stare.py b/PyTorch/contrib/cv/semantic_segmentation/PSPNet/tools/convert_datasets/stare.py index 6238d62f6..58de7ca8c 100644 --- a/PyTorch/contrib/cv/semantic_segmentation/PSPNet/tools/convert_datasets/stare.py +++ b/PyTorch/contrib/cv/semantic_segmentation/PSPNet/tools/convert_datasets/stare.py @@ -55,7 +55,26 @@ def main(): print('Extracting stare-images.tar...') with tarfile.open(image_path) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( @@ -90,7 +109,26 @@ def main(): print('Extracting labels-ah.tar...') with tarfile.open(labels_ah) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( @@ -129,7 +167,26 @@ def main(): print('Extracting labels-vk.tar...') with tarfile.open(labels_vk) as f: - f.extractall(osp.join(tmp_dir, 'gz')) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(f, osp.join(tmp_dir,"gz")) for filename in os.listdir(osp.join(tmp_dir, 'gz')): un_gz( diff --git a/PyTorch/contrib/nlp/SpanBERT/code/pytorch_pretrained_bert/modeling.py b/PyTorch/contrib/nlp/SpanBERT/code/pytorch_pretrained_bert/modeling.py index a68558e48..24392fe81 100644 --- a/PyTorch/contrib/nlp/SpanBERT/code/pytorch_pretrained_bert/modeling.py +++ b/PyTorch/contrib/nlp/SpanBERT/code/pytorch_pretrained_bert/modeling.py @@ -585,7 +585,26 @@ def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_d logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/contrib/nlp/SpanBERT/pretraining/fairseq/models/hf_bert.py b/PyTorch/contrib/nlp/SpanBERT/pretraining/fairseq/models/hf_bert.py index 3ae3024d8..49f63a30c 100644 --- a/PyTorch/contrib/nlp/SpanBERT/pretraining/fairseq/models/hf_bert.py +++ b/PyTorch/contrib/nlp/SpanBERT/pretraining/fairseq/models/hf_bert.py @@ -501,7 +501,26 @@ def from_pretrained(cls, pretrained_model_name, state_dict=None, cache_dir=None, logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/contrib/nlp/SpanBERT/pretraining/fairseq/models/pair_bert.py b/PyTorch/contrib/nlp/SpanBERT/pretraining/fairseq/models/pair_bert.py index 7fccf6b7d..6fc64fd2f 100644 --- a/PyTorch/contrib/nlp/SpanBERT/pretraining/fairseq/models/pair_bert.py +++ b/PyTorch/contrib/nlp/SpanBERT/pretraining/fairseq/models/pair_bert.py @@ -550,7 +550,26 @@ def from_pretrained(cls, pretrained_model_name, state_dict=None, cache_dir=None, logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/modeling.py b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/modeling.py index 3cd6c5234..73db3754b 100644 --- a/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/modeling.py +++ b/PyTorch/dev/cv/image_classification/BertBase_ID0490_for_PyTorch/modeling.py @@ -694,7 +694,26 @@ def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_d logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/dev/cv/image_segmentation/DeepLabV3+_ID0458_for_PyTorch/datasets/voc.py b/PyTorch/dev/cv/image_segmentation/DeepLabV3+_ID0458_for_PyTorch/datasets/voc.py index b07c18345..2af147739 100644 --- a/PyTorch/dev/cv/image_segmentation/DeepLabV3+_ID0458_for_PyTorch/datasets/voc.py +++ b/PyTorch/dev/cv/image_segmentation/DeepLabV3+_ID0458_for_PyTorch/datasets/voc.py @@ -193,4 +193,23 @@ def decode_target(cls, mask): def download_extract(url, root, filename, md5): download_url(url, root, filename, md5) with tarfile.open(os.path.join(root, filename), "r") as tar: - tar.extractall(path=root) \ No newline at end of file + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=root) \ No newline at end of file diff --git a/PyTorch/dev/cv/image_segmentation/DeepLabV3_ID0621_for_PyTorch/datasets/voc.py b/PyTorch/dev/cv/image_segmentation/DeepLabV3_ID0621_for_PyTorch/datasets/voc.py index 36d4a1a53..fb36cb05d 100644 --- a/PyTorch/dev/cv/image_segmentation/DeepLabV3_ID0621_for_PyTorch/datasets/voc.py +++ b/PyTorch/dev/cv/image_segmentation/DeepLabV3_ID0621_for_PyTorch/datasets/voc.py @@ -194,4 +194,23 @@ def decode_target(cls, mask): def download_extract(url, root, filename, md5): download_url(url, root, filename, md5) with tarfile.open(os.path.join(root, filename), "r") as tar: - tar.extractall(path=root) \ No newline at end of file + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tar, path=root) \ No newline at end of file diff --git a/PyTorch/dev/nlp/BERT_base_for_PyTorch/modeling.py b/PyTorch/dev/nlp/BERT_base_for_PyTorch/modeling.py index 3cd6c5234..73db3754b 100644 --- a/PyTorch/dev/nlp/BERT_base_for_PyTorch/modeling.py +++ b/PyTorch/dev/nlp/BERT_base_for_PyTorch/modeling.py @@ -694,7 +694,26 @@ def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_d logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/code/pytorch_pretrained_bert/modeling.py b/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/code/pytorch_pretrained_bert/modeling.py index 9cb767ab9..3fd9723f2 100644 --- a/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/code/pytorch_pretrained_bert/modeling.py +++ b/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/code/pytorch_pretrained_bert/modeling.py @@ -602,7 +602,26 @@ def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_d logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/pretraining/fairseq/models/hf_bert.py b/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/pretraining/fairseq/models/hf_bert.py index 087852fc2..43c965980 100644 --- a/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/pretraining/fairseq/models/hf_bert.py +++ b/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/pretraining/fairseq/models/hf_bert.py @@ -500,7 +500,26 @@ def from_pretrained(cls, pretrained_model_name, state_dict=None, cache_dir=None, logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME) diff --git a/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/pretraining/fairseq/models/pair_bert.py b/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/pretraining/fairseq/models/pair_bert.py index dda03e470..bd4fbf4b2 100644 --- a/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/pretraining/fairseq/models/pair_bert.py +++ b/PyTorch/dev/nlp/SpanBERT_ID0337_for_PyTorch/pretraining/fairseq/models/pair_bert.py @@ -549,7 +549,26 @@ def from_pretrained(cls, pretrained_model_name, state_dict=None, cache_dir=None, logger.info("extracting archive file {} to temp dir {}".format( resolved_archive_file, tempdir)) with tarfile.open(resolved_archive_file, 'r:gz') as archive: - archive.extractall(tempdir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(archive, tempdir) serialization_dir = tempdir # Load config config_file = os.path.join(serialization_dir, CONFIG_NAME)