From 09380441226552eaf2dfd35e2bd65982d9c7fd74 Mon Sep 17 00:00:00 2001
From: zdaiot
Date: Sat, 28 Sep 2019 23:20:07 +0800
Subject: [PATCH] update uploads.sh

---
 .gitignore           |  1 +
 classify_segment.py  | 34 ++++++++++++-------------
 create_submission.py | 48 ++++++++++++++++--------------------
 models/model.py      |  5 +---
 uploads.sh           | 60 ++++++++++++++++++++++++++++++++++++++------
 5 files changed, 92 insertions(+), 56 deletions(-)

diff --git a/.gitignore b/.gitignore
index d1f9acf..0b518c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ checkpoints/
 *.pkl
 submission.csv
 *.json
+kaggle/
\ No newline at end of file
diff --git a/classify_segment.py b/classify_segment.py
index 8ee235a..5ff238f 100644
--- a/classify_segment.py
+++ b/classify_segment.py
@@ -8,22 +8,21 @@


 class Get_Classify_Results():
-    def __init__(self, model_name, fold, save_path, class_num=4):
+    def __init__(self, model_name, fold, model_path, class_num=4):
         ''' Process the classification results of one batch for the current fold

         :param model_name: name of the current model
         :param fold: index of the current fold
-        :param save_path: path where all models are stored
+        :param model_path: path where all models are stored
         :param class_num: total number of classes
         '''
         self.model_name = model_name
         self.fold = fold
-        self.save_path = save_path
+        self.model_path = model_path
         self.class_num = class_num
         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

         # Load the model and its weights
-        self.model_path = os.path.join(self.save_path, self.model_name)
         self.classify_model = ClassifyResNet(model_name)
         if torch.cuda.is_available():
             self.classify_model = torch.nn.DataParallel(self.classify_model)
@@ -48,21 +47,20 @@ def get_classify_results(self, images, thrshold=0.5):


 class Get_Segment_Results():
-    def __init__(self, model_name, fold, save_path, class_num=4):
+    def __init__(self, model_name, fold, model_path, class_num=4):
         ''' Process the segmentation results of one batch for the current fold

         :param model_name: name of the current model
         :param fold: index of the current fold
-        :param save_path: path where all models are stored
+        :param model_path: path where all models are stored
         :param class_num: total number of classes
         '''
         self.model_name = model_name
         self.fold = fold
-        self.save_path = save_path
+        self.model_path = model_path
         self.class_num = class_num

         # Load the model and its weights
-        self.model_path = os.path.join(self.save_path, self.model_name)
         self.segment_model = Model(self.model_name).create_model()
         self.segment_model_path = os.path.join(self.model_path, '%s_fold%d_best.pth' % (self.model_name, self.fold))
         self.solver = Solver(self.segment_model)
@@ -115,21 +113,21 @@ def get_thresholds_minareas(self, json_path, fold):


 class Classify_Segment_Fold():
-    def __init__(self, model_name, fold, save_path, class_num=4):
+    def __init__(self, model_name, fold, model_path, class_num=4):
         ''' Process the segmentation and classification results of one batch for the current fold

         :param model_name: name of the current model
         :param fold: index of the current fold
-        :param save_path: path where all models are stored
+        :param model_path: path where all models are stored
         :param class_num: total number of classes
         '''
         self.model_name = model_name
         self.fold = fold
-        self.save_path = save_path
+        self.model_path = model_path
         self.class_num = class_num

-        self.classify_model = Get_Classify_Results(self.model_name, self.fold, self.save_path, self.class_num)
-        self.segment_model = Get_Segment_Results(self.model_name, self.fold, self.save_path, self.class_num)
+        self.classify_model = Get_Classify_Results(self.model_name, self.fold, self.model_path, self.class_num)
+        self.segment_model = Get_Segment_Results(self.model_name, self.fold, self.model_path, self.class_num)

     def classify_segment(self, images):
         ''' Process the segmentation and classification results of one batch for the current fold
@@ -149,17 +147,17 @@ def classify_segment(self, images):


 class Classify_Segment_Folds():
-    def __init__(self, model_name, n_splits, save_path, dataloader, class_num=4):
+    def __init__(self, model_name, n_splits, model_path, dataloader, class_num=4):
         ''' Use voting to combine the segmentation and classification results of one batch across all folds

         :param model_name: name of the current model
         :param n_splits: how many folds in total, given as a list
-        :param save_path: path where all models are stored
+        :param model_path: path where all models are stored
         :param class_num: total number of classes
         '''
         self.model_name = model_name
         self.n_splits = n_splits
-        self.save_path = save_path
+        self.model_path = model_path
         self.class_num = class_num
         self.dataloader = dataloader

@@ -171,8 +169,8 @@ def get_classify_segment_models(self):
         '''
         for fold in self.n_splits:
-            self.classify_models.append(Get_Classify_Results(self.model_name, fold, self.save_path, self.class_num))
-            self.segment_models.append(Get_Segment_Results(self.model_name, fold, self.save_path, self.class_num))
+            self.classify_models.append(Get_Classify_Results(self.model_name, fold, self.model_path, self.class_num))
+            self.segment_models.append(Get_Segment_Results(self.model_name, fold, self.model_path, self.class_num))

     def classify_segment_folds(self, images):
         ''' Use voting to combine the segmentation and classification results of one batch across all folds
diff --git a/create_submission.py b/create_submission.py
index f1305c2..9f15d7f 100644
--- a/create_submission.py
+++ b/create_submission.py
@@ -1,20 +1,16 @@
-kaggle = 0
 import os
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from torch.utils.data import DataLoader
+
+kaggle = 0
 if kaggle:
     os.system('pip install /kaggle/input/segmentation-models/pretrainedmodels-0.7.4/ > /dev/null')
     os.system('pip install /kaggle/input/segmentation-models/segmentation_models.pytorch/ > /dev/null')
-    package_path = '../input/models'  # add unet script dataset
+    package_path = 'kaggle/input/sources'  # add unet script dataset
     import sys
     sys.path.append(package_path)
-    from model import Model
-else:
-    from models.model import Model
-import cv2
-import torch
-import pandas as pd
-import numpy as np
-from tqdm import tqdm
-from torch.utils.data import DataLoader, Dataset

 from datasets.steel_dataset import TestDataset
 from classify_segment import Classify_Segment_Folds, Classify_Segment_Fold
@@ -32,7 +28,7 @@ def mask2rle(img):
     return ' '.join(str(x) for x in runs)


-def create_submission(n_splits, model_name, batch_size, num_workers, mean, std, test_data_folder, sample_submission_path, save_path):
+def create_submission(n_splits, model_name, batch_size, num_workers, mean, std, test_data_folder, sample_submission_path, model_path):
     '''

     :param n_splits: the folds to use, given as a list
@@ -43,7 +39,7 @@ def create_submission(n_splits, model_name, batch_size, num_workers, mean, std,
     :param std: standard deviation
     :param test_data_folder: directory containing the test data
     :param sample_submission_path: path to the sample submission csv
-    :param save_path: root directory of the model weights; note that the weights themselves live in the model_name subdirectory
+    :param model_path: directory containing the current model's weights
     :return: None
     '''
     # Load the test dataset
@@ -56,9 +52,9 @@ def create_submission(n_splits, model_name, batch_size, num_workers, mean, std,
         pin_memory=True
     )
     if len(n_splits) == 1:
-        classify_segment = Classify_Segment_Fold(model_name, n_splits[0], save_path).classify_segment
+        classify_segment = Classify_Segment_Fold(model_name, n_splits[0], model_path).classify_segment
     else:
-        classify_segment = Classify_Segment_Folds(model_name, n_splits, save_path, testset).classify_segment_folds
+        classify_segment = Classify_Segment_Folds(model_name, n_splits, model_path, testset).classify_segment_folds

     # start prediction
     predictions = []
@@ -77,25 +73,23 @@


 if __name__ == "__main__":
-    if kaggle:
-        sample_submission_path = '../input/severstal-steel-defect-detection/sample_submission.csv'
-        test_data_folder = "../input/severstal-steel-defect-detection/test_images"
"../input/severstal-steel-defect-detection/test_images" - save_path = '../input/models' - else: - sample_submission_path = 'datasets/Steel_data/sample_submission.csv' - test_data_folder = 'datasets/Steel_data/test_images' - save_path = './checkpoints' - # 设置超参数 model_name = 'unet_resnet34' - # initialize test dataloader - num_workers = 12 batch_size = 8 mean = (0.485, 0.456, 0.406) std = (0.229, 0.224, 0.225) + n_splits = [1] # [0, 1, 2, 3, 4] - n_splits = [1, 2] # [0, 1, 2, 3, 4] + if kaggle: + sample_submission_path = 'kaggel/input/severstal-steel-defect-detection/sample_submission.csv' + test_data_folder = "kaggle/input/severstal-steel-defect-detection/test_images" + model_path = 'kaggle/input/checkpoints' + else: + sample_submission_path = 'datasets/Steel_data/sample_submission.csv' + test_data_folder = 'datasets/Steel_data/test_images' + model_path = './checkpoints/' + model_name create_submission(n_splits, model_name, batch_size, num_workers, mean, std, test_data_folder, - sample_submission_path, save_path) + sample_submission_path, model_path) diff --git a/models/model.py b/models/model.py index 06b7ac4..95299b0 100644 --- a/models/model.py +++ b/models/model.py @@ -3,7 +3,6 @@ from torch.nn import Module from torch import nn import torch.nn.functional as F -from utils.loss import ClassifyLoss class Model(): @@ -97,6 +96,4 @@ def forward(self, x): x = torch.Tensor(8, 3, 256, 1600) y = torch.ones(8, 4) output = class_net(x) - criterion = ClassifyLoss() - loss = criterion(output, y) - print(loss) + print(output.size()) diff --git a/uploads.sh b/uploads.sh index 74e207f..7383a38 100644 --- a/uploads.sh +++ b/uploads.sh @@ -1,18 +1,64 @@ export http_proxy=http://localhost:8123 export https_proxy=http://localhost:8123 +model_name="unet_resnet34" + +# 建立文件夹,并copy py文件 +if [ ! -d "kaggle/sources" ]; then + mkdir -p kaggle/sources +fi +if [ ! -d "kaggle/sources/models" ]; then + mkdir -p kaggle/sources/models +fi +if [ ! -d "kaggle/sources/datasets" ]; then + mkdir -p kaggle/sources/datasets +fi + +if [ ! 
-d "kaggle/checkpoints" ]; then + mkdir -p kaggle/checkpoints +fi + +rm kaggle/sources/*.py -f +rm kaggle/sources/*/*.py -f +rm checkpoints/$model_name/*_best.pth -f +rm checkpoints/$model_name/result.json -f + +cp models/model.py kaggle/sources/models +cp datasets/steel_dataset.py kaggle/sources/datasets +cp solver.py kaggle/sources +cp classify_segment.py kaggle/sources + +cp checkpoints/$model_name/*_best.pth kaggle/checkpoints +cp checkpoints/$model_name/result.json kaggle/checkpoints + if [ $1 -eq 0 ]; then - echo $1 + echo "init uploads" + # 初始化元数据文件以创建数据集 + kaggle datasets init -p kaggle/sources + # 更改默认的 json 文件,否则无法提交 + sed -i 's/INSERT_TITLE_HERE/sources/g' kaggle/sources/dataset-metadata.json + sed -i 's/INSERT_SLUG_HERE/sources/g' kaggle/sources/dataset-metadata.json + # 创建一个新的数据集 + kaggle datasets create -p kaggle/sources -r zip + # 初始化元数据文件以创建数据集 - kaggle datasets init -p models + kaggle datasets init -p kaggle/checkpoints # 更改默认的 json 文件,否则无法提交 - sed -i 's/INSERT_TITLE_HERE/models/g' models/dataset-metadata.json - sed -i 's/INSERT_SLUG_HERE/models/g' models/dataset-metadata.json + sed -i 's/INSERT_TITLE_HERE/checkpoints/g' kaggle/checkpoints/dataset-metadata.json + sed -i 's/INSERT_SLUG_HERE/checkpoints/g' kaggle/checkpoints/dataset-metadata.json # 创建一个新的数据集 - kaggle datasets create -p models + kaggle datasets create -p kaggle/checkpoints fi if [ $1 -eq 1 ]; then - # 更新数据集 - kaggle datasets version -p models -m "Updated data" + echo "只更新脚本文件" + # 更新脚本文件 + kaggle datasets version -p kaggle/sources -m "Updated Python Files" -r zip +fi + +if [ $1 -eq 2 ]; then + echo "更新脚本文件和权重文件" + # 更新数据集和脚本文件 + kaggle datasets version -p kaggle/sources -m "Updated Python Files" -r zip + kaggle datasets version -p kaggle/checkpoints -m "Updated Checkpoints" fi