Skip to content

Commit

Permalink
update uploads.sh
Browse files Browse the repository at this point in the history
  • Loading branch information
zdaiot committed Sep 28, 2019
1 parent f8a6ef8 commit 0938044
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 57 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ checkpoints/
*.pkl
submission.csv
*.json
kaggle/
34 changes: 16 additions & 18 deletions classify_segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,21 @@


class Get_Classify_Results():
def __init__(self, model_name, fold, save_path, class_num=4):
def __init__(self, model_name, fold, model_path, class_num=4):
''' 处理当前fold一个batch的数据分类结果
:param model_name: 当前的模型名称
:param fold: 当前的折数
:param save_path: 存放所有模型的路径
:param model_path: 存放所有模型的路径
:param class_num: 类别总数
'''
self.model_name = model_name
self.fold = fold
self.save_path = save_path
self.model_path = model_path
self.class_num = class_num

self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 加载模型及其权重
self.model_path = os.path.join(self.save_path, self.model_name)
self.classify_model = ClassifyResNet(model_name)
if torch.cuda.is_available():
self.classify_model = torch.nn.DataParallel(self.classify_model)
Expand All @@ -48,21 +47,20 @@ def get_classify_results(self, images, thrshold=0.5):


class Get_Segment_Results():
def __init__(self, model_name, fold, save_path, class_num=4):
def __init__(self, model_name, fold, model_path, class_num=4):
''' 处理当前fold一个batch的数据分割结果
:param model_name: 当前的模型名称
:param fold: 当前的折数
:param save_path: 存放所有模型的路径
:param model_path: 存放所有模型的路径
:param class_num: 类别总数
'''
self.model_name = model_name
self.fold = fold
self.save_path = save_path
self.model_path = model_path
self.class_num = class_num

# 加载模型及其权重
self.model_path = os.path.join(self.save_path, self.model_name)
self.segment_model = Model(self.model_name).create_model()
self.segment_model_path = os.path.join(self.model_path, '%s_fold%d_best.pth' % (self.model_name, self.fold))
self.solver = Solver(self.segment_model)
Expand Down Expand Up @@ -115,21 +113,21 @@ def get_thresholds_minareas(self, json_path, fold):


class Classify_Segment_Fold():
def __init__(self, model_name, fold, save_path, class_num=4):
def __init__(self, model_name, fold, model_path, class_num=4):
''' 处理当前fold一个batch的分割结果和分类结果
:param model_name: 当前的模型名称
:param fold: 当前的折数
:param save_path: 存放所有模型的路径
:param model_path: 存放所有模型的路径
:param class_num: 类别总数
'''
self.model_name = model_name
self.fold = fold
self.save_path = save_path
self.model_path = model_path
self.class_num = class_num

self.classify_model = Get_Classify_Results(self.model_name, self.fold, self.save_path, self.class_num)
self.segment_model = Get_Segment_Results(self.model_name, self.fold, self.save_path, self.class_num)
self.classify_model = Get_Classify_Results(self.model_name, self.fold, self.model_path, self.class_num)
self.segment_model = Get_Segment_Results(self.model_name, self.fold, self.model_path, self.class_num)

def classify_segment(self, images):
''' 处理当前fold一个batch的分割结果和分类结果
Expand All @@ -149,17 +147,17 @@ def classify_segment(self, images):


class Classify_Segment_Folds():
def __init__(self, model_name, n_splits, save_path, dataloader, class_num=4):
def __init__(self, model_name, n_splits, model_path, dataloader, class_num=4):
''' 使用投票法处理所有fold一个batch的分割结果和分类结果
:param model_name: 当前的模型名称
:param n_splits: 总共有多少折,为list列表
:param save_path: 存放所有模型的路径
:param model_path: 存放所有模型的路径
:param class_num: 类别总数
'''
self.model_name = model_name
self.n_splits = n_splits
self.save_path = save_path
self.model_path = model_path
self.class_num = class_num
self.dataloader = dataloader

Expand All @@ -171,8 +169,8 @@ def get_classify_segment_models(self):
'''

for fold in self.n_splits:
self.classify_models.append(Get_Classify_Results(self.model_name, fold, self.save_path, self.class_num))
self.segment_models.append(Get_Segment_Results(self.model_name, fold, self.save_path, self.class_num))
self.classify_models.append(Get_Classify_Results(self.model_name, fold, self.model_path, self.class_num))
self.segment_models.append(Get_Segment_Results(self.model_name, fold, self.model_path, self.class_num))

def classify_segment_folds(self, images):
''' 使用投票法处理所有fold一个batch的分割结果和分类结果
Expand Down
49 changes: 21 additions & 28 deletions create_submission.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
kaggle = 0
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader

kaggle = 0
if kaggle:
os.system('pip install /kaggle/input/segmentation-models/pretrainedmodels-0.7.4/ > /dev/null')
os.system('pip install /kaggle/input/segmentation-models/segmentation_models.pytorch/ > /dev/null')
package_path = '../input/models' # add unet script dataset
package_path = 'kaggle/input/sources' # add unet script dataset
import sys
sys.path.append(package_path)
from model import Model
else:
from models.model import Model
import cv2
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from datasets.steel_dataset import TestDataset
from classify_segment import Classify_Segment_Folds, Classify_Segment_Fold

Expand All @@ -32,7 +28,7 @@ def mask2rle(img):
return ' '.join(str(x) for x in runs)


def create_submission(n_splits, model_name, batch_size, num_workers, mean, std, test_data_folder, sample_submission_path, save_path):
def create_submission(n_splits, model_name, batch_size, num_workers, mean, std, test_data_folder, sample_submission_path, model_path):
'''
:param n_splits: 折数,类型为list
Expand All @@ -43,7 +39,7 @@ def create_submission(n_splits, model_name, batch_size, num_workers, mean, std,
:param std: 方差
:param test_data_folder: 测试数据存放的路径
:param sample_submission_path: 提交样例csv存放的路径
:param save_path: 当前模型权重存放的根目录,注意下一级 model_name 目录存放的是当前模型权重
:param model_path: 当前模型权重存放的目录
:return: None
'''
# 加载数据集
Expand All @@ -56,9 +52,9 @@ def create_submission(n_splits, model_name, batch_size, num_workers, mean, std,
pin_memory=True
)
if len(n_splits) == 1:
classify_segment = Classify_Segment_Fold(model_name, n_splits[0], save_path).classify_segment
classify_segment = Classify_Segment_Fold(model_name, n_splits[0], model_path).classify_segment
else:
classify_segment = Classify_Segment_Folds(model_name, n_splits, save_path, testset).classify_segment_folds
classify_segment = Classify_Segment_Folds(model_name, n_splits, model_path, testset).classify_segment_folds

# start prediction
predictions = []
Expand All @@ -77,25 +73,22 @@ def create_submission(n_splits, model_name, batch_size, num_workers, mean, std,


if __name__ == "__main__":
if kaggle:
sample_submission_path = '../input/severstal-steel-defect-detection/sample_submission.csv'
test_data_folder = "../input/severstal-steel-defect-detection/test_images"
save_path = '../input/models'
else:
sample_submission_path = 'datasets/Steel_data/sample_submission.csv'
test_data_folder = 'datasets/Steel_data/test_images'
save_path = './checkpoints'

# 设置超参数
model_name = 'unet_resnet34'
# initialize test dataloader

num_workers = 12
batch_size = 8
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
n_splits = [1] # [0, 1, 2, 3, 4]

n_splits = [1, 2] # [0, 1, 2, 3, 4]
if kaggle:
        sample_submission_path = 'kaggle/input/severstal-steel-defect-detection/sample_submission.csv'
test_data_folder = "kaggle/input/severstal-steel-defect-detection/test_images"
model_path = 'kaggle/input/checkpoints'
else:
sample_submission_path = 'datasets/Steel_data/sample_submission.csv'
test_data_folder = 'datasets/Steel_data/test_images'
model_path = './checkpoints/' + model_name

create_submission(n_splits, model_name, batch_size, num_workers, mean, std, test_data_folder,
sample_submission_path, save_path)
sample_submission_path, model_path)
5 changes: 1 addition & 4 deletions models/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from torch.nn import Module
from torch import nn
import torch.nn.functional as F
from utils.loss import ClassifyLoss


class Model():
Expand Down Expand Up @@ -97,6 +96,4 @@ def forward(self, x):
x = torch.Tensor(8, 3, 256, 1600)
y = torch.ones(8, 4)
output = class_net(x)
criterion = ClassifyLoss()
loss = criterion(output, y)
print(loss)
print(output.size())
60 changes: 53 additions & 7 deletions uploads.sh
Original file line number Diff line number Diff line change
@@ -1,18 +1,64 @@
export http_proxy=http://localhost:8123
export https_proxy=http://localhost:8123

model_name="unet_resnet34"

# 建立文件夹,并copy py文件
if [ ! -d "kaggle/sources" ]; then
mkdir -p kaggle/sources
fi
if [ ! -d "kaggle/sources/models" ]; then
mkdir -p kaggle/sources/models
fi
if [ ! -d "kaggle/sources/datasets" ]; then
mkdir -p kaggle/sources/datasets
fi

if [ ! -d "kaggle/checkpoints" ]; then
mkdir -p kaggle/checkpoints
fi

rm kaggle/sources/*.py -f
rm kaggle/sources/*/*.py -f
rm checkpoints/$model_name/*_best.pth -f
rm checkpoints/$model_name/result.json -f

cp models/model.py kaggle/sources/models
cp datasets/steel_dataset.py kaggle/sources/datasets
cp solver.py kaggle/sources
cp classify_segment.py kaggle/sources

cp checkpoints/$model_name/*_best.pth kaggle/checkpoints
cp checkpoints/$model_name/result.json kaggle/checkpoints

if [ $1 -eq 0 ]; then
echo $1
echo "init uploads"
# 初始化元数据文件以创建数据集
kaggle datasets init -p kaggle/sources
# 更改默认的 json 文件,否则无法提交
sed -i 's/INSERT_TITLE_HERE/sources/g' kaggle/sources/dataset-metadata.json
sed -i 's/INSERT_SLUG_HERE/sources/g' kaggle/sources/dataset-metadata.json
# 创建一个新的数据集
kaggle datasets create -p kaggle/sources -r zip

# 初始化元数据文件以创建数据集
kaggle datasets init -p models
kaggle datasets init -p kaggle/checkpoints
# 更改默认的 json 文件,否则无法提交
sed -i 's/INSERT_TITLE_HERE/models/g' models/dataset-metadata.json
sed -i 's/INSERT_SLUG_HERE/models/g' models/dataset-metadata.json
sed -i 's/INSERT_TITLE_HERE/checkpoints/g' kaggle/checkpoints/dataset-metadata.json
sed -i 's/INSERT_SLUG_HERE/checkpoints/g' kaggle/checkpoints/dataset-metadata.json
# 创建一个新的数据集
kaggle datasets create -p models
kaggle datasets create -p kaggle/checkpoints
fi

if [ $1 -eq 1 ]; then
# 更新数据集
kaggle datasets version -p models -m "Updated data"
echo "只更新脚本文件"
# 更新脚本文件
kaggle datasets version -p kaggle/sources -m "Updated Python Files" -r zip
fi

if [ $1 -eq 2 ]; then
echo "更新脚本文件和权重文件"
# 更新数据集和脚本文件
kaggle datasets version -p kaggle/sources -m "Updated Python Files" -r zip
kaggle datasets version -p kaggle/checkpoints -m "Updated Checkpoints"
fi

0 comments on commit 0938044

Please sign in to comment.