diff --git a/.gitattributes b/.gitattributes index 20739a9d8..7fd6aee34 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,51 @@ data/** filter=lfs diff=lfs merge=lfs -text +data filter=lfs diff=lfs merge=lfs -text +data/test/inference/lookup_export/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +data/test/inference/fg_export_multi/variables/variables.index filter=lfs diff=lfs merge=lfs -text +data/test/inference/tb_multitower_export/assets/pipeline.config filter=lfs diff=lfs merge=lfs -text +data/test/latest_ckpt_test/model.ckpt-500.meta filter=lfs diff=lfs merge=lfs -text +data/test/tb_data/taobao_test_data filter=lfs diff=lfs merge=lfs -text +data/test/test.csv filter=lfs diff=lfs merge=lfs -text +data/test/inference/tb_multitower_placeholder_rename_export/assets/pipeline.config filter=lfs diff=lfs merge=lfs -text +data/test/inference/tb_multitower_export/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +data/test/tb_data_with_time/taobao_test_data_with_time filter=lfs diff=lfs merge=lfs -text +data/test/latest_ckpt_test/model.ckpt-500.index filter=lfs diff=lfs merge=lfs -text +data/test/lookup_data.csv filter=lfs diff=lfs merge=lfs -text +data/test/criteo_sample.tfrecord filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_valid.csv filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_train_feature.txt filter=lfs diff=lfs merge=lfs -text +data/test/tb_data/taobao_train_data filter=lfs diff=lfs merge=lfs -text +data/test/inference/fg_export_single/variables/variables.index filter=lfs diff=lfs merge=lfs -text +data/test/inference/lookup_data_test80.csv filter=lfs diff=lfs merge=lfs -text +data/test/inference/tb_multitower_export/variables/variables.index filter=lfs diff=lfs merge=lfs -text +data/test/export/data.csv filter=lfs diff=lfs merge=lfs -text +data/test/embed_data.csv filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_fg_pred.out filter=lfs diff=lfs merge=lfs -text +data/test/dwd_avazu_ctr_deepmodel_10w.csv filter=lfs diff=lfs merge=lfs -text +data/test/tb_data/taobao_train_data_kd filter=lfs diff=lfs merge=lfs -text +data/test/inference/fg_export_single/saved_model.pb filter=lfs diff=lfs merge=lfs -text +data/test/inference/lookup_export/variables/variables.index filter=lfs diff=lfs merge=lfs -text +data/test/tag_kv_data.csv filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_train_input.txt filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_test_bucketize_feature.txt filter=lfs diff=lfs merge=lfs -text +data/test/inference/tb_multitower_placeholder_rename_export/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +data/test/inference/fg_export_single/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +data/test/inference/fg_export_multi/assets/pipeline.config filter=lfs diff=lfs merge=lfs -text +data/test/inference/fg_export_multi/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +data/test/inference/lookup_export/assets/pipeline.config filter=lfs diff=lfs merge=lfs -text +data/test/inference/lookup_export/saved_model.pb filter=lfs diff=lfs merge=lfs -text +data/test/inference/taobao_infer_data.txt filter=lfs diff=lfs merge=lfs -text +data/test/inference/tb_multitower_placeholder_rename_export/saved_model.pb filter=lfs diff=lfs merge=lfs -text +data/test/tb_data/taobao_test_data_kd filter=lfs diff=lfs merge=lfs -text +data/test/hpo_test/eval_val/events.out.tfevents.1597889819.j63d04245.sqa.eu95 filter=lfs 
diff=lfs merge=lfs -text +data/test/inference/tb_multitower_placeholder_rename_export/variables/variables.index filter=lfs diff=lfs merge=lfs -text +data/test/inference/fg_export_single/assets/pipeline.config filter=lfs diff=lfs merge=lfs -text +data/test/inference/fg_export_multi/saved_model.pb filter=lfs diff=lfs merge=lfs -text +data/test/inference/tb_multitower_export/saved_model.pb filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_test_input.txt filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_test_feature.txt filter=lfs diff=lfs merge=lfs -text +data/test/test_with_quote.csv filter=lfs diff=lfs merge=lfs -text +data/test/tb_data_with_time/taobao_train_data_with_time filter=lfs diff=lfs merge=lfs -text +data/test/latest_ckpt_test/model.ckpt-500.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_valid_feature.txt filter=lfs diff=lfs merge=lfs -text +data/test/rtp/taobao_train_bucketize_feature.txt filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index a01a1b1ba..a209f839b 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,12 @@ EasyRec致力于成为容易上手的工业界深度学习推荐算法框架, ### Run everywhere -- [MaxCompute](https://help.aliyun.com/product/27797.html) / [DataScience](https://help.aliyun.com/document_detail/170836.html) / [DLC](https://www.alibabacloud.com/help/zh/doc-detail/165137.htm?spm=a2c63.p38356.b99.79.4c0734a4bVav8D) / Local +- Local / [MaxCompute](https://help.aliyun.com/product/27797.html) / [DataScience](https://help.aliyun.com/document_detail/170836.html) / [DLC](https://www.alibabacloud.com/help/zh/doc-detail/165137.htm?spm=a2c63.p38356.b99.79.4c0734a4bVav8D) - TF1.12-1.15 / TF2.x / PAI-TF ### Diversified input data -- MaxCompute Table +- [MaxCompute Table](https://help.aliyun.com/document_detail/27819.html?spm=a2c4g.11186623.6.554.91d517bazK7nTF) - HDFS files - [OSS files](https://help.aliyun.com/product/31815.html?spm=5176.7933691.1309819.8.5bb52a66ZQOobj) - Kafka Streams @@ -32,7 +32,7 @@ EasyRec致力于成为容易上手的工业界深度学习推荐算法框架, ### It is smart - EarlyStop / Best Checkpoint Saver -- Hyper Parameter Search / AutoFeatureCross +- [Hyper Parameter Search](docs/source/automl/hpo_pai.md) / [AutoFeatureCross](docs/source/automl/auto_cross_emr.md) - In development: NAS, Knowledge Distillation, MultiModal ### Large scale and easy deployment @@ -44,10 +44,29 @@ EasyRec致力于成为容易上手的工业界深度学习推荐算法框架, ### A variety of models -- DeepFM / MultiTower / Deep Interest Network / DSSM / MMoE / ESMM +- [DeepFM](docs/source/models/deepfm.md) / [MultiTower](docs/source/models/multi_tower.md) / [Deep Interest Network](docs/source/models/din.md) / [DSSM](docs/source/models/dssm.md) / [MMoE](docs/source/models/mmoe.md) / [ESMM](docs/source/models/esmm.md) - More models in development ### Easy to customize -- Easy to implement customized models +- Easy to implement [customized models](docs/source/models/user_define.md) - Not need to care about data pipelines + +### Get Started + +- Download +``` + git clone https://github.com/AlibabaPAI/EasyRec.git + wget https://easyrec.oss-cn-beijing.aliyuncs.com/data/easyrec_data_20210818.tar.gz +``` + +- [EasyRec Framework](https://easyrec.oss-cn-beijing.aliyuncs.com/docs/EasyRec.pptx) + +- [Run](docs/source/quick_start/local_tutorial.md) + +- [PAI-DSW DEMO](https://dsw-dev.data.aliyun.com/#/?fileUrl=http://easyrec.oss-cn-beijing.aliyuncs.com/dsw/easy_rec_demo.ipynb&fileName=EasyRec_DeepFM.ipynb) +(Remember to select Python 3 kernel) + +- [Develop](docs/source/develop.md) + +- [Doc](https://easyrec.readthedocs.io/en/latest/)
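+
+- Minimal local run (a sketch only; it assumes the commands run from the directory where the repo was cloned and the data tarball downloaded, and the config name below is a placeholder, see [Run](docs/source/quick_start/local_tutorial.md) for the exact steps)
+```
+ cd EasyRec
+ tar -xvzf ../easyrec_data_20210818.tar.gz
+ # placeholder config name, use the one referenced in the local tutorial
+ python -m easy_rec.python.train_eval --pipeline_config_path dwd_avazu_ctr_deepmodel.config
+```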
diff --git a/docs/images/faq/field_type2.png b/docs/images/faq/field_type2.png new file mode 100644 index 000000000..6346f8869 Binary files /dev/null and b/docs/images/faq/field_type2.png differ diff --git a/docs/images/models/autoint.png b/docs/images/models/autoint.png new file mode 100644 index 000000000..5aae36cfa Binary files /dev/null and b/docs/images/models/autoint.png differ diff --git a/docs/images/models/dcn.png b/docs/images/models/dcn.png new file mode 100644 index 000000000..38f561be2 Binary files /dev/null and b/docs/images/models/dcn.png differ diff --git a/docs/images/models/dssm_neg_sampler.png b/docs/images/models/dssm_neg_sampler.png new file mode 100644 index 000000000..a24775869 Binary files /dev/null and b/docs/images/models/dssm_neg_sampler.png differ diff --git a/docs/images/models/fm.png b/docs/images/models/fm.png new file mode 100644 index 000000000..447a626f8 Binary files /dev/null and b/docs/images/models/fm.png differ diff --git a/docs/images/models/mind.png b/docs/images/models/mind.png new file mode 100644 index 000000000..5a24feb31 Binary files /dev/null and b/docs/images/models/mind.png differ diff --git a/docs/images/models/rocket_launching.png b/docs/images/models/rocket_launching.png new file mode 100644 index 000000000..4d68aec28 Binary files /dev/null and b/docs/images/models/rocket_launching.png differ diff --git a/docs/images/models/simple_multi_task.png b/docs/images/models/simple_multi_task.png new file mode 100644 index 000000000..1e88d97d6 Binary files /dev/null and b/docs/images/models/simple_multi_task.png differ diff --git a/docs/images/models/wide_and_deep.png b/docs/images/models/wide_and_deep.png new file mode 100644 index 000000000..b599740b1 Binary files /dev/null and b/docs/images/models/wide_and_deep.png differ diff --git a/docs/images/other/Role0.png b/docs/images/other/Role0.png new file mode 100644 index 000000000..f7b5e0b71 Binary files /dev/null and b/docs/images/other/Role0.png differ diff --git a/docs/images/other/Role1.png b/docs/images/other/Role1.png new file mode 100644 index 000000000..98e88d0b0 Binary files /dev/null and b/docs/images/other/Role1.png differ diff --git a/docs/images/other/Role2.png b/docs/images/other/Role2.png new file mode 100644 index 000000000..22ee3056c Binary files /dev/null and b/docs/images/other/Role2.png differ diff --git a/docs/images/other/Role3.png b/docs/images/other/Role3.png new file mode 100644 index 000000000..bd9ff15bc Binary files /dev/null and b/docs/images/other/Role3.png differ diff --git a/docs/images/other/log1.png b/docs/images/other/log1.png new file mode 100644 index 000000000..f7b5e0b71 Binary files /dev/null and b/docs/images/other/log1.png differ diff --git a/docs/images/other/log2.png b/docs/images/other/log2.png new file mode 100644 index 000000000..2e5a24ed5 Binary files /dev/null and b/docs/images/other/log2.png differ diff --git a/docs/images/other/log3.png b/docs/images/other/log3.png new file mode 100644 index 000000000..5324b9326 Binary files /dev/null and b/docs/images/other/log3.png differ diff --git a/docs/images/other/log4.png b/docs/images/other/log4.png new file mode 100644 index 000000000..dafbceca5 Binary files /dev/null and b/docs/images/other/log4.png differ diff --git a/docs/images/other/log5.png b/docs/images/other/log5.png new file mode 100644 index 000000000..86484e0b3 Binary files /dev/null and b/docs/images/other/log5.png differ diff --git a/docs/images/other/log6.png b/docs/images/other/log6.png new file mode 100644 index 000000000..12fe8cc0b Binary 
files /dev/null and b/docs/images/other/log6.png differ diff --git a/docs/images/quick_start/image.png b/docs/images/quick_start/image.png new file mode 100644 index 000000000..ae1f32968 Binary files /dev/null and b/docs/images/quick_start/image.png differ diff --git a/docs/source/api/easy_rec.python.core.rst b/docs/source/api/easy_rec.python.core.rst index 4ed7e6778..ff6568c11 100644 --- a/docs/source/api/easy_rec.python.core.rst +++ b/docs/source/api/easy_rec.python.core.rst @@ -8,3 +8,13 @@ easy\_rec.python.core.learning\_schedules :members: :undoc-members: :show-inheritance: + +.. automodule:: easy_rec.python.core.metrics + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: easy_rec.python.core.sampler + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/easy_rec.python.layers.rst b/docs/source/api/easy_rec.python.layers.rst index b2cc0ef86..e859d0878 100644 --- a/docs/source/api/easy_rec.python.layers.rst +++ b/docs/source/api/easy_rec.python.layers.rst @@ -48,3 +48,19 @@ easy\_rec.python.layers.seq\_input\_layer :members: :undoc-members: :show-inheritance: + +easy\_rec.python.layers.multihead\_attention\_layer +------------------------------------------------ + +.. automodule:: easy_rec.python.layers.multihead_attention + :members: + :undoc-members: + :show-inheritance: + +easy\_rec.python.layers.mmoe +------------------------------------------------ + +.. automodule:: easy_rec.python.layers.mmoe + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/easy_rec.python.model.rst b/docs/source/api/easy_rec.python.model.rst index c819de57a..df553ebd5 100644 --- a/docs/source/api/easy_rec.python.model.rst +++ b/docs/source/api/easy_rec.python.model.rst @@ -25,6 +25,14 @@ easy\_rec.python.model.fm :undoc-members: :show-inheritance: +easy\_rec.python.model.wide_and_deep +-------------------------------- + +.. automodule:: easy_rec.python.model.wide_and_deep + :members: + :undoc-members: + :show-inheritance: + easy\_rec.python.model.deepfm ------------------------------------ @@ -41,6 +49,30 @@ easy\_rec.python.model.multi\_tower :undoc-members: :show-inheritance: +easy\_rec.python.model.dcn +------------------------------------------ + +.. automodule:: easy_rec.python.model.dcn + :members: + :undoc-members: + :show-inheritance: + +easy\_rec.python.model.autoint +------------------------------------------ + +.. automodule:: easy_rec.python.model.autoint + :members: + :undoc-members: + :show-inheritance: + +easy\_rec.python.model.dbmtl +------------------------------------------ + +.. automodule:: easy_rec.python.model.dbmtl + :members: + :undoc-members: + :show-inheritance: + easy\_rec.python.model.multi\_tower\_bst ----------------------------------------------- @@ -65,6 +97,14 @@ easy\_rec.python.model.dssm :undoc-members: :show-inheritance: +easy\_rec.python.model.mind +---------------------------------- + +.. 
automodule:: easy_rec.python.model.mind + :members: + :undoc-members: + :show-inheritance: + easy\_rec.python.model.multi\_task\_model ------------------------------------------------ diff --git a/docs/source/automl/auto_cross_emr.md b/docs/source/automl/auto_cross_emr.md index 12f8f0937..d39e099b0 100644 --- a/docs/source/automl/auto_cross_emr.md +++ b/docs/source/automl/auto_cross_emr.md @@ -5,8 +5,8 @@ 输入一般是csv格式的文件。 如下所示,列之间用,分割 - 示例数据(小数据集): - - train: [ctr\_train.csv](https://yuguang-test.oss-cn-beijing.aliyuncs.com/fe/data/ctr_train.csv) - - test: [ctr\_test.csv](https://yuguang-test.oss-cn-beijing.aliyuncs.com/fe/data/ctr_test.csv) + - train: [ctr_train.csv](https://easyrec.oss-cn-beijing.aliyuncs.com/data/autocross/ctr_train.csv) + - test: [ctr_test.csv](https://easyrec.oss-cn-beijing.aliyuncs.com/data/autocross/ctr_test.csv) - 数据示例: ``` @@ -23,10 +23,8 @@ hadoop fs -put ctr_test.csv hdfs:///user/fe/data/ ### AutoCross -AutoCross使用请参考文档 [AutoCross EMR](https://yuque.antfin-inc.com/pai/automl/cicak6)。 - -- AutoCross yaml配置文件:[ctr\_autocross.yaml](https://yuguang-test.oss-cn-beijing.aliyuncs.com/fe/configs/ctr_autocross.yaml)([配置文件解析](https://yuque.antfin-inc.com/pai/automl/cicak6)) -- alink环境配置文件,另存为a[link.env](https://yuguang-test.oss-cn-beijing.aliyuncs.com/fe/configs/alink.env) +- AutoCross yaml配置文件:[ctr_autocross.yaml](https://easyrec.oss-cn-beijing.aliyuncs.com/data/autocross/ctr_autocross.yaml) +- alink环境配置文件,另存为a[link.env](https://easyrec.oss-cn-beijing.aliyuncs.com/data/autocross/alink.env) ```bash userId=default @@ -34,7 +32,6 @@ alinkServerEndpoint=http://localhost:9301 hadoopHome=/usr/lib/hadoop-current hadoopUserName=hadoop token=ZSHTIeEkwrtZJJsN1ZZmCJJmr5jaj1wO - ``` - 使用 pai-automl-fe 提交任务 @@ -43,10 +40,10 @@ token=ZSHTIeEkwrtZJJsN1ZZmCJJmr5jaj1wO pai-automl-fe run -e configs/alink.env --config configs/ctr_autocross.yaml --mode emr ``` -### 对接easy\_rec +### 对接EasyRec -Easy\_rec使用请参考文档 [EMR Tutorial](https://yuque.antfin.com/pai/arch/zucdp3)。 -以下说明AutoCross后的数据对接easy\_rec的配置([ctr\_deepmodel\_ac.config](https://yuguang-test.oss-cn-beijing.aliyuncs.com/fe/configs/ctr_deepmodel_ac.config)) +EasyRec使用请参考文档 [EMR Train](../train.md)。 +以下说明AutoCross后的数据对接easy_rec的配置([ctr_deepmodel_ac.config](https://easyrec.oss-cn-beijing.aliyuncs.com/data/autocross/ctr_deepmodel_ac.config)) #### 数据据相关 @@ -210,7 +207,6 @@ model_config:{ feature_names: "cross_2" wide_deep:DEEP } - ``` -使用el\_submit提交训练即可,请参照 [EMR Tutorial](https://yuque.antfin.com/pai/arch/zucdp3)。 +使用el_submit提交训练即可,请参照 [EMR Train](../train.md)。 diff --git a/docs/source/automl/hpo_emr.md b/docs/source/automl/hpo_emr.md index 5e79d9f2c..889ecd4eb 100644 --- a/docs/source/automl/hpo_emr.md +++ b/docs/source/automl/hpo_emr.md @@ -5,7 +5,7 @@ - 下载安装automl包 ```bash -wget http://easy-rec.oss-cn-hangzhou.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl +wget http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl pip install pai_automl-0.0.1rc1-py3-none-any.whl ``` @@ -17,19 +17,19 @@ python -m easy_rec.python.hpo.emr_hpo --hyperparams hyperparams.json --config_p ### 参数说明 -- \--config\_path easy-rec训练配置文件 -- \--exp\_dir 调优实验目录 -- \--debug 保留本地临时目录 -- \--metric\_name  调优的指标,默认是auc,其它可选指标[参考](https://yuque.antfin.com/pai/arch/moxgm5) -- \--max\_parallel   同一时刻可以并行跑的实验数目,默认4 -- \--total\_trial\_num  总共跑多少组实验,默认6 -- \--el\_submit\_params el\_submit指定PS/Worker资源的一些参数,包括-t x -m x \[-pn x -pc x -pm x\] -wn x -wc x -wm x -wg x 默认值 +- --config_path easyrec训练配置文件 +- --exp_dir 调优实验目录 +- --debug 
保留本地临时目录 +- --metric_name  调优的指标,默认是auc,其它可选指标\[参考../eval.md) +- --max_parallel   同一时刻可以并行跑的实验数目,默认4 +- --total_trial_num  总共跑多少组实验,默认6 +- --el_submit_params el_submit指定PS/Worker资源的一些参数,包括-t x -m x \[-pn x -pc x -pm x\] -wn x -wc x -wm x -wg x 默认值 ```bash -t standalone -m local -wn 1 -wc 6 -wm 20000 -wg 1 ``` -- \--hyperparams 参数空间配置空间 +- --hyperparams 参数空间配置空间 #### hyperparams设置 @@ -43,11 +43,11 @@ python -m easy_rec.python.hpo.emr_hpo --hyperparams hyperparams.json --config_p ] ``` -- name: easy\_rec pipeline\_config里面的参数名称,注意要用全路径 +- name: easy_rec pipeline_config里面的参数名称,注意要用全路径 - feature\_configs\[**input\_names\[0\]=field\_name1**\].embedding\_dim + feature_configs\[**input_names\[0\]=field_name1**\].embedding_dim - - 由于feature\_configs是一个数组,所以需要用到选择器,根据**属性值**选择部分特征: + - 由于feature_configs是一个数组,所以需要用到选择器,根据**属性值**选择部分特征: ![image.png](../../images/automl/pai_field.png) @@ -66,9 +66,9 @@ python -m easy_rec.python.hpo.emr_hpo --hyperparams hyperparams.json --config_p - 关联参数设置 -有些参数的值是关联的,比如对于deepfm算法,所有的embedding\_dim必须是一样的 +有些参数的值是关联的,比如对于deepfm算法,所有的embedding_dim必须是一样的 -- name里面可以指定多个要调整的参数名称,用";"分割feature\_configs\[input\_names\[0\]=field1\].embedding\_dim;feature\_configs\[input\_names\[0\]=field20\].embedding\_dim +- name里面可以指定多个要调整的参数名称,用";"分割feature_configs\[input_names\[0\]=field1\].embedding_dim;feature_configs\[input_names\[0\]=field20\].embedding_dim - 如果name里面包含了多个参数名称,那么candidates也需要有多个参数值,用";"分割如"32;32" - candidates: 候选值 - type: 候选值类型, 支持Categorical, Integer, Real @@ -99,13 +99,13 @@ python -m easy_rec.python.hpo.emr_hpo --hyperparams hyperparams.json --config_p - LOG信息 ![image.png](../../images/automl/emr_log.png) -一共做了5组实验,可以看到embedding\_dim越小越好。 +一共做了5组实验,可以看到embedding_dim越小越好。 -- 实验目录信息(exp\_dir): hdfs:///user/easy\_rec\_test/experiment/hpo\_test\_v8 +- 实验目录信息(exp_dir): hdfs:///user/easy_rec_test/experiment/hpo_test_v8 ![image.png](../../images/automl/emr_exp.png) -- 如果设置了--debug,那么将会保留本地临时目录: /tmp/emr\_easy\_rec\_hpo\_1600519258 +- 如果设置了--debug,那么将会保留本地临时目录: /tmp/emr_easy_rec_hpo_1600519258 rewrite\_\[0-4\].json定义了每组实验的参数 ![image.png](../../images/automl/emr_json.png) diff --git a/docs/source/automl/hpo_pai.md b/docs/source/automl/hpo_pai.md index 99f453fb7..9969cfc97 100644 --- a/docs/source/automl/hpo_pai.md +++ b/docs/source/automl/hpo_pai.md @@ -5,7 +5,7 @@ #### 下载安装automl包 ```bash -wget http://easy-rec.oss-cn-hangzhou.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl +wget http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl pip install pai_automl-0.0.1rc1-py3-none-any.whl ``` @@ -17,7 +17,7 @@ python -m easy_rec.python.hpo.pai_hpo --odps_config ~/.odps_config.ini --oss_con ### 参数说明 -- \--odps\_config: odps账号信息文件 +- --odps_config: odps账号信息文件 ``` project_name=easy_rec_test @@ -33,33 +33,39 @@ https_check=true #决定是否开启 HTTPS 访问 ``` -- \--oss\_config : oss配置文件 +- --oss_config : oss配置文件 ```json [Credentials] language=ch -endpoint = oss-cn-shanghai.aliyuncs.com +endpoint = oss-cn-beijing.aliyuncs.com accessKeyID = xxx accessKeySecret= xxx ``` -- \--bucket oss\_bucket +- --bucket oss_bucket -- \--role\_arn acs:ram::xxx:role/xxx +- --role_arn acs:ram::xxx:role/xxx - pai tensorflow 任务访问oss数据的钥匙,获取方式[见附件](https://yuque.antfin.com/pai/arch/icv7x2)。 + pai tensorflow 任务访问oss数据的钥匙,[获取方式](https://help.aliyun.com/document_detail/190477.html?spm=h2-url-1)。 -- \--tables 输入输出表 +- --train_tables 训练表 -- \--exp\_dir 调优目录 +- --eval_tables 评估表 -- \--config\_path easy-rec训练配置文件 +- --exp_dir 调优目录, oss上的目录 -- \--metric\_name 
调优的指标,默认是auc,其它可选指标[参考](https://yuque.antfin.com/pai/arch/moxgm5) +- --config_path easyrec训练配置文件 -- \--max\_parallel 同一时刻可以并行跑的实验数目 +- --metric_name 调优的指标,默认是auc,其它可选指标[参考](../eval.md) -- \--total\_trial\_num 总共跑多少组实验 +- --max_parallel 同一时刻可以并行跑的实验数目 + +- --total_trial_num 总共跑多少组实验 + +- --is_outer 内部pai还是外部pai + +- --selected_cols 用于训练和评估的列, 参考 #### hyperparams设置 @@ -75,19 +81,17 @@ accessKeySecret= xxx ] ``` -- name: easy\_rec pipeline\_config里面的参数名称,注意要用全路径 - +- name: easy_rec pipeline_config里面的参数名称,注意要用全路径 ``` - feature_configs[**input_names[0]=field1**].embedding_dim - - - 由于feature_configs是一个数组,所以需要用到选择器,根据**属性值**选择部分特征: + feature_configs[input_names[0]=field1].embedding_dim ``` + - 由于feature_configs是一个数组,所以需要用到选择器,根据**属性值**选择部分特征: ![image.png](../../images/automl/pai_field.png) ``` - input_names[0]=field_name1是选择器 - - 也支持数字作为选择器, 如: feature_configs[**0**], feature_configs[**1**] + - 也支持数字作为选择器, 如: feature_configs[0], feature_configs[1] - 支持使用:选择所有的特征,如: - feature_configs[:]选择全部特征 - feature_configs[5:]选择index从5开始的特征 @@ -99,9 +103,9 @@ accessKeySecret= xxx - 关联参数设置 -有些参数的值是关联的,比如对于deepfm算法,所有的embedding\_dim必须是一样的 +有些参数的值是关联的,比如对于deepfm算法,所有的embedding_dim必须是一样的 -- name里面可以指定多个要调整的参数名称,用";"分割feature\_configs\[input\_names\[0\]=field1\].embedding\_dim;feature\_configs\[input\_names\[0\]=field20\].embedding\_dim +- name里面可以指定多个要调整的参数名称,用";"分割feature_configs\[input_names\[0\]=field1\].embedding_dim;feature_configs\[input_names\[0\]=field20\].embedding_dim - 如果name里面包含了多个参数名称,那么candidates也需要有多个参数值,用";"分割如"32;32" - candidates: 候选值 - type: 候选值类型, 支持Categorical, Integer, Real @@ -132,10 +136,10 @@ accessKeySecret= xxx - LOG信息 ![image.png](../../images/automl/pai_log.png) -一共做了5组实验,可以看出embedding\_dim=80的这一组实验效果最好。 +一共做了5组实验,可以看出embedding_dim=80的这一组实验效果最好。 -- 实验目录信息(exp\_dir): oss://xxx/easy\_rec/experiment/model\_hpo - - easy\_rec\_hpo\_\[0-4\]: 每组实验的模型目录 +- 实验目录信息(exp_dir): oss://xxx/easy_rec/experiment/model_hpo + - easy_rec_hpo\_\[0-4\]: 每组实验的模型目录 - \*.json包含了每组实验的参数信息 ```json diff --git a/docs/source/develop.md b/docs/source/develop.md index e66e68c74..39a76dc85 100644 --- a/docs/source/develop.md +++ b/docs/source/develop.md @@ -82,8 +82,4 @@ build pip package python setup.py sdist bdist_wheel ``` -### Deploy - -```bash -sh pai_jobs/deploy_ext.sh -``` +### [Deploy](./release.md) diff --git a/docs/source/emr_tensorboard.md b/docs/source/emr_tensorboard.md new file mode 100644 index 000000000..8bb0b1040 --- /dev/null +++ b/docs/source/emr_tensorboard.md @@ -0,0 +1,20 @@ +# 在Header上启动tensorboard + +```bash +ssh root@39.104.103.119 # login to header +source $HADOOP_HOME/libexec/hadoop-config.sh +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JAVA_HOME/jre/lib/amd64/server +CLASSPATH=$($HADOOP_HDFS_HOME/bin/hadoop classpath --glob) tensorboard --logdir=hdfs:///user/experiments/mnist_train_v2 --port 6006 +``` + +# 通过SSH隧道方式访问Header + +- 详细见 [通过SSH隧道方式访问开源组件Web UI](https://help.aliyun.com/document_detail/169151.html?spm=a2c4g.11186623.6.598.658d727beowT5O) + +```bash +# 在mac上执行 +ssh -N -D 8157 root@39.104.103.119 +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --proxy-server="socks5://localhost:8157" --host-resolver-rules="MAP * 0.0.0.0 , EXCLUDE localhost" --user-data-dir=/tmp/ +``` + +在浏览器中输入: [http://emr-header-1:6006/](http://emr-header-1:6006/) diff --git a/docs/source/emr_yarn_log.md b/docs/source/emr_yarn_log.md new file mode 100644 index 000000000..ba8767615 --- /dev/null +++ b/docs/source/emr_yarn_log.md @@ -0,0 +1,44 @@ +## 任务运行时 + +#### 1. 
登录阿里云,访问E-MapReduce控制台 + +![image.png](../images/other/log1.png) + +#### 2. 访问YARN UI + +![image.png](../images/other/log2.png) +![image.png](../images/other/log3.png) +![image.png](../images/other/log4.png) +![image.png](../images/other/log5.png) + +## 任务结束以后 + +在header上执行 + +```bash +# applicationId可以在执行el_submit的日志中查看 +# 仅在任务结束了以后可以用这个方式看日志 +yarn logs -applicationId=application_1596459377740_0015 +``` + +可以查看log + +## 打洞查看 + +也可以打洞进行查看, 详细见 [通过SSH隧道方式访问开源组件Web UI](https://help.aliyun.com/document_detail/169151.html) + +```bash +# 在mac上执行 +ssh -N -D 8157 root@39.104.103.119 +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --proxy-server="socks5://localhost:8157" --host-resolver-rules="MAP * 0.0.0.0 , EXCLUDE localhost" --user-data-dir=/tmp/ +``` + +在浏览器中输入: http://emr-header-1:8088 + +### 任务未结束 + +界面和[不打洞方式](#y09Xq)相同 + +### 任务结束以后 + +![image.png](../images/other/log5.png) diff --git a/docs/source/eval.md b/docs/source/eval.md new file mode 100644 index 000000000..a2a96e7b9 --- /dev/null +++ b/docs/source/eval.md @@ -0,0 +1,76 @@ +# 评估 + +### eval_config + +```sql +eval_config { + metrics_set: { + # metric为auc + auc {} + } +} +``` + +- metrics_set: 配置评估指标,可以配置多个,如: + +```sql +eval_config { + metrics_set: { + # metric为auc + auc {} + # metric为mae + mean_absolute_error {} + } +} +``` + +- num_examples: 默认为0, 表示评估所有样本;大于0,则每次只评估num_examples样本,一般在调试或者示例的时候使用 + +### Metric: + +| MetricClass | Example | 适用模型 | +| ----------------- | ---------------------- | ------------------------------------------- | +| Accuracy | accuracy {} | 多分类模型LossType=CLASSIFICATION, num_class > 1 | +| MeanAbsoluteError | mean_absolute_error {} | 回归模型LossType=L2_LOSS | +| RecallAtTopK | recall_at_topk {} | 多分类模型LossType=CLASSIFICATION, num_class > 1 | +| Max_F1 | max_f1 {} | 分类模型LossType=CLASSIFICATION | +| MeanSquaredError | mean_squared_error{} | 回归模型LossType=L2_LOSS | +| AUC | auc{} | 二分类模型LossType=CLASSIFICATION, num_class = 1 | +| GAUC | gauc{} | 二分类模型LossType=CLASSIFICATION, num_class = 1 | +| SessionAUC | session_auc{} | 二分类模型LossType=CLASSIFICATION, num_class = 1 | + +### 评估命令 + +#### Local + +```bash +python -m easy_rec.python.eval --pipeline_config_path dwd_avazu_ctr_deepmodel.config +``` + +- --pipeline_config_path: config文件路径 +- --model_dir: 如果指定了model_dir将会覆盖config里面的model_dir,一般在周期性调度的时候使用 + +#### PAI + +```sql +pai -name easy_rec_ext -project algo_public +-Dcmd=evaluate +-Dconfig=oss://easyrec/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config +-Dtables=odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_test +-Dcluster='{"worker" : {"count":1, "cpu":1000, "gpu":100, "memory":40000}}' +-Dmodel_dir=oss://easyrec/ckpt/MultiTower +-Darn=acs:ram::xxx:role/xxx +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com; +``` + +- -Dcmd: evaluate 模型评估 +- -Dconfig: 同训练 +- -Dtables: 只需要指定测试 tables +- -Dcluster: 评估不需要PS节点,指定一个worker节点即可 +- -Dmodel_dir: 如果指定了model_dir将会覆盖config里面的model_dir,一般在周期性调度的时候使用 +- -Dcheckpoint_path: 使用指定的checkpoint_path,如oss://easyrec/ckpt/MultiTower/model.ckpt-1000。不指定的话,默认model_dir中最新的ckpt文件。 +- 如果是pai内部版,则不需要指定arn和ossHost, arn和ossHost放在-Dbuckets里面 + - -Dbuckets=oss://easyrec/?role_arn=acs:ram::xxx:role/ev-ext-test-oss&host=oss-cn-beijing-internal.aliyuncs.com + +评估的结果会写到model_dir目录下的文件"eval_result.txt"中。 diff --git a/docs/source/export.md b/docs/source/export.md new file mode 100644 index 000000000..e8bd1c48f --- /dev/null +++ b/docs/source/export.md @@ -0,0 +1,71 @@ +# 导出 + +### export_config + +```protobuf +export_config { +} +``` + +- batch_size: 
导出模型的batch_size,默认是-1,即可以接收任意batch_size +- exporter_type: 导出类型, best | final | latest | none,默认final + - best 导出最好的模型 + - final 训练结束后导出 + - latest 导出最新的模型 + - none 不导出 +- dump_embedding_shape: 打印出embedding的shape,方便在EAS上部署分片大模型 +- best_exporter_metric: 当exporter_type为best的时候,确定最优导出模型的metric,注意该metric要在eval_config的metrics_set设置了才行 +- metric_bigger: 确定最优导出模型的metric是越大越好,还是越小越好,默认是越大越好 +- exports_to_keep: 当exporter_type为best或lastest时,保留n个最好或最新的模型,默认为1 + ```protobuf + export_config { + exporter_type: "best" + best_exporter_metric: "auc" + exports_to_keep: 1 + } + ``` +- multi_placeholder: 使用一个placeholder还是多个placeholder。默认为true,即对每个特征使用单个placeholder +- multi_value_fields: 针对tagFeature,指定一个字段集合,使得导出的placeholder可以接收二维数组,而不是训练时用的字符串类型,这样可以节省字符串拆分和类型转换的时间。 + ```protobuf + export_config { + multi_value_fields { + input_name: ["field1", "field2", "field3"] + } + } + ``` +- placeholder_named_by_input: True时利用data_config.input_fields.input_name来命令每个placeholder,False时每个placeholder名字为"input_X","X"为data_config.input_fields的顺序。默认为False + +### 导出命令 + +#### Local + +```bash +python -m easy_rec.python.export --pipeline_config_path dwd_avazu_ctr_deepmodel.config --export_dir ./export +``` + +- --pipeline_config_path: config文件路径 +- --model_dir: 如果指定了model_dir将会覆盖config里面的model_dir,一般在周期性调度的时候使用 +- --export_dir: 导出的目录 + +#### PAI + +```sql +pai -name easy_rec_ext -project algo_public +-Dconfig=oss://easyrec/easy_rec_test/dwd_avazu_ctr_deepmodel_ext.config +-Dcmd=export +-Dexport_dir=oss://easyrec/easy_rec_test/export +-Dcluster='{"worker" : {"count":1, "cpu":1000, "memory":40000}}' +-Darn=acs:ram::xxx:role/ev-ext-test-oss +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com +``` + +- -Dconfig: 同训练 +- -Dcmd: export 模型导出 +- -Dexport_dir: 导出的目录 +- -Dcheckpoint_path: 使用指定的checkpoint_path +- -Darn: rolearn  注意这个的arn要替换成客户自己的。可以从dataworks的设置中查看arn。 +- -DossHost: ossHost地址 +- -Dbuckets: config所在的bucket和保存模型的bucket; 如果有多个bucket,逗号分割 +- 如果是pai内部版,则不需要指定arn和ossHost, arn和ossHost放在-Dbuckets里面 + - -Dbuckets=oss://easyrec/?role_arn=acs:ram::xxx:role/ev-ext-test-oss&host=oss-cn-beijing-internal.aliyuncs.com diff --git a/docs/source/faq.md b/docs/source/faq.md index b20c80a8f..832e87df4 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -23,7 +23,7 @@ Traceback (most recent call last): AssertionError: sep[b','] maybe invalid: field_num=7, required_num=131 ``` -数据的列数和data\_configs里面的input\_fields数目不一致 +数据的列数和data_configs里面的input_fields数目不一致 ```bash #查看数据列数 @@ -32,9 +32,9 @@ head -5 train.csv | awk -v FS=',' '{ print NF }' grep input_fields easy_rec.config | wc -l ``` -#### 多余的feature\_configs +#### 多余的feature_configs -如下所示, device\_make在feature\_groups里面没有出现,所以报下面的错误: +如下所示, device_make在feature_groups里面没有出现,所以报下面的错误: ```bash File "/usr/lib/python3.7/site-packages/easy_rec/python/feature_column/feature_column.py", line 46, in __init__ @@ -52,19 +52,29 @@ KeyError: 'device_make' InvalidArgumentError (see above for traceback): Column size of the record to be saved: '588' does not match the default record column size: '17'. 
``` -config中每个input\_fields需要与数据表一一对齐。上图错误为数据表588列数据,input\_fields只配置了17列。如有大量字段在训练中用不到,建议把使用的列从全量表里select出来,形成单独的表,也较少了io消耗,提高训练速度。 -其它未知的string\_to\_number错误,或者field number不一致,或者TagFeature的Id数和Weight数不一致,都有可能是odps table column和config的不一致导致的。 +config中每个input_fields需要与数据表一一对齐。上图错误为数据表588列数据,input_fields只配置了17列。如有大量字段在训练中用不到,建议把使用的列从全量表里select出来,形成单独的表,也较少了io消耗,提高训练速度。 +其它未知的string_to_number错误,或者field number不一致,或者TagFeature的Id数和Weight数不一致,都有可能是odps table column和config的不一致导致的。 #### PAI上数据类型配置错误 +**1** ``` [2020-08-10 11:37:14.903966] [FATAL] [70#292] [tensorflow/core/framework/tensor.cc:626] Check failed: dtype() == expected_dtype (1 vs. 2) double expected, got float xargs: ../python_bin: terminated by signal 6 ``` -比如实际是bigint类型,data\_config里面的input\_type写成了FLOAT类型。 +比如实际是bigint类型,data_config里面的input_type写成了FLOAT类型。 建议: describe table; 看一下字段类型。 ![image.png](../images/faq/field_type.png) +**2** +``` +tensorflow.python.framework.errors_impl.InternalError: Unable to get element as bytes. +terminate called after throwing an instance of 'apsara::odps::algo::BaseException' +what(): build/release64/algo/data_io/table_writer/cluster/sql_record_writer.cpp(103): BaseException: |Commit to master failed +``` +场景:在执行predict命令的时候报错如上 +问题:模型导出的配置文件的某数据类型和预测表的数据类型不一致 +![image2.png](../images/faq/field_type2.png) #### 模型导出错误 @@ -81,7 +91,7 @@ pai -name easy_rec_ext -Dcluster='{"worker" : {"count":1, "cpu":1000, "memory":40000}}' -Darn=acs:ram::1730760139076263:role/aliyunodpspaidefaultrole -Dbuckets=oss://yanzhen1/ - -DossHost=oss-cn-shanghai-internal.aliyuncs.com; + -DossHost=oss-cn-beijing-internal.aliyuncs.com; 错误 Traceback (most recent call last): File "run.py", line 252, in @@ -102,7 +112,7 @@ pai -name easy_rec_ext compat.as_bytes(oldname), compat.as_bytes(newname), overwrite, status) File "/usr/lib/python2.7/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__ c_api.TF_GetCode(self.status.status)) -tensorflow.python.framework.errors_impl.UnavailableError: req_id: 5F225E6AD0E798313135AFAF, http status code: 400, error code: InvalidRequest, message: It is forbidden to copy appendable object in versioning state, oss host:oss-cn-shanghai-internal.aliyuncs.com, path:/yanzhen1/easy_rec_test/export_tmp/temp-1596087897/assets/pipeline.config. +tensorflow.python.framework.errors_impl.UnavailableError: req_id: 5F225E6AD0E798313135AFAF, http status code: 400, error code: InvalidRequest, message: It is forbidden to copy appendable object in versioning state, oss host:oss-cn-beijing-internal.aliyuncs.com, path:/yanzhen1/easy_rec_test/export_tmp/temp-1596087897/assets/pipeline.config. ``` #### 提交任务后卡死,无法查看log @@ -217,7 +227,7 @@ Failed to execute system command. (exit code: 123.) 可能是模型路径写错了,如下忘记写冒号了: oss:// ```protobuf -oss//easy-rec/test/din/model/model.ckpt. +oss//easyrec/test/din/model/model.ckpt. 
``` #### 其它错误: diff --git a/docs/source/feature/data.md b/docs/source/feature/data.md index ae9b8cdf3..b282359ed 100644 --- a/docs/source/feature/data.md +++ b/docs/source/feature/data.md @@ -4,11 +4,11 @@ EasyRec作为阿里云PAI的推荐算法包,可以无缝对接MaxCompute的数 为了识别这些输入数据中的字段信息,需要设置相应的字段名称和字段类型、设置默认值,帮助EasyRec去读取相应的数据。设置label字段,作为训练的目标。为了适应多目标模型,label字段可以设置多个。 -另外还有一些参数如prefetch\_size,是tensorflow中读取数据需要设置的参数。 +另外还有一些参数如prefetch_size,是tensorflow中读取数据需要设置的参数。 ## 一个最简单的data config的配置 -这个配置里面,只有三个字段,用户ID(uid)、物品ID(item\_id)、label字段(click)。 +这个配置里面,只有三个字段,用户ID(uid)、物品ID(item_id)、label字段(click)。 OdpsInputV2表示读取MaxCompute的表作为输入数据。 @@ -35,16 +35,16 @@ data_config { ``` -## input\_fields: +## input_fields: -针对csv文件中的某一列或者odps table中的每一个字段,在data\_config都需有一个input\_fields与之对应,顺序也务必是一致的。 -input\_fields字段: +input_fields字段: -- input\_name,方便在后续的feature\_configs中和data\_config.label\_fields中引用; -- input\_type,默认是STRING,可以不设置。可选的字段参考[DatasetConfig.FieldType](../proto.html) -- default\_val,默认是空,**注意默认值都是设置成字符串** - - 如果input是INT类型,并且默认值是6,那么default\_val是"6"; - - 如果input是FLOAT类型,并且默认值是0.5,那么default\_val是"0.5"; +- input_name,方便在后续的feature_configs中和data_config.label_fields中引用; +- input_type,默认是STRING,可以不设置。可选的字段参考[DatasetConfig.FieldType](../proto.html) +- default_val,默认是空,**注意默认值都是设置成字符串** + - 如果input是INT类型,并且默认值是6,那么default_val是"6"; + - 如果input是FLOAT类型,并且默认值是0.5,那么default_val是"0.5"; +- input_dim, 目前仅适用于RawFeature类型,可以指定多维数据,如一个图片的embedding vector. ```protobuf input_fields: { @@ -54,25 +54,30 @@ input\_fields字段: } ``` -- \*\*注意: \*\* - - input\_fields的顺序和odps table里面字段的顺序必须是一一对应的: - - input\_fields和csv文件里面字段的顺序必须是一一对应的(csv文件没有header) - - input\_fields里面input\_type必须和odps table/csv文件对应列的类型一致,或者是可以转换的类型,如: +- **注意:** + - input_fields的顺序和odps table里面字段的顺序不需要保证一一对应的 + - input_fields和csv文件里面字段的顺序必须是一一对应的(csv文件没有header) + - input_fields里面input_type必须和odps table/csv文件对应列的类型一致,或者是可以转换的类型,如: - odps table里面string类型的"64"可以转成int类型的64 - odps table里面string类型的"abc"不能转成int类型 -### input\_type: +### input_type: -- 默认值是CSVInput,表示数据格式是CSV,注意要配合separator使用 -- 如果在Odps上,应使用OdpsInputV2 +目前支持一下几种input_type: -#### separator: +- CSVInput,表示数据格式是CSV,注意要配合separator使用 +- OdpsInputV2,如果在Odps上运行EasyRec,则应使用OdpsInputV2 +- 如果需要使用RTP FG, 那么: + - 在EMR或者本地运行EasyRec,应使用RTPInput; + - 在Odps上运行,则应使用OdpsRTPInput + +### separator: - 使用csv格式的输入需要指定separator作为列之间的分隔符 - 默认是半角逗号"," - 可使用不可见字符作为分隔符(二进制分隔符),如'\\001', '\\002'等 -#### label\_fields +### label_fields - label相关的列名,至少设置一个,可以根据算法需要设置多个,如多目标算法 @@ -81,9 +86,9 @@ input\_fields字段: label_fields: "buy" ``` -- 列名必须在data\_config中出现过 +- 列名必须在data_config中出现过 -### prefetch\_size +### prefetch_size - data prefetch,以batch为单位,默认是32 - 设置prefetch size可以提高数据加载的速度,防止数据瓶颈 @@ -93,7 +98,7 @@ input\_fields字段: - 默认值是true,不做shuffle则设置为false - 设置shuffle,可以对训练数据进行shuffle,获得更好的效果 -### shuffle\_buffer\_size +### shuffle_buffer_size - 默认值32 - shuffle queue的大小,代表每次shuffle的数据量 diff --git a/docs/source/feature/excel_config.md b/docs/source/feature/excel_config.md new file mode 100644 index 000000000..00b9e1ddb --- /dev/null +++ b/docs/source/feature/excel_config.md @@ -0,0 +1,159 @@ +# Excel特征配置 + +由于特征配置部分比较长,从头构建配置文件比较麻烦,我们提供从excel生成训练配置文件的方式。 + +### 命令 + +- excel模板: + - [multi_tower](https://easyrec.oss-cn-beijing.aliyuncs.com/data/multi_tower_template.xls) + - [deep_fm](https://easyrec.oss-cn-beijing.aliyuncs.com/data/deepfm_template.xls) + - wide_and_deep同deepfm + +```bash +python easy_rec.python.tools.create_config_from_excel --model_type multi_tower --excel_path multi_tower_template.xls --output_path multi_tower.config +``` + +- 
model_type + +**NOTE**: --model_type必须要和excel模板匹配 + +- excel配置文件 + +--excel_path + +- 输出config文件 + +--output_path + +- 输入输出文件 + + --train_input_path TRAIN_INPUT_PATH --eval_input_path EVAL_INPUT_PATH + +- 默认数据文件(csv)列(column)分割符号是, 列(column)内部里面字符分割符号是| + + 可以自定义分隔符: + --column_separator $'|' --incol_separator $',' + +- 训练数据路径 + --train_input_path + +- 评估数据路径 + --eval_input_path + +- 模型目录 + --model_dir + +### Excel配置说明 + +包含**features**, **global, group, types, basic_types** 5 个**sheet** + +#### features: + +特征配置 + +| **name** | **data_type** | **type** | **user_item_other** | **global** | **hash_bucket_size** | **embedding_dim** | **default_value** | **weights** | **boundaries** | **query** | +| -------- | ------------- | -------- | ------------------- | ---------- | -------------------- | ----------------- | ----------------- | ----------- | -------------- | --------- | +| label | double | label | label | | | | | | | | +| uid | string | category | user | uid | | | | | | | +| own_room | bigint | dense | user | | | | | | 10,20,30 | | +| cate_idx | string | tags | user | cate | | | | cate_wgt | | | +| cate_wgt | string | weights | user | | | | | | | | +| **...** | | | | | | | | | | | + +- name: 输入列名 +- data_type: 输入的数据类型,包含double, string, bigint, 应用[basic_types](#6QphS) +- type: 特征类型(引用[types](#78xRB) sheet types列) + - label: 要预测的列 + - category: 离散值特征 + - dense: 连续值特征 + - tags: 标签型特征 + - 关键词默认使用|分割,如使用其他分割符, 可以通过--incol_separator指定 + - weights: tags对应的weight + - indexes: 一串数字,使用incol_separator分割, 如: 1|2|4|5 + - notneed: 不需要的,可以忽略的 +- group(引用group sheet列) + - multi_tower + - user: user tower + - item: item tower + - user_item: user_item tower + - label: label tower + - deepfm + - wide: 特征仅用在wide部分 + - deep: 特征仅用在deep和fm部分 + - wide_and_deep: 特征用在wide, deep, fm部分,默认选wide_and_deep +- global(引用[global](#ap1R1)里面的name列) + +global相同的列share embedding + +- hash_bucket_size: hash_bucket桶的大小 +- embedding_dim: embedding的大小 + - **NOTE**: 对于deepfm,所有特征的embedding_dim必须是一样大的 +- default_value: 缺失值填充 +- weights: 如果type是tags,则可以指定weights, weights和tags必须要有相同的列 +- boundaries: 连续值离散化的区间,如: 10,20,30,将会离散成区间(-inf, 10), \[10, 20), \[20, 30), \[30, inf) + - **NOTE**: 配置了boundaries,一般也要配置embedding_dim + - **NOTE**: 对于deepfm,连续值必须要配置boundaries +- query: 当前未使用,拟用作DIN的target,通常是item_id +- **NOTE**: + - features必须和odps表或者csv文件的列是**一一对应的**,**顺序必须要一致** + - features的数目和输入标或者文件的列的数目必须是一致的 + - 如果某些列不需要的话,可以设置type为notneed + +#### types + +描述数据类型 + +| **types** | +| --------- | +| label | +| category | +| tags | +| weights | +| dense | +| indexes | +| notneed | + +#### basic_types + +描述输入表里面的数据类型, 包含string, bigint, double + +| **basic_types** | +| --------------- | +| string | +| bigint | +| double | + +#### global + +描述share embedding里面share embedding的信息 +其中hash_bucket_size embedding_dim default_value会覆盖features表里面对应的信息 + +| **name** | **type** | **hash_bucket_size** | **embedding_dim** | **default_value** | +| -------- | -------- | -------------------- | ----------------- | ----------------- | +| cate | category | 1000 | 10 | 0 | +| uid | category | 100000 | 10 | 0 | +| **...** | | | | | + +#### group + +- deepfm模型中的分组设置 + + | **group** | + | ------------- | + | wide_and_deep | + | wide | + | deep | + | label | + +- multi_tower模型中的分组设置 + + | **group** | + | --------- | + | user | + | item | + | user_item | + | label | + + - 其中user, item, user_item可以自定义 + +- 其它模型的分组设置暂不支持 diff --git a/docs/source/feature/feature.rst b/docs/source/feature/feature.rst index 31d701163..0a1ee8c03 100644 --- a/docs/source/feature/feature.rst +++ 
b/docs/source/feature/feature.rst @@ -28,6 +28,20 @@ IdFeature: 离散值特征/ID类特征 hash_bucket_size: 100000 } + feature_configs { + input_names: "month" + feature_type: IdFeature + embedding_dim: 8 + num_buckets: 12 + } + + feature_configs { + input_names: "weekday" + feature_type: IdFeature + embedding_dim: 8 + vocab_list: ["1", "2", "3", "4", "5", "6", "7"] + } + - 其中embedding\_dim 的计算方法可以参考: .. math:: @@ -36,7 +50,7 @@ IdFeature: 离散值特征/ID类特征 embedding\_dim=8+x^{0.25} -- hash\_bucket\_size: hash bucket的大小 +- hash\_bucket\_size: hash bucket的大小。适用于category_id, user_id等 - 对于user\_id等规模比较大的,hash冲突影响比较小的特征, @@ -69,7 +83,39 @@ IdFeature: 离散值特征/ID类特征 RawFeature:连续值特征 ---------------------- -连续值类特征可以先在pai-studio中先进行离散化,可以进行等频/等距/自动离散化,变成IdFeature。也可以将离散化的区间配置在config中,如下: +连续值类特征可以先使用分箱组件+进行离散化,可以进行等频/等距/自动离散化,变成离散值。推荐使用分箱组件得到分箱信息表,在训练时可以通过"-Dboundary\_table odps://project_name/tables/boundary\_info"导入boundary\_info表,省去在config中写入boundaries的操作。 + +.. code:: protobuf + + DROP table if exists boundary_info; + PAI -name binning + -project algo_public + -DinputTableName=train_data + -DoutputTableName=boundary_info + -DselectedColNames=col1,col2,col3,col4,col5 + -DnDivide=20; + + pai -name easy_rec_ext -project algo_public + -Dconfig=oss://easyrec/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config + -Dcmd=train + -Dtables=odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_train,odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_test + -Dboundary_table=odps://pai_online_project/tables/boundary_info + -Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}' + -Darn=acs:ram::xxx:role/xxx + -Dbuckets=oss://easyrec/ + -DossHost=oss-cn-beijing-internal.aliyuncs.com + -Dwith_evaluator=1; + +.. code:: protobuf + + feature_configs { + input_names: "ctr" + feature_type: RawFeature + embedding_dim: 8 + } + +分箱组件使用方法见: `机器学习组件 `_ +也可以手动导入分箱信息。如下: .. code:: protobuf @@ -80,12 +126,24 @@ RawFeature:连续值特征 embedding_dim: 8 } -- boundaries: 分桶的值,通过一个数组来设置。 -- 如果这个分割点来自pai-studio - 的分箱模型,需要根据代码读取分割点并设置值。参考:easy\_rec/python/tools/add\_boundaries\_to\_config.py +- boundaries: 分桶的值,通过一个数组来设置。如果通过"-Dboundary\_table"导入分箱表,则无需写入,程序会自动导入到pipeline.config中。 - embedding\_dim: 如果设置了boundaries,则需要配置embedding dimension。 - 如果没有设置boundaries,在deepfm算法的wide端会被忽略 + +这里同样支持embedding特征,如"0.233\|0.123\|0.023\|2.123\|0.233\|0.123\|0.023\|2.123" + +.. code:: protobuf + + feature_configs { + input_names: "pic_emb" + feature_type: RawFeature + separator: '|' + raw_input_dim: 8 + } + +- raw_input_dim: 指定embedding特征的维度 + TagFeature ---------- @@ -106,24 +164,36 @@ tags字段可以用于描述商品的多个属性 embedding_dim: 24 } -结合weights字段,可以描述用户的偏好类目和分数: +- separator: 分割符,默认为'\|' +- hash\_bucket\_size: hash分桶大小,配置策略和IdFeature类似 +- num\_buckets: 针对输入是整数的情况, + 如6\|20\|32,可以配置num\_buckets,配置为最大值 +- embedding\_dim: embedding的dimension,和IdFeature类似 + +我们同样支持有权重的tag特征,如"体育:0.3\|娱乐:0.2\|军事:0.5": .. code:: protobuf feature_configs : { - input_names: 'categories' - input_names: 'scores' + input_names: 'tag_kvs' feature_type: TagFeature separator: '|' + kv_separator: ':' hash_bucket_size: 100000 embedding_dim: 24 } +或"体育\|娱乐\|军事"和"0.3\|0.2\|0.5"的输入形式: -- separator: 分割符,默认为'\|' -- hash\_bucket\_size: hash分桶大小,配置策略和IdFeature类似 -- num\_buckets: 针对输入是整数的情况, - 如6\|20\|32,可以配置num\_buckets,配置为最大值 -- embedding\_dim: embedding的dimension,和IdFeature类似 +.. 
code:: protobuf + + feature_configs : { + input_names: 'tags' + input_names: 'tag_scores' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 24 + } NOTE: ~~~~~ @@ -171,6 +241,27 @@ ComboFeature:组合特征 - embedding\_dim: embedding的维度,同IdFeature - hash\_bucket\_size: hash bucket的大小 +特征选择 +------ +对输入层使用变分dropout计算特征重要性,根据重要性排名进行特征选择。 + +rank模型中配置相应字段: + +.. code:: protobuf + + variational_dropout{ + regularization_lambda:0.01 + embedding_wise_variational_dropout:false + } + +- regularization\_lambda: 变分dropout层的正则化系数设置 +- embedding\_wise\_variational\_dropout: 变分dropout层维度是否为embedding维度(true:embedding维度;false:feature维度;默认false) + + + + + + 分隔符 ------ diff --git a/docs/source/feature/rtp_fg.md b/docs/source/feature/rtp_fg.md new file mode 100644 index 000000000..65dbaa90b --- /dev/null +++ b/docs/source/feature/rtp_fg.md @@ -0,0 +1,627 @@ +# RTP FG + +- RTP FG能够以比较高的效率生成一些复杂的特征,如MatchFeature和LookupFeature, 线上线下使用同一套代码保证一致性. + +- 其生成的特征可以接入EasyRec进行训练,从RTP FG的配置(fg.json)可以生成EasyRec的配置文件(pipeline.config). + +- 线上部署的时候提供带FG功能的EAS processor,一键部署. + +### 训练 + +#### 编写配置 [fg.json](https://easyrec.oss-cn-beijing.aliyuncs.com/rtp_fg/fg.json) + +- 包含了features配置和全局配置两个部分, 示例: + +```json +{ + "features": [ + {"expression": "user:user_id", "feature_name": "user_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"user"}, + {"expression": "user:cms_segid", "feature_name": "cms_segid", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100, "embedding_dim": 16, "group":"user"}, + ... + {"expression": "item:price", "feature_name": "price", "feature_type":"raw_feature", "value_type":"Integer", "combiner":"mean", "group":"item"}, + {"expression": "item:pid", "feature_name": "pid", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "user:tag_category_list", "feature_name": "user_tag_cate", "feature_type":"id_feature", "hash_bucket_size":100000, "group":"user"}, + {"map": "user:tag_brand_list", "key":"item:brand", "feature_name": "combo_brand", "feature_type":"lookup_feature", "needDiscrete":true, "hash_bucket_size":100000, "group":"combo"}, + {"map": "user:tag_category_list", "key":"item:cate_id", "feature_name": "combo_cate_id", "feature_type":"lookup_feature", "needDiscrete":true, "hash_bucket_size":10000, "group":"combo"} + ], + + + "reserves": [ + "user_id", "campaign_id", "clk" + ], + "multi_val_sep": "|" +} +``` + +- Feature配置说明: + + - [IdFeature](http://easyrec.oss-cn-beijing.aliyuncs.com/fg_docs/IdFeature.pdf) + + - is_multi: id_feature是否是多值属性 + + - 默认是false, 转换成EasyRec的config时会转成IdFeature + + - 如果设成true, 转换成EasyRec的config时会转成TagFeature. + + - 多值分隔符使用chr(29)\[ctrl+v ctrl+\]. + + - vocab_file: 词典文件路径,根据词典将对应的输入映射成ID. + + - vocab_list: 词典list,根据词典将对应的输入映射成ID. + + - num_buckets: 当输入是unsigned int类型的时候,并且输入有界的时候,可以指定num_bucket为输入的最大值. + + - hash_bucket_size: 对应EasyRec feature_configs的hash_bucket_size. + + - 和vocab_file, vocab_list相比,优势是不需要词典,词典可以是不固定的. + + - 劣势是需要设置的容量比较大,容易导致hash冲突. + + - embedding_dimension/embedding_dim: 对应EasyRec feature_configs里面的embedding_dim. 
+ + - [RawFeature](http://easyrec.oss-cn-beijing.aliyuncs.com/fg_docs/RawFeature.pdf) + + - bucketize_boundaries: 会生成离散化的结果, 在生成EasyRec config的时候: + + - 设置feature_configs.num_buckets = len(boundaries) + 1 + + - value_dimension > 1时, feature_type = TagFeature + + - value_dimension = 1时, feature_type = IdFeature + + - boundaries: 生成的还是连续值,但在生成EasyRec config的时候: + + ``` + 会配置离散化的bucket, 如: + feature_configs: { + input_names: "hour" + feature_type: RawFeature + boundaries: [1,5,9,15,19,23] + embedding_dim: 16 + } + ``` + + - 设置bucketize_boundaries/boundaries的同时需要设置embedding_dimension. + + - value_dimension: 连续值的维度,>1时表示有多个连续值, 也就是一个向量. + + - 比如ctr_1d,ctr_2d,ctr_3d,ctr_12d可以放在一个RawFeature里面. + - 该选项对生成数据有影响. + - 该选项对生成EasyRec config也有影响. + + - [ComboFeature](http://easyrec.oss-cn-beijing.aliyuncs.com/fg_docs/ComboFeature.pdf) + + - 需要设置embedding_dimension和hash_bucket_size. + + - [LookupFeature](http://easyrec.oss-cn-beijing.aliyuncs.com/fg_docs/LookupFeature.pdf) + + - 根据id查找对应的value. + + - [MatchFeature](http://easyrec.oss-cn-beijing.aliyuncs.com/fg_docs/MatchFeature.pdf) + + - 双层查找, 根据category和item_id查找value. + + - match Feature里面多值分隔符可以使用chr(29) (ctrl+v ctrl+\])或者逗号\[,\], 如: + + ``` + 50011740^107287172:0.2^]36806676:0.3^]122572685:0.5|50006842^16788816:0.1^]10122:0.2^]29889:0.3^]30068:19 + ``` + + - needWeighting: 生成特征权重,即kv格式, kv之间用\[ctrl+v ctrl+e\]分割, 转换成TagFeature. + + - [OverLapFeature](http://easyrec.oss-cn-beijing.aliyuncs.com/fg_docs/OverLapFeature.pdf) + + - 所有feature都需要的字段: + + - group: feature所属的分组 + + - 对于WideAndDeep/DeepFM是wide/deep. + + - 对于MultiTower可以自定义分组名称,如user/item/combo. + + - combiner: 默认是mean, 也可以是sum. + + - 影响数据生成和EasyRec feature_configs生成, 主要是多值Feature. + + - [多值类型说明](http://easyrec.oss-cn-beijing.aliyuncs.com/fg_docs/%E5%A4%9A%E5%80%BC%E7%B1%BB%E5%9E%8B.pdf) + + - 多值feature使用chr(29)\[ctrl+v ctrl+\]\]作为分隔符. + +- 全局配置说明: + + - reserves: 要在最终表里面要保存的字段,通常包括label, user_id, item_id等 + + - separator: sparse格式里面,特征之间的分隔符,不指定默认是",", + + - 训练时,对稠密格式没有影响,对稀疏格式有影响 + - 预测时,item feature在redis里面存储的是稀疏格式,因此是有影响的 + + ``` + i_item_id:10539078362,i_seller_id:21776327,... + ``` + + - multi_val_sep: 多值特征的分隔符,不指定默认是chr(29) 即"\\u001D" + + - model_dir: 模型目录,仅仅影响EasyRec config生成. + + - num_steps: 训练的轮数,仅仅影响EasyRec config生成. + + - embedding_dim: 全局的embedding dimension. + + - 适合DeepFM等需要所有的feature都使用统一的embedding_dim. + + - 如果feature字段没有单独设置embedding_dimension, 将使用统一的embedding_dim. + + - 配置里面的embedding_dim会覆盖从命令行easy_rec.python.tools.convert_rtp_fg传入的embedding_dim. + + - model_type: 模型的类型,当前支持WideAndDeep/MultiTower/DeepFM. + + - 暂未支持的EasyRec模型,可以不指定model_type,在生成EasyRec config之后添加相应的部分. + + - label_fields: label数组,针对多目标模型需要设置多个label fields. + + - model_path: 定义模型部分的config文件, 适用于暂未支持的EasyRec模型或自定义模型. + + - edit_config_json: 对EasyRec config的修改, 如修改dnn的hidden_units + + ``` + "edit_config_json": [{"model_config.wide_and_deep.dnn.hidden_units": [48, 24]}] + ``` + +#### 上传数据(如果已经有数据,可以跳过这一步) + +支持两种格式: 稀疏格式和稠密格式, 根据表的schema自动识别是哪一种格式, 包含user_feature和item_feature则识别成稀疏格式. 
+ +- 稀疏格式的数据: user特征, item特征, context特征各放一列;特征在列内以kv形式存储, 如: + +| label | user_id | item_id | context_feature | user_feature | item_feature | +| ----- | ------- | ------- | --------------- | --------------------------------------------------------------- | -------------------------------------------------- | +| 0 | 122017 | 389957 | | tag_category_list:4589,new_user_class_level:,...,user_id:122017 | adgroup_id:539227,pid:430548_1007,...,cate_id:4281 | + +```sql +-- taobao_train_input.txt oss://easyrec/data/rtp/ +-- wget http://easyrec.oss-cn-beijing.aliyuncs.com/data/rtp/taobao_train_input.txt +-- wget http://easyrec.oss-cn-beijing.aliyuncs.com/data/rtp/taobao_test_input.txt +drop table if exists taobao_train_input; +create table if not exists taobao_train_input(`label` BIGINT,user_id STRING,item_id STRING,context_feature STRING,user_feature STRING,item_feature STRING); +tunnel upload taobao_train_input.txt taobao_train_input -fd=';'; +drop table if exists taobao_test_input; +create table if not exists taobao_test_input(`label` BIGINT,user_id STRING,item_id STRING,context_feature STRING,user_feature STRING,item_feature STRING); +tunnel upload taobao_test_input.txt taobao_test_input -fd=';'; +``` + +- 稠密格式的数据,每个特征是单独的一列,如: + +| label | user_id | item_id | tag_category_list | new_user_class_level | age_level | +| ----- | ------- | ------- | ----------------- | -------------------- | --------- | +| 1 | 122017 | 389957 | 4589 | | 0 | + +```sql + drop table if exists taobao_train_input; + create table taobao_train_input_dense(label bigint, user_id string, item_id string, tag_category_list bigint, ...); +``` + +- **Note:** 特征列名可以加上prefix: **"user\_\_", "item\_\_", "context\_\_"** + +``` + 如: 列名ctx_position也可以写成 context__ctx_position +``` + +#### 生成样本 + +- 下载rtp_fg [jar ](https://easyrec.oss-cn-beijing.aliyuncs.com/deploy/fg_on_odps-1.3.57-jar-with-dependencies.jar)包 +- 生成特征 + +```sql +add jar target/fg_on_odps-1.3.57-jar-with-dependencies.jar -f; +add file fg.json -f; + +set odps.sql.planner.mode=sql; +set odps.isolation.session.enable=true; +set odps.sql.counters.dynamic.limit=true; + +drop table if exists taobao_fg_train_out; +create table taobao_fg_train_out(label bigint, user_id string, item_id string, features string); +jar -resources fg_on_odps-1.3.57-jar-with-dependencies.jar,fg.json -classpath fg_on_odps-1.3.57-jar-with-dependencies.jar com.taobao.fg_on_odps.EasyRecFGMapper -i taobao_train_input -o taobao_fg_train_out -f fg.json; +drop table if exists taobao_fg_test_out; +create table taobao_fg_test_out(label bigint, user_id string, item_id string, features string); +jar -resources fg_on_odps-1.3.57-jar-with-dependencies.jar,fg.json -classpath fg_on_odps-1.3.57-jar-with-dependencies.jar com.taobao.fg_on_odps.EasyRecFGMapper -i taobao_test_input -o taobao_fg_test_out -f fg.json; + +--下载查看数据(可选) +tunnel download taobao_fg_test_out taobao_fg_test_out.txt -fd=';'; +``` + +- EasyRecFGMapper参数格式: + - -i, 输入表 + - 支持分区表,分区表可以指定partition,也可以不指定partition,不指定partition时使用所有partition + - **分区格式示例:** my_table/day=20201010,sex=male + - 可以用多个-i指定**多个表的多个分区** + - -o, 输出表,如果是分区表,一定要指定分区,只能指定一个输出表 + - -f, fg.json + - -m, mapper memory的大小,默认可以不设置 +- EasyRecFGMapper会自动判断是**稠密格式**还是**稀疏格式** + - 如果表里面有user_feature和item_feature字段,那么判定是稀疏格式 + - 否则,判定是稠密格式 +- 生成的特征示例(taobao_fg_train_out): + +| label | user_id | item_id | features | +| ----- | ------- | ------- | ------------------------------------------------------------------------------------------------------------- | +| 0 | 336811 | 100002 | 
user_id_100002^Bcms_segid_5^Bcms_group_id_2^Bage_level_2^Bpvalue_level_1^Bshopping_level_3^Boccupation_1^B... | + +#### 从配置文件\[fg.json\]生成EasyRec的config + +本地安装wheel包 + +``` +pip install http://easyrec.oss-cn-beijing.aliyuncs.com/release/whls/easy_rec-0.1.3-py2.py3-none-any.whl +``` + +```python +python -m easy_rec.python.tools.convert_rtp_fg --label clk --rtp_fg fg.json --model_type multi_tower --embedding_dim 10 --output_path fg.config --selected_cols "label,features" +``` + +多目标模型写法 + +``` +python -m easy_rec.python.tools.convert_rtp_fg --label is_product_detail is_purchase --rtp_fg fg.json --model_type dbmtl --embedding_dim 10 --output_path fg.config --selected_cols "is_product_detail,is_purchase,features" +``` + +- --model_type: 模型类型, 可选: multi_tower, deepfm, essm, dbmtl 其它模型暂时不能设置,需要在生成的config里面增加model_config的部分 + +- --embedding_dim: embedding dimension, 如果fg.json里面的feature没有指定embedding_dimension, 那么将使用该选项指定的值 + +- --batch_size: batch_size, 训练时使用的batch_size + +- --label: label字段, 可以指定多个 + +- --num_steps: 训练的步数,默认1000 + +- --output_path: 输出的EasyRec config路径 + +- --separator: feature之间的分隔符, 默认是CTRL_B(\\u0002) + +- --selected_cols: 指定输入列,包括label和features,其中label可以指定多列,表示要使用多个label(一般是多任务模型), 最后一列必须是features, 如: + + ``` + label0,label1,features + ``` + + - 注意不要有**空格** + +- --incol_separator: feature内部的分隔符,即多值分隔符,默认是CTRL_C(\\u0003) + +- --input_type: 输入类型,默认是OdpsRTPInput, 如果在EMR上使用或者本地使用,应该用RTPInput, 如果使用RTPInput那么--selected_cols也需要进行修改, 使用对应的列的id: + + ``` + 0,4 + ``` + + - 其中第0列是label, 第4列是features + - 还需要指定--rtp_separator,表示label和features之间的分隔符, 默认是";" + +- --train_input_path, 训练数据路径 + + - MaxCompute上不用指定,在训练的时候指定 + +- --eval_input_path, 评估数据路径 + + - MaxCompute上不用指定,在训练的时候指定 + +#### 启动训练 + +- 上传fg.config到oss +- 启动训练 + +```sql +pai -name easy_rec_ext +-Dconfig=oss://bucket-name/easy_rec_test/fg.config +-Dcmd=train +-Dtables='odps://project-name/tables/taobao_fg_train_out,odps://project-name/tables/taobao_fg_test_out' +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}' +-Darn=acs:ram::xxx:role/ev-ext-test-oss +-Dbuckets=oss://bucket-name/ +-DossHost=oss-cn-xxx.aliyuncs.com +-Deval_method=separate; +``` + +环境里没有安装easy_rec_ext ,则上传easy_rec.tar.gz包 + +``` +pai -name tensorflow1120_cpu_ext + -Dscript='oss:///easy_rec.tar.gz' + -DentryFile='run.py' + -Dbuckets='oss:///' + -Dtables='odps:///tables//dt=${bizdate},odps:///tables//dt=${bizdate}' + -Darn='acs:ram::xxx:role/aliyunodpspaidefaultrole' + -DossHost='oss-us-west-1-internal.aliyuncs.com' + -Dcluster='{ + \"ps\": { + \"count\" : 4, + \"cpu\" : 600, + \"memory\" : 30000 + }, + \"worker\" : { + \"count\" : 33, + \"cpu\" : 800, + \"memory\" : 30000 + } + }' + -DuserDefinedParameters='--cmd train --config oss:///fg.config --model_dir oss:/// --train_tables odps:///tables//dt=${bizdate} --eval_tables odps:///tables//dt=${bizdate} --with_evaluator' +; +``` + +#### 模型导出 + +```sql +pai -name tensorflow1120_cpu_ext + -Dscript='oss:///easy_rec.tar.gz' + -DentryFile='run.py' + -Dbuckets='oss:///' + -Darn='acs:ram::xxx:role/aliyunodpspaidefaultrole' + -DossHost='oss-us-west-1-internal.aliyuncs.com' + -DuserDefinedParameters='--cmd export --config=oss:///pipeline.config --export_dir=oss:/// --asset_files=oss:///fg.json'; +; + +``` + +#### 增加特征 + +- 增加特征可以用原来的样本表A left outer join 新增的特征表B 生成表C + +``` + create table C + as select * from A + left outer join B + on A.req_id = B.req_id and A.item_id = B.item_id +``` + +- 表C使用增量incre_fg.json生成表incre_fea_table, incre_fg.json定义了新增的特征 + +``` + jar -resources 
fg_on_odps-1.3.57-jar-with-dependencies.jar,incre_fg.json -classpath fg_on_odps-1.3.57-jar-with-dependencies.jar com.taobao.fg_on_odps.EasyRecFGMapper -i taobao_test_input -o taobao_fg_test_out -f incre_fg.json; +``` + +- 生成新的样本表D: + +``` + create new_feature_table as + select A.*, wm_concat(fea_table.features, chr(2), incre_fea_table.features) as features + from A + inner join fea_table + on A.req_id = fea_table.req_id and A.item_id = fea_table.item_id + inner join incre_fea_table + on A.req_id = incre_fea_table.req_id and A.req_id = incre_fea_table.item_id +``` + +#### 特征筛选 + +- 可以筛选fg.json里面的部分特征用于训练 + +- 方法: 在fg.config的model_config.feature_groups里面把不需要的特征注释掉即可 + +### 预测 + +#### 服务部署 + +- 部署的 service.json 示例如下 + +```shell +bizdate=$1 +cat << EOF > echo.json +{ + "name":"easyrec_processor", + "baseimage": "registry.cn-shanghai.aliyuncs.com/eas/eas-worker-amd64:0.4.22", + "metadata": { + "region": "us-west-1", + "cpu": 6, + "memory": 20000, + "instance": 3 + }, + "model_config":"{\"holo-conf\":{\"url\":\"postgresql://:@<域名>:/\",\"prefix\":\"fg_*\",\"table\" : [{\"name\": \".\",\"key\" : \"\",\"value\": \"\",\"period\": 2880}]},\"period\": 2880,\"fg\":true,\"multitargets\":true,\"outputs\":\"probs_ctr,probs_cvr\",\"inter_op_parallelism_threads\": 6, \"intra_op_parallelism_threads\": 6, \"fg_ins_num\":2}", + "model_path": "oss:///", + "processor_path": "oss://easyrec/deploy/processor/easyrec_holo_broadwell.tar.gz", + "processor_entry": "libtf_predictor.so", + "token": "Y2E4OGY2MTBkODFhMzJhMDUzODM0YmE4OGRjZTI2MTgxYWNhOWRkNw==", + "processor_type": "cpp" +} + +EOF +# 执行部署命令。 +#/home/admin/usertools/tools/eascmd -i -k -e pai-eas.us-west-1.aliyuncs.com create echo.json +/home/admin/usertools/tools/eascmd -i -k -e pai-eas.us-west-1.aliyuncs.com update easyrec_processor -s echo.json + + + + +``` + +训练导出的时候需要修改fg.config ,保证导出的模型是支持多个place_holder 的输入 + +``` +export_config { + multi_placeholder: true +} +``` + +- processor_path, processor_entry, processor_type 自定义 easyrec processor 设置,与示例保持一致即可 + +- model_config: eas 部署配置。主要控制把 item 特征加载到内存中。目前数据源支持redis和holo + + - redis-conf: 配置redis 访问的相关配置,包括 url, password + - prefix: item_id key的前缀, 为了和其它的key(如user_id等)区分开来 + - cluster: cluster模式访问redis, 默认是false, 使用单例模式 + - pool_size: redis connection pool size + - period: item feature reload period, 单位minutes + +- 更多选项: + + - model_config: + - fg_ins_num: fg并行数,可以加快fg的计算速度 + - multitargets: 是否多目标模型 + - outputs: saved_model output signatures, 如果有多个,之间用,分割 + + ``` + "model_config":{ + "fg_ins_num": 4, + "multitargets": true, + "outputs": "probs_ctr,probs_cvr", + ... + } + ``` + +- holo-conf: 也支持使用[holo](https://www.aliyun.com/product/bigdata/hologram)存储item feature, 好处是支持增量更新 + + - 需要创建一张holo表, 包含3列: + ``` + |item_id|item_features|update_time| + ``` + - url: holo url + - user: holo db username + - password: holo db password + - dbname: holo dbname + - table: holo table name + - key: name of the column store item_ids + - value: name of the column store item features + +``` + { + "model_config":{ + "holo-conf":{ + "url":"hgprecn-cn-09k22ikm5008-cn-hangzhou.hologres.aliyuncs.com", + "user":"admin", + "password":"1234567", + "dbname":"easyrec_test", + "table":"test_table", + "key":"item_id", + "value":"item_features" + } + ... + } + } +``` + +#### 客户端访问 + +同eas sdk 中的TFRequest类似,easyrec 也是使用ProtoBuffer 作为传输协议. 
proto 文件定义: + +```protobuf +syntax = "proto3"; + +package com.alibaba.pairec.processor; +option cc_enable_arenas = true; +option java_package = "com.alibaba.pairec.processor"; +option java_outer_classname = "PredictProtos"; + +// context features +message ContextFeatures { + repeated string features = 1; +} + +// PBRequest specifies the request for aggregator +message PBRequest { + // debug mode + bool debug_mode = 1; + + // user features + map user_features = 2; + + // item ids + repeated string item_ids = 3; + + // context features for each item + map context_features = 4; +} + +// return results +message Results { + # use repeated to be compatiable for multi-target models. + repeated double scores = 1 [packed = true]; +} + +enum StatusCode { + OK = 0; + INPUT_EMPTY = 1; + EXCEPTION = 2; +} + +// PBResponse specifies the response for aggregator +message PBResponse { + // results + map results = 1; + + // item features + map item_features = 2; + + // generate features + map generate_features = 3; + + // context features + map context_features = 4; + + string error_msg = 5; + + StatusCode status_code = 6; +} +``` + +提供了 java 的客户端实例,[客户端 jar 包地址](http://easyrec.oss-cn-beijing.aliyuncs.com/deploy/easyrec-eas-client-0.0.1-jar-with-dependencies.jar). +下载后的 jar 通过下面命令安装到本地 mvn 库里. + +``` +mvn install:install-file -Dfile=easyrec-eas-client-0.0.1-jar-with-dependencies.jar -DgroupId=com.alibaba.pairec -DartifactId=easyrec-eas-client -Dversion=0.0.1 -Dpackaging=jar +``` + +然后在pom.xml里面加入: + +``` + + com.alibaba.pairec + easyrec-eas-client + 0.0.1 + +``` + +java 客户端测试代码参考: + +```java +import com.alibaba.pairec.processor.client.*; + +PaiPredictClient client = new PaiPredictClient(new HttpConfig()); +client.setEndpoint(cmd.getOptionValue("e")); +client.setModelName(cmd.getOptionValue("m")); + +EasyrecRequest easyrecRequest = new EasyrecRequest(separator); +easyrecRequest.appendUserFeatureString(userFeatures); +easyrecRequest.appendContextFeatureString(contextFeatures); +easyrecRequest.appendItemStr(itemIdStr, ","); + +PredictProtos.PBResponse response = client.predict(easyrecRequest); + +for (Map.Entry entry : response.getResultsMap().entrySet()) { + String key = entry.getKey(); + PredictProtos.Results value = entry.getValue(); + System.out.print("key: " + key); + for (int i = 0; i < value.getScoresCount(); i++) { + System.out.format(" value: %.4f ", value.getScores(i)); + } +} +``` + +- 验证特征一致性 + +``` +... + +easyrecRequest.setDebug(); +PredictProtos.PBResponse response = client.predict(easyrecRequest); +for (Map.Entry entry : + response.getResultsMap().entrySet()) { + Map itemFeas = response.getItemFeatures(); + for(String itemId: itemFeas.keySet()) { + System.out.println(itemId); + System.out.println(itemFeas.get(itemId)); + } +} +``` + +- Note: 生产环境调用的时候不要设置debug,会导致rt升高,qps下降. 
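For callers that are not using the provided Java client, the `PBRequest` / `PBResponse` messages defined above can be used from any language after compiling the proto file. Below is a minimal, hypothetical Python sketch; the module name `predict_pb2`, the endpoint URL, the token, and the assumption that `user_features` is a `map<string, string>` are all illustrative and not part of the official documentation.

```python
# Hypothetical sketch: build a PBRequest and POST it to an EAS endpoint.
# Assumes the proto above was compiled via `protoc --python_out=. predict.proto`
# into a module named predict_pb2 (name chosen for illustration only).
import requests
import predict_pb2

req = predict_pb2.PBRequest()
req.user_features['user_id'] = 'u_100002'      # user-side features (assumed map<string, string>)
req.item_ids.extend(['item_1', 'item_2'])      # candidate items to score

ctx = predict_pb2.ContextFeatures()
ctx.features.append('ctx_feature_value')
req.context_features['item_1'].CopyFrom(ctx)   # per-item context features

resp = requests.post(
    'http://<your-eas-endpoint>/api/predict/easyrec_processor',  # placeholder URL
    headers={'Authorization': '<your-token>'},                    # placeholder token
    data=req.SerializeToString())

pb_resp = predict_pb2.PBResponse()
pb_resp.ParseFromString(resp.content)
for item_id, result in pb_resp.results.items():
    print(item_id, list(result.scores))
```

Scores come back per item in `results`; the `scores` field is repeated so that multi-target models can return one value per target.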
diff --git a/docs/source/get_role_arn.md b/docs/source/get_role_arn.md new file mode 100644 index 000000000..8bcbc9e3a --- /dev/null +++ b/docs/source/get_role_arn.md @@ -0,0 +1,9 @@ +# 获取Role arn + +Role arn 是PAI上任务访问OSS数据的钥匙。 + +1.首先打开要提交任务的pai-studio的项目 +![image.png](../images/other/Role1.png) +2.点击进入【机器学习】--》【设置】--》【显示】 +![image.png](../images/other/Role2.png) +![image.png](../images/other/Role3.png) diff --git a/docs/source/incremental_train.md b/docs/source/incremental_train.md new file mode 100644 index 000000000..0ccd8a8ee --- /dev/null +++ b/docs/source/incremental_train.md @@ -0,0 +1,60 @@ +# 增量训练 + +增量训练的优势: + +- 每当有新增的样本时,并不需要重新训练全量样本,而是用现有模型初始化,然后在当天的新增样本上继续finetune。实验表明,增量训练可以使模型迅速收敛,加快训练速度,并且由于见过了更多的样本,所以模型性能也有一定的提升。增量训练在新数据上finetune一个epoch即可达到比较好的效果。 + +### 训练命令 + +#### Local + +初始化: + +```bash +python -m easy_rec.python.train_eval --pipeline_config_path dwd_avazu_ctr_deepmodel.config +``` + +增量训练: + +```bash +python -m easy_rec.python.train_eval --pipeline_config_path dwd_avazu_ctr_deepmodel.config --edit_config_json='{"train_config.fine_tune_checkpoint": "${bizdate-1}/model.ckpt-50", "train_config.num_steps": 10000}' +``` + +- bizdate是业务日期,一般是运行日期-1day. + +#### on PAI + +初始化: + +```sql +pai -name easy_rec_ext -project algo_public +-Dconfig=oss://easyrec/easy_rec_test/dwd_avazu_ctr_deepmodel_ext.config +-Dcmd=train +-Dtrain_tables=odps://pai_online_project/tables/train_data_d1_to_d14 +-Deval_tables=odps://pai_online_project/tables/eval_data/ds=${bizdate} +-Dmodel_dir="oss://easyrec/easy_rec_test/checkpoints/${bizdate}/" +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}' +-Darn=acs:ram::xxx:role/ev-ext-test-oss +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com +-Dwith_evaluator=1; +``` + +增量训练: + +```sql +pai -name easy_rec_ext -project algo_public +-Dconfig=oss://easyrec/easy_rec_test/dwd_avazu_ctr_deepmodel_ext.config +-Dcmd=train +-Dtrain_tables=odps://pai_online_project/tables/train_data/ds=${bizdate} +-Deval_tables=odps://pai_online_project/tables/eval_data/ds=${bizdate} +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}' +-Darn=acs:ram::xxx:role/ev-ext-test-oss +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com +-Dedit_config_json='{"train_config.fine_tune_checkpoint": "oss://easyrec/easy_rec_test/checkpoints/${bizdate-1}/"}' +-Dwith_evaluator=1; +``` + +- bizdate在dataworks里面是业务日期,一般是运行日期的前一天。 +- train_config.fine_tune_checkpoint: fine_tune_checkpoint的路径,可以指定具体的checkpoint,也可以指定一个目录,将自动定位目录里面最新的checkpoint。 diff --git a/docs/source/index.rst b/docs/source/index.rst index 263c340c9..f379fa016 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -19,6 +19,8 @@ Welcome to easy_rec's documentation! feature/data feature/feature + feature/excel_config + feature/rtp_fg .. toctree:: :maxdepth: 3 @@ -29,6 +31,22 @@ Welcome to easy_rec's documentation! models/multi_target models/user_define +.. toctree:: + :maxdepth: 2 + :caption: TRAIN & EVAL & EXPORT + + train + incremental_train + eval + export + kd + +.. toctree:: + :maxdepth: 2 + :caption: PREDICT + + predict/MaxCompute离线预测 + predict/OnlinePrediction .. toctree:: :maxdepth: 2 @@ -65,7 +83,8 @@ Welcome to easy_rec's documentation! 
reference faq - + tf_on_yarn + get_role_arn diff --git a/docs/source/intro.md b/docs/source/intro.md index df854f789..1baf21a8a 100644 --- a/docs/source/intro.md +++ b/docs/source/intro.md @@ -1,6 +1,6 @@ # EasyRec简介 -## What is EasyRec? +## What is EasyRec? ![intro.png](../images/intro.png) diff --git a/docs/source/kd.md b/docs/source/kd.md new file mode 100644 index 000000000..4ec2c4ae5 --- /dev/null +++ b/docs/source/kd.md @@ -0,0 +1,102 @@ +# 知识蒸馏 + +知识蒸馏在推荐场景中有着广泛的引用,其优势在于不增加模型参数量和特征的情况下也能提高模型的性能。常见应用场景包括: + +- 利用精排模型蒸馏粗排模型,可以提升粗排模型的auc,并且使得粗排模型和精排模型有比较好的一致性 +- 利用优势特征蒸馏不带该特征的模型, 可以使得模型能够利用优势特征的信息 +- 利用gdbt蒸馏DNN模型,可以增强DNN模型对于连续值特征的处理能力 + +### kd + +- loss_name: loss的名称, 默认是'kd_loss\_' + pred_name + +- pred_name: 预测的名称, 对于RankModel可以是logits, probs + + - 如果不确定,可以随便填一个,然后在报错信息中,可以查看所有的pred_name + +- pred_is_logits: 预测的是logits, 还是probs, 默认是logits + +- soft_label_name: 蒸馏的目标, 对应训练数据中的某一列,该目标由teacher模型产生 + +- label_is_logits: 目标是logits, 还是probs, 默认是logits + +- loss_type: loss的类型, 可以是CROSS_ENTROPY_LOSS或者L2_LOSS + +- loss_weight: loss的权重, 默认是1.0 + +- temperature: 蒸馏的温度,温度越高,student模型学到的细节越丰富, 但对于student模型的能力要求越高, 最优的温度需要通过多次试验才能确定 + +- Note: 可以设置多个kd, 如多目标场景需要对多个预测结果进行蒸馏 + +- [示例config](https://easyrec.oss-cn-beijing.aliyuncs.com/configs/dssm_kd_on_taobao.config) + +``` +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + + ... + + input_fields { + input_name: 'kd_soft' + input_type: DOUBLE + } + + label_fields: ['clk', 'kd_soft'] +} + + +model_config { + model_class: "DSSM" + + ... + + kd { + soft_label_name: 'kd_soft' + pred_name: 'logits' + loss_type: CROSS_ENTROPY_LOSS + loss_weight: 1.0 + temperature: 2.0 + } +} +``` + +### 训练命令 + +训练命令不改变, 详细参考[模型训练](./train.md) + +#### Local + +```bash +python -m easy_rec.python.train_eval --pipeline_config_path samples/model_config/dssm_kd_on_taobao.config +``` + +#### On PAI + +```sql +pai -name easy_rec_ext -project algo_public +-Dconfig=oss://easyrec/easy_rec_test/dssm_kd_on_taobao.config +-Dcmd=train +-Dtables=odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_train,odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_test +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}' +-Darn=acs:ram::xxx:role/ev-ext-test-oss +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com +-Dwith_evaluator=1; +``` + +#### On EMR + +```bash +el_submit -t tensorflow-ps -a easy_rec_train -f dwd_avazu_ctr_deepmodel.config -m local -pn 1 -pc 4 -pm 20000 -wn 3 -wc 6 -wm 20000 -c "python -m easy_rec.python.train_eval --pipeline_config_path dssm_kd_on_taobao.config --continue_train" +``` + +### 参考文献 + +- [Distilling the Knowledge in a Neural Network](https://arxiv.org/pdf/1503.02531.pdf). + +- [Privileged Features Distillation at Taobao Recommendations](https://arxiv.org/pdf/1907.05171.pdf). + +- [Knowledge Distillation](https://en.wikipedia.org/wiki/Knowledge_distillation). 
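To make the `temperature`, `loss_weight`, and `loss_type: CROSS_ENTROPY_LOSS` options above more concrete, here is a minimal illustrative sketch of a temperature-scaled distillation loss for a binary (CTR-style) task. It shows only the standard formulation, under the assumption that the teacher column contains logits (`label_is_logits: true`); it is not EasyRec's internal implementation.

```python
# Illustrative only: temperature-scaled knowledge-distillation loss for a binary task.
import tensorflow as tf

def kd_cross_entropy_loss(student_logits, teacher_logits, temperature=2.0, loss_weight=1.0):
    # Both sides are softened with the same temperature. A higher temperature exposes
    # more of the teacher's ranking detail, but is harder for the student to fit.
    teacher_prob = tf.nn.sigmoid(teacher_logits / temperature)
    student_prob = tf.nn.sigmoid(student_logits / temperature)
    ce = tf.keras.losses.binary_crossentropy(teacher_prob, student_prob)
    return loss_weight * tf.reduce_mean(ce)
```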
diff --git a/docs/source/mnist_demo.md b/docs/source/mnist_demo.md new file mode 100644 index 000000000..2353e9016 --- /dev/null +++ b/docs/source/mnist_demo.md @@ -0,0 +1,40 @@ +# Mnist Demo on EMR + +本示例中的程序都可以在**tf1.15**或者**tf2.0**上运行 + +## 单机多卡模式: MirroredStragy + +使用keras model,是tf2.x推荐运行的方式 + +```bash +wget https://easyrec.oss-cn-beijing.aliyuncs.com/data/mnist_demo/mnist.npz +hadoop fs -mkdir -p hdfs:///user/data/ +hadoop fs -put mnist.npz hdfs:///user/data/ + +wget https://easyrec.oss-cn-beijing.aliyuncs.com/data/mnist_demo/mnist_mirrored.py -O mnist_mirrored.py +把strategy = tf.distribute.MirroredStrategy() +替换成strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() + +el_submit -t standalone -a mnist_train -f mnist_mirrored.py -m local -wn 1 -wg 2 -wc 6 -wm 20000 -c python mnist_mirrored.py +``` + +- -wn: worker number,必须是1 +- -wg: 2, 2GPUS +- -wc: CPU number +- -wm: cpu memory size in bytes, 20000 is 20G + +## 多机多卡模式: MultiWorkerMirroredStrategy + +```bash +wget https://easyrec.oss-cn-beijing.aliyuncs.com/data/mnist_demo/mnist.npz +hadoop fs -mkdir -p hdfs:///user/data/ +hadoop fs -put mnist.npz hdfs:///user/data/ +wget https://easyrec.oss-cn-beijing.aliyuncs.com/data/mnist_demo/mnist_mirrored.py -O mnist_mirrored.py + +el_submit -t tensorflow-worker -a mnist_train -f mnist_mirrored.py -m local -wn 2 -wg 1 -wc 6 -wm 20000 -c python mnist_mirrored.py +``` + +- -wn: worker number,2 +- -wg: 1, 1GPU, 可以 > 1 +- -wc: CPU number +- -wm: cpu memory size in bytes, 20000 is 20G diff --git a/docs/source/models/autoint.md b/docs/source/models/autoint.md new file mode 100644 index 000000000..a87e3b1c3 --- /dev/null +++ b/docs/source/models/autoint.md @@ -0,0 +1,65 @@ +# AutoInt + +### 简介 + +Automatic Feature Interaction Learning via Self-Attentive Neural Networks(AutoInt)通过将特征都映射到相同的低维空间中,然后利用带有残差连接的 Multi-head Self-Attention 机制显示构造高阶特征,对低维空间中的特征交互进行显式建模,有效提升了CTR预估的准确率。 +注意:AutoInt 模型要求所有输入特征的 embedding_dim 保持一致。 + +![autoint.png](../../images/models/autoint.png) + +### 配置说明 + +```protobuf +model_config: { + model_class: 'AutoInt' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + autoint { + multi_head_num: 2 + multi_head_size: 32 + interacting_layer_num: 3 + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} +``` + +- model_class: 'AutoInt', 不需要修改 + +- feature_groups: 配置一个名为'all'的feature_group。 + +- autoint: autoint相关的参数 + + - model_dim: 与特征的embedding_dim保持一致 + - multi_head_size: Multi-head Self-attention 中的 head size,默认为1 + - interacting_layer_num: 交叉层的层数,建议设在1到5之间,默认为1 + - l2_regularization: L2正则,防止 overfit + +- embedding_regularization: 对embedding部分加regularization,防止overfit + +### 示例Config + +[AutoInt_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/autoint.config) + +### 参考论文 + +[AutoInt](https://dl.acm.org/doi/pdf/10.1145/3357384.3357925) diff --git a/docs/source/models/bst.md b/docs/source/models/bst.md new file mode 100644 index 000000000..3474daaac --- /dev/null +++ b/docs/source/models/bst.md @@ -0,0 +1,98 @@ +# BST + +### 简介 
+ +利用近年因 Transformer 而备受关注的 Multi-head Self-attention,捕捉用户行为序列的序列信息。支持多组序列共同embedding,如hist_item_id, hist_category_id。目前结合multitower共同使用,bst部分作为multitower的一个塔。 + +### 模型配置 + +```protobuf +model_config:{ + model_class: "MultiTowerBST" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + wide_deep: DEEP + } + seq_att_groups: { + group_name: "bst" + seq_att_map: { + key: "brand" + hist_seq: "tag_brand_list" + } + seq_att_map: { + key: "cate_id" + hist_seq: "tag_category_list" + } + } + multi_tower { + towers { + input: "user" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "item" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + bst_towers { + input: "bst" + seq_len: 50 + multi_head_size: 4 + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 5e-7 + } + embedding_regularization: 5e-5 +} + +``` + +- model_class: 'MultiTowerBST', 不需要修改。 +- feature_groups: 可配置多个feature_group,group name可以变。 +- seq_att_groups: 可配置多个seq_att_groups。 + - group name + - seq_att_map: 需配置key和hist_seq,一一对应。 +- multi_tower: multi_tower相关的参数。 + - towers: 每个feature_group对应了一个tower。 + - input必须和feature_groups的group_name对应 + - dnn: deep part的参数配置 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - bst_towers: 每个seq_att_groups对应了一个bst_tower。 + - input必须和seq_att_groups的group_name对应 + - seq_len: 历史序列的最大长度 + - multi_head_size: Multi-head Self-attention 中的 head size + - final_dnn 整合towers和din_towers的输入。 + - hidden_units: dnn每一层的channel数目,即神经元的数目 +- embedding_regularization: 对embedding部分加regularization,防止overfit + +### 示例config + +[BST_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/bst.config) + +### 参考论文 + +[Behavior Sequence Transformer](https://arxiv.org/abs/1905.06874v1) diff --git a/docs/source/models/dbmtl.md b/docs/source/models/dbmtl.md index a7f4da5bb..834550a35 100644 --- a/docs/source/models/dbmtl.md +++ b/docs/source/models/dbmtl.md @@ -65,20 +65,20 @@ model_config { } ``` -- model\_class: 'DBMTL', 不需要修改 -- feature\_groups: 配置一个名为'all'的feature\_group。 +- model_class: 'DBMTL', 不需要修改 +- feature_groups: 配置一个名为'all'的feature_group。 - dbmtl: dbmtl相关的参数 - experts - - expert\_name + - expert_name - dnn deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - task\_towers 根据任务数配置task\_towers - - tower\_name + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - task_towers 根据任务数配置task_towers + - tower_name - dnn deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - 默认为二分类任务,即num\_class默认为1,weight默认为1.0,loss\_type默认为CLASSIFICATION,metrics\_set为auc - - 注:label\_fields需与task\_towers一一对齐。 - - embedding\_regularization: 对embedding部分加regularization,防止overfit + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - 默认为二分类任务,即num_class默认为1,weight默认为1.0,loss_type默认为CLASSIFICATION,metrics_set为auc + - 注:label_fields需与task_towers一一对齐。 + - embedding_regularization: 对embedding部分加regularization,防止overfit #### DBMTL+MMOE @@ -139,15 +139,19 @@ model_config { ``` - dbmtl - - expert\_dnn: MMOE的专家DNN配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - expert\_num: 专家DNN的数目 + - expert_dnn: MMOE的专家DNN配置 + 
- hidden_units: dnn每一层的channel数目,即神经元的数目 + - expert_num: 专家DNN的数目 - 其余与dbmtl一致 +DBMTL模型每个塔的输出名为:"logits\_" / "probs\_" / "y\_" + tower_name +其中,logits/probs/y对应: sigmoid之前的值/概率/回归模型的预测值 +DBMTL模型每个塔的指标为:指标名+ "\_" + tower_name + ### 示例Config -- [DBMTL\_demo.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/dbmtl.config) -- [DBMTL\_MMOE\_demo.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/dbmtl_mmoe.config) +- [DBMTL_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/dbmtl.config) +- [DBMTL_MMOE_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/dbmtl_mmoe.config) ### 参考论文 diff --git a/docs/source/models/dcn.md b/docs/source/models/dcn.md new file mode 100644 index 000000000..1891c2f14 --- /dev/null +++ b/docs/source/models/dcn.md @@ -0,0 +1,83 @@ +# DCN + +### 简介 + +Deep&Cross Network(DCN)是在DNN模型的基础上,引入了一种新型的交叉网络,该网络在学习某些特征交叉时效率更高。特别是,DCN显式地在每一层应用特征交叉,不需要人工特征工程,并且只增加了很小的额外复杂性。 + +![deepfm.png](../../images/models/dcn.png) + +### 配置说明 + +```protobuf +model_config: { + model_class: 'DCN' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + dcn { + deep_tower { + input: "all" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + cross_tower { + input: "all" + cross_num: 5 + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} +``` + +- model_class: 'DCN', 不需要修改 + +- feature_groups: 配置一个名为'all'的feature_group。 + +- dcn: dcn相关的参数 + +- deep_tower + + - dnn: deep part的参数配置 + + - hidden_units: dnn每一层的channel数目,即神经元的数目 + +- cross_tower + + - cross_num: 交叉层层数,默认为3 + +- final_dnn: 整合wide part, fm part, deep part的参数输入, 可以选择是否使用 + + - hidden_units: dnn每一层的channel数目,即神经元的数目 + +- embedding_regularization: 对embedding部分加regularization,防止overfit + +### 示例Config + +[DCN_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/dcn.config) + +### 参考论文 + +[DCN](https://arxiv.org/abs/1708.05123) diff --git a/docs/source/models/deepfm.md b/docs/source/models/deepfm.md index 33b10d307..3b6e75c0a 100644 --- a/docs/source/models/deepfm.md +++ b/docs/source/models/deepfm.md @@ -3,7 +3,6 @@ ### 简介 DeepFM是在WideAndDeep基础上加入了FM模块的改进模型。FM模块和DNN模块共享相同的特征,即相同的Embedding。 -注意:经过我们的扩展,DeepFM支持不同特征使用不同大小的embedding size。 ![deepfm.png](../../images/models/deepfm.png) @@ -45,31 +44,29 @@ model_config:{ } ``` -- model\_class: 'DeepFM', 不需要修改 +- model_class: 'DeepFM', 不需要修改 -- feature\_groups: +- feature_groups: - 需要两个feature\_group: wide group和deep group, **group name不能变** + 需要两个feature_group: wide group和deep group, **group name不能变** - deepfm: deepfm相关的参数 - dnn: deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 + - hidden_units: dnn每一层的channel数目,即神经元的数目 -- wide\_output\_dim: wide部分输出的大小 +- wide_output_dim: wide部分输出的大小 -- final\_dnn: 整合wide part, fm part, deep part的参数输入, 可以选择是否使用 +- final_dnn: 整合wide part, fm part, deep part的参数输入, 可以选择是否使用 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 + - hidden_units: dnn每一层的channel数目,即神经元的数目 -- embedding\_regularization: 
对embedding部分加regularization,防止overfit - -- input\_type: 如果在提交到pai-tf集群上面运行,读取max compute 表作为输入数据,data\_config:input\_type要设置为OdpsInputV2。 +- embedding_regularization: 对embedding部分加regularization,防止overfit ### 示例Config -[DeepFM\_demo.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/deepfm.config) +[DeepFM_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/deepfm.config) ### 参考论文 diff --git a/docs/source/models/din.md b/docs/source/models/din.md index 93a9211fe..ab20a9ddb 100644 --- a/docs/source/models/din.md +++ b/docs/source/models/din.md @@ -2,7 +2,7 @@ ### 简介 -利用DIN算法建模用户点击序列。支持多组序列共同embedding,如hist\_item\_id, hist\_category\_id。目前结合multitower共同使用,din部分作为multitower的一个塔。 +利用DIN算法建模用户点击序列。支持多组序列共同embedding,如hist_item_id, hist_category_id。目前结合multitower共同使用,din部分作为multitower的一个塔。 ![din.png](../../images/models/din.png) ### 模型配置 @@ -67,27 +67,27 @@ model_config: { ``` -- model\_class: 'MultiTowerDIN', 不需要修改。 -- feature\_groups: 可配置多个feature\_group,group name可以变。 -- seq\_att\_groups: 可配置多个seq\_att\_groups。 +- model_class: 'MultiTowerDIN', 不需要修改。 +- feature_groups: 可配置多个feature_group,group name可以变。 +- seq_att_groups: 可配置多个seq_att_groups。 - group name - - seq\_att\_map: 需配置key和hist\_seq,一一对应。 -- multi\_tower: multi\_tower相关的参数 - - towers: 每个feature\_group对应了一个tower。 - - input必须和feature\_groups的group\_name对应。 + - seq_att_map: 需配置key和hist_seq,一一对应。 +- multi_tower: multi_tower相关的参数 + - towers: 每个feature_group对应了一个tower。 + - input必须和feature_groups的group_name对应。 - dnn: deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - din\_towers: 每个seq\_att\_groups对应了一个din\_tower - - input必须和seq\_att\_groups的group\_name对应。 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - din_towers: 每个seq_att_groups对应了一个din_tower + - input必须和seq_att_groups的group_name对应。 - dnn: deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - final\_dnn 整合towers和din\_towers的输入 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 -- embedding\_regularization: 对embedding部分加regularization,防止overfit + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - final_dnn 整合towers和din_towers的输入 + - hidden_units: dnn每一层的channel数目,即神经元的数目 +- embedding_regularization: 对embedding部分加regularization,防止overfit ### 示例config -[DIN\_demo.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/din.config) +[DIN_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/din.config) ### 参考论文 diff --git a/docs/source/models/dssm.md b/docs/source/models/dssm.md index 3a4bbf6aa..3b90529ef 100644 --- a/docs/source/models/dssm.md +++ b/docs/source/models/dssm.md @@ -41,24 +41,30 @@ model_config:{ hidden_units: [256, 128, 64, 32] } } + simi_func: INNER_PRODUCT l2_regularization: 1e-6 } embedding_regularization: 5e-5 } ``` -- model\_class: 'DSSM', 不需要修改 -- feature\_groups: 需要两个feature\_group: user和item, **group name不能变** -- dssm: dssm相关的参数,必须配置user\_tower和item\_tower -- user\_tower/item\_tower: +- model_class: 'DSSM', 不需要修改 +- feature_groups: 需要两个feature_group: user和item, **group name不能变** +- dssm: dssm相关的参数,必须配置user_tower和item_tower +- user_tower/item_tower: - dnn: deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - id: 指定user\_id/item\_id列 -- embedding\_regularization: 对embedding部分加regularization,防止overfit + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - id: 指定user_id/item_id列 +- simi_func: 向量相似度函数,包括\[COSINE, INNER_PRODUCT, EUCLID\],默认COSINE,建议使用INNER_PRODUCT +- embedding_regularization: 对embedding部分加regularization,防止overfit ### 示例Config -[DSSM\_demo.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/dssm.config) 
+[DSSM_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/dssm.config) + +### 效果评估 + +[效果评估](https://easyrec.oss-cn-beijing.aliyuncs.com/docs/recall_eval.pdf) ### 参考论文 diff --git a/docs/source/models/dssm_neg_sampler.md b/docs/source/models/dssm_neg_sampler.md new file mode 100644 index 000000000..6822fab7f --- /dev/null +++ b/docs/source/models/dssm_neg_sampler.md @@ -0,0 +1,156 @@ +# DSSM负采样版 + +### 简介 + +双塔召回模型,支持训练时负采样。 + +![dssm](../../images/models/dssm_neg_sampler.png) + +当物品池很大上百万甚至是上亿的时候,双塔召回模型常常需要在物品池中针对每个正样本采样一千甚至一万的负样本才能达到比较好的召回效果, +意味着正负样本比例达到了1: 1k,甚至是1: 1w, 要支持这个正负样本比例的训练,如果用离线构造样本的方式会导致离线存储和离线计算的压力都激增。 +该版本的DSSM支持运行时进行负采样,会以图存储的方式将物品的特征存储在Parameter Server节点上,并且Mini-Batch内的共享同一批负样本的计算, +使得离线存储和离线计算的压力都大大降低。 + +注:训练样本一般只需准备点击(正样本)的样本即可 + +### 配置说明 + +```protobuf +eval_config { + metrics_set: { + recall_at_topk { + topk: 10 + } + } + metrics_set: { + recall_at_topk { + topk: 5 + } + } + metrics_set: { + recall_at_topk { + topk: 1 + } + } +} + +data_config: { + ... + negative_sampler { + input_path: 'data/test/tb_data/taobao_ad_feature_gl' + num_sample: 1024 + num_eval_sample: 2048 + attr_fields: 'adgroup_id' + attr_fields: 'cate_id' + attr_fields: 'campaign_id' + attr_fields: 'customer' + attr_fields: 'brand' + item_id_field: 'adgroup_id' + } +} + +model_config:{ + model_class: "DSSM" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + ... + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + ... + feature_names: 'brand' + wide_deep:DEEP + } + dssm { + user_tower { + id: "user_id" + dnn { + hidden_units: [256, 128, 64, 32] + # dropout_ratio : [0.1, 0.1, 0.1, 0.1] + } + } + item_tower { + id: "adgroup_id" + dnn { + hidden_units: [256, 128, 64, 32] + } + } + simi_func: INNER_PRODUCT + scale_simi: false + l2_regularization: 1e-6 + } + loss_type: SOFTMAX_CROSS_ENTROPY + embedding_regularization: 5e-5 +} +``` + +- eval_config: 评估配置,目前只支持recall_at_topk +- data_config: 数据配置,其中需要配置负采样Sampler,负采样Sampler的配置详见[负采样配置](%E8%B4%9F%E9%87%87%E6%A0%B7%E9%85%8D%E7%BD%AE) +- model_class: 'DSSM', 不需要修改 +- feature_groups: 需要两个feature_group: user和item, **group name不能变** +- dssm: dssm相关的参数,必须配置user_tower和item_tower +- user_tower/item_tower: + - dnn: deep part的参数配置 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - id: 指定user_id/item_id列 +- simi_func: 向量相似度函数,包括\[COSINE, INNER_PRODUCT, EUCLID\],默认COSINE,建议使用INNER_PRODUCT +- scale_simi: 是否自动缩放相似度便于loss计算,建议设置成false +- loss_type: 目前只支持SOFTMAX_CROSS_ENTROPY +- embedding_regularization: 对embedding部分加regularization,防止overfit + +#### 负采样配置 + +目前支持四种负采样Sampler: + +- negative_sampler:加权随机负采样,会排除Mini-Batch内的Item Id + - input_path: 负采样Item表, Schema为: id:int64 | weight:float | attrs:string,其中attr为":"分隔符拼接的Item特征 + - num_sample: 训练worker的负采样数 + - num_eval_sampler: 评估worker的负采样数 + - attr_fields: Item特征名,顺序与Item的attr中特征的拼接顺序保持一致 + - item_id_field: item_id列名 +- negative_sampler_v2:加权随机负采样,会跟排除Mini-Batch内的User有边的Item Id + - user_input_path: User表, Schema为: id:int64 | weight:float + - item_input_path: 负采样Item表, Schema为: id:int64 | weight:float | attrs:string,其中attr为":"分隔符拼接的Item特征 + - pos_edge_input_path: Positive边表, Schema为: userid:int64 | itemid:int64 | weight:float + - user_id_field: user_id列名 + - 其余同negative_sampler +- hard_negative_sampler:加权随机负采样,会排除Mini-Batch内的Item Id,同时HardNegative边表中(一般为曝光未点击)进行负采样作为HardNegative + - user_input_path: User表, Schema为: id:int64 | weight:float + - item_input_path: 负采样Item表, 
Schema为: id:int64 | weight:float | attrs:string,其中attr为":"分隔符拼接的Item特征 + - hard_neg_edge_input_path: HardNegative边表, Schema为: userid:int64 | itemid:int64 | weight:float + - num_hard_sample: hard negative的最大采样数目 + - user_id_field: user_id列名 + - 其余同negative_sampler +- hard_negative_sampler_v2:加权随机负采样,会跟排除Mini-Batch内的User有边的Item Id,同时HardNegative边表中(一般为曝光未点击)进行负采样作为HardNegative + - user_input_path: User表, Schema为: id:int64 | weight:float + - item_input_path: 负采样Item表, Schema为: id:int64 | weight:float | attrs:string,其中attr为":"分隔符拼接的Item特征 + - pos_edge_input_path: Positive边表, Schema为: userid:int64 | itemid:int64 | weight:float + - hard_neg_edge_input_path: HardNegative边表, Schema为: userid:int64 | itemid:int64 | weight:float + - num_hard_sample: hard negative的最大采样数目 + - user_id_field: user_id列名 + - 其余同negative_sampler + 一般用negative_sampler即可。 + +### 示例Config + +[DSSM_NegSampler.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/dssm_neg_sampler_on_taobao.config) + +[DSSM_NegSamplerV2.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/dssm_neg_sampler_v2_on_taobao.config) + +[DSSM_HardNegSampler.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/dssm_hard_neg_sampler_on_taobao.config) + +[DSSM_HardNegSamplerV2.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/dssm_hard_neg_sampler_v2_on_taobao.config) + +### 效果评估 + +[效果评估](https://easyrec.oss-cn-beijing.aliyuncs.com/docs/recall_eval.pdf) + +### 参考论文 + +[DSSM.pdf](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/cikm2013_DSSM_fullversion.pdf) diff --git a/docs/source/models/esmm.md b/docs/source/models/esmm.md index 5c6981210..c12c78dae 100644 --- a/docs/source/models/esmm.md +++ b/docs/source/models/esmm.md @@ -90,30 +90,42 @@ model_config: { } ``` -- model\_class: 'ESMM', 不需要修改 -- feature\_groups: 支持多组feature\_group +- model_class: 'ESMM', 不需要修改 +- feature_groups: 支持多组feature_group - esmm: esmm相关的参数 - groups - - input tower的input必须和feature\_groups的group\_name对应 + - input tower的input必须和feature_groups的group_name对应 - dnn deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - cvr\_tower - - tower\_name:'cvr',不需要修改 - - label\_name: tower对应的label名,若不设置,label\_fields需与task\_towers一一对齐 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - cvr_tower + - tower_name:'cvr',不需要修改 + - label_name: tower对应的label名,若不设置,label_fields需与task_towers一一对齐 - dnn deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - 默认为二分类任务,即num\_class默认为1,weight默认为1.0,loss\_type默认为CLASSIFICATION,metrics\_set为auc - - ctr\_tower - - tower\_name:'ctr',不需要修改 - - label\_name: tower对应的label名,若不设置,label\_fields需与task\_towers一一对齐 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - 默认为二分类任务,即num_class默认为1,weight默认为1.0,loss_type默认为CLASSIFICATION,metrics_set为auc + - ctr_tower + - tower_name:'ctr',不需要修改 + - label_name: tower对应的label名,若不设置,label_fields需与task_towers一一对齐 - dnn deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - 默认为二分类任务,即num\_class默认为1,weight默认为1.0,loss\_type默认为CLASSIFICATION,metrics\_set为auc -- embedding\_regularization: 对embedding部分加regularization,防止overfit + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - 默认为二分类任务,即num_class默认为1,weight默认为1.0,loss_type默认为CLASSIFICATION,metrics_set为auc +- embedding_regularization: 对embedding部分加regularization,防止overfit + +ESMM模型输出的值有以下几项: + +- "logits\_" / "probs\_" + ctr_tower的tower_name +- "logits\_" / "probs\_" / "y\_" + cvr_tower的tower_name +- "probs_ctcvr" / "y_ctcvr" + +ESMM模型的指标有以下几项: + +- 指标名 + "\_" + ctr_tower 的 tower_name +- 指标名 + "\_" + cvr_tower 的 tower_name + "\_masked" +- 
指标名 + "\_ctcvr" ### 示例Config -[ESMM\_demo.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/esmm.config) +[ESMM_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/esmm.config) ### 参考论文 diff --git a/docs/source/models/fm.md b/docs/source/models/fm.md new file mode 100644 index 000000000..f17acba3f --- /dev/null +++ b/docs/source/models/fm.md @@ -0,0 +1,53 @@ +# FM + +### 简介 + +FM模型的主要应用场景是点击率预估,目的是在数据高维稀疏的情况下,解决特征的组合问题。 + +![fm.png](../../images/models/fm.png) + +### 配置说明 + +```protobuf +model_config: { + model_class: 'FM' + feature_groups: { + group_name: 'wide' + feature_names: 'user_id' + feature_names: 'cms_segid' + ... + feature_names: 'cms_group_id' + wide_deep:WIDE + } + feature_groups: { + group_name: 'deep' + feature_names: 'user_id' + feature_names: 'cms_segid' + ... + feature_names: 'cms_group_id' + wide_deep: DEEP + } + + fm { + } + embedding_regularization: 1e-5 +} +``` + +- model_class: 'FM', 不需要修改 + +- feature_groups: + +需要一个feature_group: wide group **group name不能变** + +- embedding_regularization: 对embedding部分加regularization,防止overfit + +- input_type: 如果在提交到pai-tf集群上面运行,读取max compute 表作为输入数据,data_config:input_type要设置为OdpsInputV2。 + +### 示列Config + +[FM_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/fm.config) + +### 参考论文 + +[FM](https://www.csie.ntu.edu.tw/%7Eb97053/paper/Rendle2010FM.pdf) diff --git a/docs/source/models/mind.md b/docs/source/models/mind.md new file mode 100644 index 000000000..0b819b315 --- /dev/null +++ b/docs/source/models/mind.md @@ -0,0 +1,117 @@ +# MIND + +### 简介 + +mind召回模型, 在dssm的基础上加入了兴趣聚类功能,支持多兴趣召回。 +![mind](../../images/models/mind.png) + +### 配置说明 + +```protobuf +model_config:{ + model_class: "MIND" + feature_groups: { + group_name: 'hist' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + feature_naems: 'time_id' + } + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + wide_deep:DEEP + } + mind { + user_dnn { + hidden_units: [256, 128, 64, 32] + } + item_dnn { + hidden_units: [256, 128, 64, 32] + } + + capsule_config { + max_k: 5 + max_seq_len: 64 + high_dim: 64 + } + l2_regularization: 1e-6 + } + embedding_regularization: 5e-5 +} +``` + +- model_class: 'MIND', 不需要修改 +- feature_groups: 需要三个feature_group: hist, user和item, **group name不能变** +- mind: mind相关的参数,必须配置user_dnn和item_dnn +- user_dnn/item_dnn: + - dnn: deep part的参数配置 + - hidden_units: dnn每一层的channel数目,即神经元的数目 +- pre_capsule_dnn: 进入capsule之前的dnn的配置, 可选,配置同user_dnn和item_dnn +- capsule_config: 胶囊(动态路由)的配置 + - max_k: 胶囊(兴趣)的个数 + - max_seq_len: hist seq的最大长度 + - high_dim: 兴趣向量的维度 + - num_iters: 动态路由(兴趣聚类)的轮数 + - routing_logits_scale: routing logits 放大的超参,为0时,不放大; + - 一些场景显示设置为20时,兴趣向量的相似度比较低(0.8左右) + - 设置为0时,容易导致兴趣向量趋于相同(相似度接近1),覆盖的兴趣面变窄。 +- simi_pow: label guided attention, 对相似度做的幂指数, 更倾向于选择和label相近的兴趣向量来计算loss +- embedding_regularization: 对embedding部分加regularization,防止overfit + +### time_id, 注意特征的名字必须是time_id + +- 行为序列特征可以加上time_id, time_id经过1 dimension的embedding后, 在time维度进行softmax, 然后和其它sequence feature的embedding相乘 + +- 具体的 time_id 的取值可参考: + + 
- 训练数据: Math.round((2 * Math.log1p((labelTime - itemTime) / 60.) / Math.log(2.))) + 1; + - inference: Math.round((2 * Math.log1p((currentTime - itemTime) / 60.) / Math.log(2.))) + 1; + - 此处的时间(labelTime, itemTime, currentTime) 为秒, 这里给的只是一种取法, 供参考 + +### 调参经验 + +- 尽量使用全网的点击数据来生成训练样本,全网的行为会更加丰富,这有利于mind模型的训练。 + +- 刚开始训练的时候训练长一点,后面可以使用增量训练,增量训练的时候就可以训练短一点。 + +- 进行数据清洗,把那些行为太少的item直接在构造行为序列的时候就挖掉;也可以看看网站内是否有那些行为商品数巨量的(爬虫)用户。 + +- 根据自己的业务指标进行数据的重采样,因为mind模型的训练主要是以点击为目标的,如果业务指标是到交易,那么可以对产生交易的样本进行下重采样。 + +- 建议搞一个demo,看看mind整体召回和单个兴趣召回的结果,以便评估模型训练的好坏。 + +- 要看的指标是召回率,准确率和兴趣损失(interest loss,衡量生成的多个兴趣向量之前的差异度,interest loss越小,表示mind聚类效果越好),三个指标要一起看。 + +- 建议基于itemid、cateid、timeid的简单序列特征训练模型取得一定成效后,再添加其他侧信息,以避免不必要的试错时间。 + +- 如果loss降不下来(一般loss要小于3), 并且是加了time_id,那建议多跑个100/200万步,如果还是没有明显下降,这时需要检查下训练数据。 + +### 示例Config + +[MIND_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/mind.config) + +### 效果评估 + +[效果评估](https://easyrec.oss-cn-beijing.aliyuncs.com/docs/recall_eval.pdf) + +### 参考论文 + +[MIND.pdf](https://arxiv.org/pdf/1904.08030.pdf) diff --git a/docs/source/models/mmoe.md b/docs/source/models/mmoe.md index 1b52d2cd8..3225e34bb 100644 --- a/docs/source/models/mmoe.md +++ b/docs/source/models/mmoe.md @@ -56,23 +56,27 @@ model_config { ``` -- model\_class: 'MMoE', 不需要修改 -- feature\_groups: 配置一个名为'all'的feature\_group。 +- model_class: 'MMoE', 不需要修改 +- feature_groups: 配置一个名为'all'的feature_group。 - mmoe: mmoe相关的参数 - - expert\_dnn: MMOE的专家DNN配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - expert\_num: 专家DNN的数目 - - task\_towers: 根据任务数配置task\_towers - - tower\_name:任务名 - - label\_name: tower对应的label名,若不设置,label\_fields需与task\_towers一一对齐 + - expert_dnn: MMOE的专家DNN配置 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - expert_num: 专家DNN的数目 + - task_towers: 根据任务数配置task_towers + - tower_name:任务名 + - label_name: tower对应的label名,若不设置,label_fields需与task_towers一一对齐 - dnn: deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - 默认为二分类任务,即num\_class默认为1,weight默认为1.0,loss\_type默认为CLASSIFICATION,metrics\_set为auc - - embedding\_regularization: 对embedding部分加regularization,防止overfit + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - 默认为二分类任务,即num_class默认为1,weight默认为1.0,loss_type默认为CLASSIFICATION,metrics_set为auc + - embedding_regularization: 对embedding部分加regularization,防止overfit + +MMoE模型每个塔的输出名为:"logits\_" / "probs\_" / "y\_" + tower_name +其中,logits/probs/y对应: sigmoid之前的值/概率/回归模型的预测值 +MMoE模型每个塔的指标为:指标名+ "\_" + tower_name ### 示例Config -[MMoE\_demo.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/mmoe.config) +[MMoE_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/mmoe.config) ### 参考论文 diff --git a/docs/source/models/multi_cls.md b/docs/source/models/multi_cls.md index 687f5ed96..e62212ee4 100644 --- a/docs/source/models/multi_cls.md +++ b/docs/source/models/multi_cls.md @@ -1,9 +1,9 @@ # 多分类模型 -多分类模型和CTR模型基本一致,只是num\_class > 1。 +多分类模型和CTR模型基本一致,只是num_class > 1。 如下图所示, 和CTR模型相比增加了: -num\_class: 2 +num_class: 2 ```protobuf model_config:{ diff --git a/docs/source/models/multi_tower.md b/docs/source/models/multi_tower.md index 2e6117f2c..a5c0de57c 100644 --- a/docs/source/models/multi_tower.md +++ b/docs/source/models/multi_tower.md @@ -64,21 +64,21 @@ model_config: { } ``` -- feature\_groups: 不同的特征组,如user feature为一组,item feature为一组, combo feature为一组 - - group\_name: 可以根据实际情况取 - - wide\_deep: 必须是DEEP +- feature_groups: 不同的特征组,如user feature为一组,item feature为一组, combo feature为一组 + - group_name: 可以根据实际情况取 + - wide_deep: 必须是DEEP - towers: - - 每个feature\_group对应了一个tower, 
tower的input必须和feature\_groups的group\_name对应 + - 每个feature_group对应了一个tower, tower的input必须和feature_groups的group_name对应 - dnn: 深度网络 - - hidden\_units: 定义不同层的channel数目,即神经元数目 -- final\_dnn 整合towers和din\_towers的输入 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 -- l2\_regularization: L2正则,防止overfit -- embedding\_regularization: embedding的L2正则 + - hidden_units: 定义不同层的channel数目,即神经元数目 +- final_dnn 整合towers和din_towers的输入 + - hidden_units: dnn每一层的channel数目,即神经元的数目 +- l2_regularization: L2正则,防止overfit +- embedding_regularization: embedding的L2正则 ### 示例config -[multi\_tower\_demo.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/multi-tower.config) +[multi_tower_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/multi-tower.config) ### 参考论文 diff --git a/docs/source/models/rank.rst b/docs/source/models/rank.rst index b182c643a..0c0bb0a9a 100644 --- a/docs/source/models/rank.rst +++ b/docs/source/models/rank.rst @@ -5,7 +5,10 @@ :maxdepth: 2 deepfm - din multi_tower + dcn + autoint + din + bst regression multi_cls diff --git a/docs/source/models/recall.rst b/docs/source/models/recall.rst index d2fc1c574..46abe2a98 100644 --- a/docs/source/models/recall.rst +++ b/docs/source/models/recall.rst @@ -5,3 +5,5 @@ :maxdepth: 2 dssm + dssm_neg_sampler + mind diff --git a/docs/source/models/regression.md b/docs/source/models/regression.md index aaa89cc4a..beb172b73 100644 --- a/docs/source/models/regression.md +++ b/docs/source/models/regression.md @@ -3,7 +3,7 @@ 回归模型和CTR模型基本一致,只是采用的loss不一样。 如下图所示, 和CTR模型相比增加了: -loss\_type: L2\_LOSS +loss_type: L2_LOSS ```protobuf model_config:{ diff --git a/docs/source/models/rocket_launching.md b/docs/source/models/rocket_launching.md new file mode 100644 index 000000000..172319e6d --- /dev/null +++ b/docs/source/models/rocket_launching.md @@ -0,0 +1,74 @@ +# Rocket Launching + +### 简介 + +像点击率预估这样的在线实时响应系统对响应时间要求非常严格,结构复杂,层数很深的深度模型不能很好的满足严苛的响应时间的限制。 + +为了获得满足响应时间限制的具有优良表现的模型,采用Rocket Launching框架。训练阶段,同时训练繁简两个复杂度有明显差异的网络,简单的网络称为轻量网络(light net),复杂的网络称为助推器网络(booster net),两网络共享部分参数,分别学习类别标记,此外,轻量网络通过学习助推器的 soft target 来模仿助推器的学习过程,从而得到更好的训练效果。测试阶段,仅采用轻量网络进行预测。 +![autoint.png](../../images/models/rocket_launching.png) + +### 模型配置 + +```protobuf +model_config: { + model_class: 'RocketLaunching' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + rocket_launching { + share_dnn { + hidden_units: [128, 96, 64] + } + booster_dnn { + hidden_units: [256, 128, 96, 64] + } + light_dnn{ + hidden_units:[128, 64] + } + l2_regularization: 1e-6 + feature_based_distillation:false + feature_distillation_function:1 + } + embedding_regularization:5e-6 + num_class: 2 +} +``` + +- model_class: 'RocketLaunching', 不需要修改。 +- feature_groups: 可配置多个feature_group,group name可以变。 +- rocket_launching: rocket_launching相关的参数。 + - share_dnn: 共享部分的参数配置。 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - booster_dnn: 助推器网络的参数配置 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - light_dnn: 轻量网络的参数配置 + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - feature_based_distillation:是否配置特征蒸馏(默认不配) + - 
feature_distillation_function:中间层相似度衡量指标(COSINE; EUCLID; 默认COSINE) +- embedding_regularization: 对embedding部分加regularization,防止overfit + +### 示例config + +[Rocket_Launching_demo.config](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/rocket_launching.config) + +### 参考论文 + +[Rocket Launching: A Universal and Efficient Framework for Training Well-performing Light Net](https://arxiv.org/abs/1708.04106) diff --git a/docs/source/models/simple_multi_task.md b/docs/source/models/simple_multi_task.md index 46661f9ea..465a3ea87 100644 --- a/docs/source/models/simple_multi_task.md +++ b/docs/source/models/simple_multi_task.md @@ -4,6 +4,8 @@ 针对简单的多任务模型,所有任务共享特征和embedding,但是针对每个任务使用单独的Task Tower,任务之间相互独立 +![simple_multi_task.png](../../images/models/simple_multi_task.png) + ### 配置说明 ```protobuf @@ -51,13 +53,17 @@ model_config:{ } ``` -- model\_class: 'SimpleMultiTask', 不需要修改 -- feature\_groups: 配置一个名为'all'的feature\_group。 -- simple\_multi\_task: 相关的参数 - - task\_towers 根据任务数配置task\_towers - - tower\_name:任务名 - - label\_name: tower对应的label名,若不设置,label\_fields需与task\_towers一一对齐 +- model_class: 'SimpleMultiTask', 不需要修改 +- feature_groups: 配置一个名为'all'的feature_group。 +- simple_multi_task: 相关的参数 + - task_towers 根据任务数配置task_towers + - tower_name:任务名 + - label_name: tower对应的label名,若不设置,label_fields需与task_towers一一对齐 - dnn deep part的参数配置 - - hidden\_units: dnn每一层的channel数目,即神经元的数目 - - 默认为二分类任务,即num\_class默认为1,weight默认为1.0,loss\_type默认为CLASSIFICATION,metrics\_set为auc - - embedding\_regularization: 对embedding部分加regularization,防止overfit + - hidden_units: dnn每一层的channel数目,即神经元的数目 + - 默认为二分类任务,即num_class默认为1,weight默认为1.0,loss_type默认为CLASSIFICATION,metrics_set为auc + - embedding_regularization: 对embedding部分加regularization,防止overfit + +SimpleMultiTask模型每个塔的输出名为:"logits\_" / "probs\_" / "y\_" + tower_name +其中,logits/probs/y对应: sigmoid之前的值/概率/回归模型的预测值 +SimpleMultiTask模型每个塔的指标为:指标名+ "\_" + tower_name diff --git a/docs/source/models/user_define.md b/docs/source/models/user_define.md index 4aa7ebdb8..5f1704f10 100644 --- a/docs/source/models/user_define.md +++ b/docs/source/models/user_define.md @@ -8,8 +8,7 @@ git submodule init git submodule update python git-lfs/git_lfs.py pull # 运行测试用例确保通过 -python -m easy_rec.python.test.run -sh scripts/end2end_test.sh +sh scripts/ci_test.sh ``` ### 编写模型proto文件 @@ -29,11 +28,11 @@ message CustomModel { }; ``` -#### 修改easy\_rec\_model.proto: +#### 修改easy_rec_model.proto: -easy\_rec/python/protos/easy\_rec\_model.proto: +easy_rec/python/protos/easy_rec_model.proto: -- import custom\_model.proto +- import custom_model.proto - 在oneof model里面增加CustomModel ```protobuf @@ -94,38 +93,38 @@ sh scripts/gen_proto.sh ``` - 如果是Rank模型,则推荐继承自RankModel - - 可以复用RankModel的build\_predict\_graph和build\_loss\_graph - - 可以利用RankModel中实现的\_add\_to\_prediction\_dict把build\_predict\_graph中DNN的输出加入到self.\_prediction\_dict中,具体参考DeepFM和MultiTower的实现。 + - 可以复用RankModel的build_predict_graph和build_loss_graph + - 可以利用RankModel中实现的_add_to_prediction_dict把build_predict_graph中DNN的输出加入到self.\_prediction_dict中,具体参考DeepFM和MultiTower的实现。 -#### 初始化函数: __init__(self, model\_config, feature\_configs, features, labels, is\_training) +#### 初始化函数: __init__(self, model_config, feature_configs, features, labels, is_training) -- model\_config: 模型配置, easy\_rec.python.protos.easy\_rec\_model\_pb2.EasyRecModel对象 - - model\_config.custom\_model: easy\_rec.python.protos.custom\_model\_pb2.CustomModel对象,是模型特有的参数 - - model\_config.feature\_groups: 特征组,如DeepFM包含deep组和wide组,多塔算法包含user组、item组、combo组等 -- feature\_configs: feature 
column配置,使用self.\_input\_layer可以获得经过feature\_column处理过的特征 +- model_config: 模型配置, easy_rec.python.protos.easy_rec_model_pb2.EasyRecModel对象 + - model_config.custom_model: easy_rec.python.protos.custom_model_pb2.CustomModel对象,是模型特有的参数 + - model_config.feature_groups: 特征组,如DeepFM包含deep组和wide组,多塔算法包含user组、item组、combo组等 +- feature_configs: feature column配置,使用self.\_input_layer可以获得经过feature_column处理过的特征 - features: 原始输入 -- labels: 样本的label -- is\_training: 是否是训练,其它状态(评估/预测)。 +- labels: 样本的label, 如果estimator的mode是predict或者export时, labels为None, 此时build_loss_graph不会被调用 +- is_training: 是否是训练,其它状态(评估/预测)。 -#### 前向函数: build\_predict\_graph +#### 前向函数: build_predict_graph -- 使用输入的features,使用tensorflow的函数构建深度模型,输出预测值y,预测值y放到self.\_prediction\_dict中 -- Return: self.\_prediction\_dict +- 使用输入的features,使用tensorflow的函数构建深度模型,输出预测值y,预测值y放到self.\_prediction_dict中 +- Return: self.\_prediction_dict -#### 损失函数: build\_loss\_graph +#### 损失函数: build_loss_graph -- 使用build\_predict\_graph函数中输出的预测值y和self.\_labels构建损失函数,loss tensor加入到self.\_loss\_dict +- 使用build_predict_graph函数中输出的预测值y和self.\_labels构建损失函数,loss tensor加入到self.\_loss_dict - self.\_labels通常是一个tensor list,如果CustomModel继承自RankModel,那么self.\_labels是一个tensor -- Return: self.\_loss\_dict -- loss会被EasyRec框架记录(tf.summary), 写入model\_dir目录下的events.\*文件 +- Return: self.\_loss_dict +- loss会被EasyRec框架记录(tf.summary), 写入model_dir目录下的events.\*文件 -#### 评估函数: build\_metric\_graph(self, eval\_config) +#### 评估函数: build_metric_graph(self, eval_config) -- eval\_config: easy\_rec.python.protos.eval\_pb2.EvalConfig: - - 一般根据其中的metric\_sets来确定要计算哪些metric -- 使用build\_predict\_graph函数中输出的预测值y和self.\_labels构建metric op -- Return: dict of {"metric\_name" : metric\_tensor } -- metric会被EasyRec框架记录(tf.summary), 写入model\_dir目录下的events.\*文件 +- eval_config: easy_rec.python.protos.eval_pb2.EvalConfig: + - 一般根据其中的metric_sets来确定要计算哪些metric +- 使用build_predict_graph函数中输出的预测值y和self.\_labels构建metric op +- Return: dict of {"metric_name" : metric_tensor } +- metric会被EasyRec框架记录(tf.summary), 写入model_dir目录下的events.\*文件 ```python # easy_rec/python/model/custom_model.py @@ -161,16 +160,13 @@ class CustomModel(EasyRecModel): model_config, feature_configs, features, labels, is_training ) """ - use feature columns to build complex features from features(input) - use self._input_layer to build features from feature_configs: - """ - self._wide_features, _ = self._input_layer(self._feature_dict, "wide") + use feature columns to build complex features from input + use self._input_layer to build features from feature_configs: + self._ctxt_features are a single tensor, where the all features are concatentated into one, + self._ctxt_feature_list is a list of tensors, each feature_config lead to one tensor. """ - self._deep_features are a single tensor, where the all features are concatentated into one, - self._deep_feature_list is a list of tensors, each feature_config lead to one tensor. - """ - self._deep_features, self._deep_feature_lst = self._input_layer( - self._feature_dict, "deep" + self._ctxt_features, self._ctxt_feature_lst = self._input_layer( + self._feature_dict, "ctxt" ) self._user_features, self._user_feature_lst = self._input_layer( self._feature_dict, "user" @@ -179,13 +175,12 @@ class CustomModel(EasyRecModel): self._feature_dict, "item" ) """ - The deep, user, item corresponds to feature_groups defined in model_config.feature_groups: - "deep", "user", "item" are the feature_group names. - It is suggested to use the feature_configs to build features. 
- But if the feature_configs could not satified your requirements, you can use tensorflow - functions to process the raw inputs in features. - ... - """ + The ctxt, user, item corresponds to 3 feature_groups defined in model_config.feature_groups: + "ctxt", "user", "item" are the feature_group names. + It is suggested to use the feature_configs to build features. + But if the feature_configs could not satified your requirements, you can use tensorflow + functions to process the raw inputs in features. + """ # do some other initializing work ... @@ -217,23 +212,23 @@ class CustomModel(EasyRecModel): #### Note: -如果是RankModel则直接继承easy\_rec.python.model.rank\_model.RankModel,可以省略: +如果是RankModel则直接继承easy_rec.python.model.rank_model.RankModel,可以省略: -- build\_loss\_graph -- build\_metric\_graph +- build_loss_graph +- build_metric_graph 因为这两个函数在RankModel里面已经完成了 ### 测试 -#### 编写pipeline.config +#### 编写samples/model_config/custom_model.config ```protobuf -# 训练表和测试表,如果在PAI上,会被-Dtables参数覆盖 -train_input_path: "train_longonehot_4deepfm_20.csv" -eval_input_path: "test_longonehot_4deepfm_20.csv" +# 训练数据和测试文件路径, 支持多个文件,匹配规则参考glob +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" # 模型保存路径 -model_dir: "experiment/custom_model_ctr/" +model_dir: "experiments/custom_model_ctr/" # 数据相关的描述 data_config { @@ -288,28 +283,49 @@ model_config: { } ``` -#### 增加测试数据到data/test/ +#### 测试 -#### 增加测试用例到scripts/end2end\_test.sh +增加测试数据到data/test/ ```bash -python -m easy_rec.python.train_eval --pipeline_config_path pipeline.config +python -m easy_rec.python.train_eval --pipeline_config_path samples/model_config/custom_model.config +``` + +增加测试用例到easy_rec/python/test/train_eval_test.py + +```python + def test_custom_model(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/custom_model.config', + self._test_dir) + self.assertTrue(self._success) + ``` -运行测试用例 +运行CustomModel测试用例 ```bash -scripts/end2end_test.sh +python -m easy_rec.python.test.train_eval_test TrainEvalTest.test_custom_model ``` -确保所有用例都通过了 +运行所有测试用例 + +```bash +scripts/ci_test.sh +``` #### 提交代码 ```shell python git-lfs/git_lfs.py add data/test/your_data_files python git-lfs/git_lfs.py push -git add your_config_file your_code.py -git commit -a -m "add new model xxx" +git add easy_rec/python/model/custom_model.py +git add samples/model_config/custom_model.config +git add easy_rec/python/protos/custom_model.proto +git commit -a -m "add custom model" git push origin your_branch ``` + +#### 参考 + +打包、发布[开发指南](../develop.md) diff --git a/docs/source/models/wide_and_deep.md b/docs/source/models/wide_and_deep.md new file mode 100644 index 000000000..7f166231d --- /dev/null +++ b/docs/source/models/wide_and_deep.md @@ -0,0 +1,75 @@ +# WideAndDeep + +### 简介 + +WideAndDeep包含Wide和Deep两部分,Wide部分负责记忆,Deep部分负责泛化。Wide部分可以做显式的特征交叉,Deep部分可以实现隐式自动的特征交叉。 + +![wide_and_deep.png](../../images/models/wide_and_deep.png) + +### 配置说明 + +```protobuf +model_config:{ + model_class: "WideAndDeep" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + ... + feature_names: "site_id_app_id" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + ... 
+ feature_names: "c21" + wide_deep:WIDE + } + + wide_and_deep { + wide_output_dim: 16 + + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + l2_regularization: 1e-5 + } + embedding_regularization: 1e-7 +} +``` + +- model_class: 'WideAndDeep', 不需要修改 + +- feature_groups: + + 需要两个feature_group: wide group和deep group, **group name不能变** + +- wide_and_deep: wide_and_deep 相关的参数 + +- dnn: deep part的参数配置 + + - hidden_units: dnn每一层的channel数目,即神经元的数目 + +- wide_output_dim: wide部分输出的大小 + +- final_dnn: 整合wide part, deep part的参数输入, 可以选择是否使用 + + - hidden_units: dnn每一层的channel数目,即神经元的数目 + +- embedding_regularization: 对embedding部分加regularization,防止overfit + +- input_type: 如果在提交到pai-tf集群上面运行,读取max compute 表作为输入数据,data_config:input_type要设置为OdpsInputV2。 + +### 示例Config + +[WideAndDeep_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/wide_and_deep.config) + +### 参考论文 + +[WideAndDeep](https://arxiv.org/abs/1606.07792) diff --git "a/docs/source/predict/MaxCompute\347\246\273\347\272\277\351\242\204\346\265\213.md" "b/docs/source/predict/MaxCompute\347\246\273\347\272\277\351\242\204\346\265\213.md" new file mode 100644 index 000000000..f60135c8c --- /dev/null +++ "b/docs/source/predict/MaxCompute\347\246\273\347\272\277\351\242\204\346\265\213.md" @@ -0,0 +1,57 @@ +# MaxCompute离线预测 + +### 前置条件: + +- 模型训练 +- 模型导出 + +### 离线预测 + +```bash +drop table if exists ctr_test_output; +pai -name easy_rec_ext +-Dcmd=predict +-Dcluster='{"worker" : {"count":5, "cpu":1600, "memory":40000, "gpu":100}}' +-Darn=acs:ram::1217060697188167:role/ev-ext-test-oss +-Dbuckets=oss://easyrec/ +-Dsaved_model_dir=oss://easyrec/easy_rec_test/experiment/ctr_export/1597299619 +-Dinput_table=odps://pai_online_project/tables/test_longonehot_4deepfm_20 +-Doutput_table=odps://pai_online_project/tables/ctr_test_output +-Dexcluded_cols=label +-Dreserved_cols=ALL_COLUMNS +-Dbatch_size=1024 +-DossHost=oss-cn-beijing-internal.aliyuncs.com; +``` + +- save_modeld_dir: 导出的模型目录 +- output_table: 输出表,不需要提前创建,会自动创建 +- excluded_cols: 预测模型不需要的columns,比如labels +- selected_cols: 预测模型需要的columns,selected_cols和excluded_cols不能同时使用 +- reserved_cols: 需要copy到output_table的columns, 和excluded_cols/selected_cols不冲突,如果指定ALL_COLUMNS,则所有的column都被copy到output_table +- batch_size: minibatch的大小 +- -Darn: rolearn  注意这个的arn要替换成客户自己的。可以从dataworks的设置中查看arn。 +- -DossHost: ossHost地址 +- -Dbuckets: config所在的bucket和保存模型的bucket; 如果有多个bucket,逗号分割 +- 如果是pai内部版,则不需要指定arn和ossHost, arn和ossHost放在-Dbuckets里面 + - -Dbuckets=oss://easyrec/?role_arn=acs:ram::xxx:role/ev-ext-test-oss&host=oss-cn-beijing-internal.aliyuncs.com +- output_cols: output_name和类型, 如: + - -Doutput_cols="probs double" + - 如果有多列,用逗号分割, -Doutput_cols='probs double,embedding string' +- model_outputs: 导出saved_model时模型的导出字段,可以不指定,默认和output_cols一致 + - 如果output_cols和model_outputs不一致时需要指定,如: + ```sql + -Doutput_cols='score double' -Dmodel_outputs='probs' + ``` + - 如果有多列,用逗号分割 + ```sql + -Doutput_cols='scores double,v string' + -Dmodel_outputs='probs,embedding' + ``` + - ctr模型(num_class=1),导出字段:logits、probs,对应: sigmoid之前的值/概率 + - 回归模型,导出字段: y,对应: 预测值 + - 多分类模型,导出字段: logits/probs/y,对应: softmax之前的值/概率/类别id +- lifecyle: output_table的lifecyle + +### 输出表schema: + +包含reserved_cols和output_cols diff --git a/docs/source/predict/OnlinePrediction.md b/docs/source/predict/OnlinePrediction.md new file mode 100644 index 000000000..a5fa36f67 --- /dev/null +++ b/docs/source/predict/OnlinePrediction.md @@ -0,0 +1,52 @@ +# Online Prediction + +使用easy_rec提供的Predictor进行预测 + +### 加载模型 + +```bash +from 
diff --git a/docs/source/predict/OnlinePrediction.md b/docs/source/predict/OnlinePrediction.md
new file mode 100644
index 000000000..a5fa36f67
--- /dev/null
+++ b/docs/source/predict/OnlinePrediction.md
@@ -0,0 +1,52 @@
+# Online Prediction
+
+Use the Predictor provided by easy_rec to make predictions.
+
+### Load the model
+
+```python
+from easy_rec.python.inference.predictor import Predictor
+
+# export is the directory of saved models
+predictor = Predictor('model/export/')
+```
+
+### Input format
+
+1. list format
+1. dict format
+
+The output is a list of dicts; each dict contains one field y, i.e. the score, which lies in \[0, 1\].
+
+```python
+import csv
+
+# test_path: path of a csv file with the same columns as the training data
+
+# interface 1, input is a list of fields, the order of fields
+# must be the same as that of data_config.input_fields
+with open(test_path, 'r') as fin:
+  reader = csv.reader(fin)
+  inputs = []
+  # the first is label, skip first column
+  for row in reader:
+    inputs.append(row[1:])
+  output_res = predictor.predict(inputs, batch_size=32)
+  assert len(output_res) == 63
+  assert abs(output_res[0]['y'] - 0.5726) < 1e-3
+
+# interface 2, input is a dict of fields
+# the field_keys must be the same as data_config.input_fields.input_name
+field_keys = [ "field1", "field2", "field3", "field4", "field5",
+               "field6", "field7", "field8", "field9", "field10",
+               "field11", "field12", "field13", "field14", "field15",
+               "field16", "field17", "field18", "field19", "field20" ]
+with open(test_path, 'r') as fin:
+  reader = csv.reader(fin)
+  inputs = []
+  for row in reader:
+    inputs.append({ f : row[fid+1] for fid, f in enumerate(field_keys) })
+  output_res = predictor.predict(inputs, batch_size=32)
+  assert len(output_res) == 63
+  assert abs(output_res[0]['y'] - 0.5726) < 1e-3
+```
+
+### Load the model with EAS
+
+You can use the TF Processor or a custom Processor; see [Loading models with EAS](https://help.aliyun.com/document_detail/113696.html?spm=a2c4g.11186623.6.716.69da371b9G94HF) for details.
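The same Predictor can also be used for a quick offline sanity check of an exported model, for example computing AUC over a labeled csv file. A minimal sketch, assuming scikit-learn is available and a csv file whose first column is the label; the file path is a placeholder.

```python
# Minimal sketch: score a labeled csv with the exported model and compute AUC.
# 'data/test/eval_data.csv' is a placeholder path; its first column is the label.
import csv

from sklearn.metrics import roc_auc_score

from easy_rec.python.inference.predictor import Predictor

predictor = Predictor('model/export/')

labels, inputs = [], []
with open('data/test/eval_data.csv', 'r') as fin:
  for row in csv.reader(fin):
    labels.append(int(row[0]))
    inputs.append(row[1:])

outputs = predictor.predict(inputs, batch_size=1024)
print('auc: %.4f' % roc_auc_score(labels, [out['y'] for out in outputs]))
```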

Protocol Documentation

+ +

Table of Contents

+ +
+ +
+ + + +
+

easy_rec/python/protos/autoint.proto

Top +
+

+
+      
+        

AutoInt

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
multi_head_numuint32required
The number of heads Default: 1
multi_head_sizeuint32required
The dimension of heads 
interacting_layer_numuint32required
The number of interacting layers Default: 1
l2_regularizationfloatrequired
 Default: 0.0001
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/dataset.proto

Top +
+

+
+      
+        

DatasetConfig

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
batch_sizeuint32optional
mini batch size to use for training and evaluation. Default: 32
auto_expand_input_fieldsbooloptional
set auto_expand_input_fields to true to
+auto_expand field[1-21] to field1, field2, ..., field21 Default: false
label_fieldsstringrepeated
label fields, normally only one field is used.
+For multiple target models such as MMOE
+multiple label_fields will be set. 
label_sepstringrepeated
label separator 
label_dimuint32repeated
label dimensions which need to be set when there
+are labels have dimension > 1 
shufflebooloptional
whether to shuffle data Default: true
shuffle_buffer_sizeint32optional
shufffle buffer for better performance, even shuffle buffer is set,
+it is suggested to do full data shuffle before training
+especially when the performance of models is not good. Default: 32
num_epochsuint32optional
The number of times a data source is read. If set to zero, the data source
+will be reused indefinitely. Default: 0
prefetch_sizeuint32optional
Number of decoded records to prefetch before batching. Default: 512
shardbooloptional
shard dataset to 1/num_workers in distribute mode Default: false
input_typeDatasetConfig.InputTyperequired
 
separatorstringoptional
separator of column features, only used for CSVInput*
+not used in OdpsInput*
+binary separators are supported:
+  CTRL+A could be set as '\001'
+  CTRL+B could be set as '\002'
+  CTRL+C could be set as '\003'
+for RTPInput and OdpsRTPInput it is usually set
+to '\002' Default: ,
num_parallel_callsuint32optional
parallel preproces of raw data, avoid using too small
+or too large numbers(suggested be to small than
+number of the cores) Default: 8
selected_colsstringoptional
only used for OdpsInput/OdpsInputV2/OdpsRTPInput, comma separated
+for RTPInput, selected_cols use indices as column names
+ such as '1,2,4', where 1,2 are label columns, and
+ 4 is the feature column, column 0,3 are not used, 
selected_col_typesstringoptional
selected col types, only used for OdpsInput/OdpsInputV2
+to avoid error setting of data types 
input_fieldsDatasetConfig.Fieldrepeated
the input fields must be the same number and in the
+same order as data in csv files or odps tables 
rtp_separatorstringoptional
for RTPInput only Default: ;
+ + + + + +

DatasetConfig.Field

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
input_namestringrequired
 
input_typeDatasetConfig.FieldTyperequired
 Default: STRING
default_valstringoptional
 
input_dimuint32optional
 Default: 1
+ + + + + + + +

DatasetConfig.FieldType

+

+        
+          
+            
+          
+          
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+          
+        
NameNumberDescription
INT320
INT641
STRING2
FLOAT4
DOUBLE5
BOOL6
+ +

DatasetConfig.InputType

+

+        
+          
+            
+          
+          
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+          
+        
NameNumberDescription
CSVInput0
csv format input, could be used in local or hdfs
CSVInputV21
@Depreciated
OdpsInput2
@Depreciated, has memory leak problem
OdpsInputV23
odps input, used on pai
OdpsInputV39
RTPInput4
RTPInputV25
OdpsRTPInput6
DummyInput7
for the purpose to debug performance bottleneck of
+input pipelines
KafkaInput8
+ + + + + + + +
+

easy_rec/python/protos/data_source.proto

Top +
+

+
+      
+        

KafkaServer

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
serverstringrequired
 
topicstringrequired
 
groupstringrequired
 
partitionsuint32required
 
offsetuint32required
 
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/dbmtl.proto

Top +
+

+
+      
+        

DBMTL

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
bottom_dnnDNNoptional
shared bottom dnn layer 
expert_dnnDNNoptional
mmoe expert dnn layer definition 
num_expertuint32optional
number of mmoe experts Default: 0
task_towersBayesTaskTowerrepeated
bayes task tower 
l2_regularizationfloatoptional
l2 regularization Default: 0.0001
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/dcn.proto

Top +
+

+
+      
+        

CrossTower

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
inputstringrequired
 
cross_numuint32required
The number of cross layers Default: 3
+ + + + + +

DCN

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
deep_towerTowerrequired
 
cross_towerCrossTowerrequired
 
final_dnnDNNrequired
 
l2_regularizationfloatrequired
 Default: 0.0001
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/deepfm.proto

Top +
+

+
+      
+        

DeepFM

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
dnnDNNrequired
 
final_dnnDNNoptional
 
wide_output_dimuint32optional
 Default: 1
wide_regularizationfloatoptional
deprecated Default: 0.0001
dense_regularizationfloatoptional
deprecated Default: 0.0001
l2_regularizationfloatoptional
 Default: 0.0001
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/dnn.proto

Top +
+

+
+      
+        

DNN

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
hidden_unitsuint32repeated
hidden units for each layer 
dropout_ratiofloatrepeated
ratio of dropout 
activationstringoptional
activation function Default: tf.nn.relu
use_bnbooloptional
use batch normalization Default: true
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/dssm.proto

Top +
+

+
+      
+        

DSSM

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
user_towerDSSMTowerrequired
 
item_towerDSSMTowerrequired
 
l2_regularizationfloatrequired
 Default: 0.0001
simi_funcSimilarityoptional
 Default: COSINE
+ + + + + +

DSSMTower

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
idstringrequired
 
dnnDNNrequired
 
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/eas_serving.proto

Top +
+

+
+      
+        

Config

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
column_delimstring
例如输入特征为"1005,109;0;93eaba74",此时分号分割的为column,
+逗号分割的为每个column的多个feature, 下划线分割为feature名字和对应的value。 
feature_delimstring
 
hashstring
指定字符串hash分桶的算法,支持HarmHash(对应于tf.strings.to_hash_bucket_fast())
+和SipHash(对应于tf.strings.to_hash_bucket_strong())两种字符串hash分桶算法 
embeddingsConfig.EmbeddingsEntryrepeated
embedding_name to embedding 
embedding_max_normConfig.EmbeddingMaxNormEntryrepeated
指定embedding lookup的结果的最大L2-norm 
embedding_combinerConfig.EmbeddingCombinerEntryrepeated
指定embedding的combiner策略,支持sum, mean和sqrtn 
modelModel
 
+ + + + + +

Config.EmbeddingCombinerEntry

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
keystring
 
valuestring
 
+ + + + + +

Config.EmbeddingMaxNormEntry

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
keystring
 
valuefloat
 
+ + + + + +

Config.EmbeddingsEntry

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
keystring
 
valueEmbedding
 
+ + + + + +

Embedding

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
partition_numint32
指定该embedding切分的总数 
partsEmbeddingPartrepeated
 
+ + + + + +

EmbeddingPart

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
embedding_part_pathstring
指定EmbeddingPartData(*.pb)所在的路径 
partition_idint32
指定该embedding part所属第几个part 
shapeint64repeated
指定该embedding part的shape(可以从EmbeddingPartData中读取) 
deploy_strategystring
embedding part的部署策略, 支持本地部署(local)和远程部署(remote) 
+ + + + + +

EmbeddingPartData

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
shapeint64repeated
Shape of the embedding 
datafloatrepeated
Data 
+ + + + + +

Model

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
model_pathstring
指定模型所在路径,便于加载模型 
model_signature_namestring
指定模型的sinature的名字 
model_inputsModelInputrepeated
model input description 
+ + + + + +

ModelInput

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
feature_namestring
 
embedding_namestring
 
placeholder_namestring
 
weight_namestring
 
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/easy_rec_model.proto

Top +
+

+
+      
+        

DummyModel

+
for input performance test
+ + + + + +

EasyRecModel

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
model_classstringrequired
 
feature_groupsFeatureGroupConfigrepeated
actually input layers, each layer produce a group of feature 
dummyDummyModeloptional
 
wide_and_deepWideAndDeepoptional
 
deepfmDeepFMoptional
 
multi_towerMultiToweroptional
 
fmFMoptional
 
dcnDCNoptional
 
autointAutoIntoptional
 
dssmDSSMoptional
 
mindMINDoptional
 
mmoeMMoEoptional
 
esmmESMMoptional
 
dbmtlDBMTLoptional
 
simple_multi_taskSimpleMultiTaskoptional
 
seq_att_groupsSeqAttGroupConfigrepeated
 
embedding_regularizationfloatoptional
implemented in easy_rec/python/model/easy_rec_estimator
+add regularization to all variables with "embedding_weights:"
+in name Default: 0
loss_typeLossTypeoptional
 Default: CLASSIFICATION
num_classuint32optional
 Default: 1
use_embedding_variablebooloptional
 Default: false
kdKDrepeated
 
+ + + + + +

KD

+
for knowledge distillation
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
loss_namestringoptional
 
pred_namestringrequired
 
pred_is_logitsbooloptional
default to be logits Default: true
soft_label_namestringrequired
for CROSS_ENTROPY_LOSS, soft_label must be logits instead of probs 
label_is_logitsbooloptional
default to be logits Default: true
loss_typeLossTyperequired
currently only support CROSS_ENTROPY_LOSS and L2_LOSS 
loss_weightfloatoptional
 Default: 1
temperaturefloatoptional
only for loss_type == CROSS_ENTROPY_LOSS Default: 1
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/esmm.proto

Top +
+

+
+      
+        

ESMM

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
groupsTowerrepeated
 
ctr_towerTaskTowerrequired
 
cvr_towerTaskTowerrequired
 
l2_regularizationfloatrequired
 Default: 0.0001
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/eval.proto

Top +
+

+
+      
+        

AUC

+

+
+        
+
+        
+      
+        

Accuracy

+

+
+        
+
+        
+      
+        

EvalConfig

+
Message for configuring DetectionModel evaluation jobs (eval.py).
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
num_examplesuint32optional
Number of examples to process of evaluation. Default: 0
eval_interval_secsuint32optional
How often to run evaluation. Default: 300
max_evalsuint32optional
Maximum number of times to run evaluation. If set to 0, will run forever. Default: 0
save_graphbooloptional
Whether the TensorFlow graph used for evaluation should be saved to disk. Default: false
metrics_setEvalMetricsrepeated
Type of metrics to use for evaluation.
+possible values: 
eval_onlinebooloptional
Evaluation online with batch forward data of training Default: false
+ + + + + +

EvalMetrics

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
aucAUCoptional
 
recall_at_topkRecallAtTopKoptional
 
mean_absolute_errorMeanAbsoluteErroroptional
 
mean_squared_errorMeanSquaredErroroptional
 
accuracyAccuracyoptional
 
max_f1Max_F1optional
 
root_mean_squared_errorRootMeanSquaredErroroptional
 
gaucGAUCoptional
 
session_aucSessionAUCoptional
 
+ + + + + +

GAUC

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
uid_fieldstringrequired
uid field name 
reductionstringoptional
reduction method for auc of different users
+* "mean": simple mean of different users
+* "mean_by_sample_num": weighted mean with sample num of different users
+* "mean_by_positive_num": weighted mean with positive sample num of different users Default: mean
+ + + + + +

Max_F1

+

+
+        
+
+        
+      
+        

MeanAbsoluteError

+

+
+        
+
+        
+      
+        

MeanSquaredError

+

+
+        
+
+        
+      
+        

RecallAtTopK

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
topkuint32optional
 Default: 5
+ + + + + +

RootMeanSquaredError

+

+
+        
+
+        
+      
+        

SessionAUC

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
session_id_fieldstringrequired
session id field name 
reductionstringoptional
reduction: reduction method for auc of different sessions
+* "mean": simple mean of different sessions
+* "mean_by_sample_num": weighted mean with sample num of different sessions
+* "mean_by_positive_num": weighted mean with positive sample num of different sessions Default: mean
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/export.proto

Top +
+

+
+      
+        

ExportConfig

+
Message for configuring exporting models.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
batch_sizeint32optional
batch size used for exported model, -1 indicates batch_size is None
+which is only supported by classification model right now, while
+other models support static batch_size Default: -1
exporter_typestringoptional
type of exporter [final | latest | best | none] when train_and_evaluation
+final: performs a single export in the end of training
+latest: regularly exports the serving graph and checkpoints
+latest: export the best model according to best_exporter_metric
+none: do not perform export Default: final
dump_embedding_shapebooloptional
for large embedding models to serve on eas
+embedding lookup is done outside of tensorflow graph;
+so the tensorflow graph contains only the dnn graphs(the attention part included);
+the lookuped results are passed to dnn graphs via embedding placeholders;
+we dump embedding placeholder shapes, so that embedding
+placeholders could be built. Default: false
best_exporter_metricstringoptional
the metric used to determine the best checkpoint Default: auc
metric_biggerbooloptional
metric value the bigger the best Default: true
multi_placeholderbooloptional
each feature has a placeholder Default: true
exports_to_keepint32optional
export to keep, only for exporter_type in [best, latest] Default: 1
multi_value_fieldsMultiValueFieldsoptional
multi value field list 
placeholder_named_by_inputbooloptional
is placeholder named by input Default: false
+ + + + + +

MultiValueFields

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
input_namestringrepeated
 
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/feature_config.proto

Top +
+

+
+      
+        

AttentionCombiner

+

+
+        
+
+        
+      
+        

FeatureConfig

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
feature_namestringoptional
 
input_namesstringrepeated
input field names: must be included in DatasetConfig.input_fields 
feature_typeFeatureConfig.FeatureTyperequired
 Default: IdFeature
embedding_namestringoptional
 
embedding_dimuint32optional
 Default: 0
hash_bucket_sizeuint32optional
 Default: 0
num_bucketsuint32optional
for categorical_column_with_identity Default: 0
boundariesfloatrepeated
only for raw features 
separatorstringoptional
separator with in features Default: |
kv_separatorstringoptional
delimeter to separator key from value 
seq_multi_sepstringoptional
delimeter to separate sequence multi-values 
vocab_filestringoptional
 
vocab_liststringrepeated
 
shared_namesstringrepeated
many other field share this config 
lookup_max_sel_elem_numint32optional
lookup max select element number, default 10 Default: 10
max_partitionsint32optional
max_partitions Default: 1
combinerstringoptional
combiner Default: mean
initializerInitializeroptional
embedding initializer 
precisionint32optional
number of digits kept after dot in format float/double to string
+scientific format is not used.
+in default it is not allowed to convert float/double to string Default: -1
min_valfloatoptional
normalize raw feature to [0-1] Default: 0
max_valfloatoptional
 Default: 0
raw_input_dimuint32optional
raw feature of multiple dimensions Default: 1
sequence_combinerSequenceCombineroptional
sequence feature combiner 
+ + + + + +

FeatureGroupConfig

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
group_namestringoptional
 
feature_namesstringrepeated
 
wide_deepWideOrDeepoptional
 Default: DEEP
+ + + + + +

MultiHeadAttentionCombiner

+

+
+        
+
+        
+      
+        

SeqAttGroupConfig

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
group_namestringoptional
 
seq_att_mapSeqAttMaprepeated
 
+ + + + + +

SeqAttMap

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
keystringrequired
 
hist_seqstringrequired
 
+ + + + + +

SequenceCombiner

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
attentionAttentionCombineroptional
 
multi_head_attentionMultiHeadAttentionCombineroptional
 
+ + + + + + + +

FeatureConfig.FeatureType

+

+        
+          
+            
+          
+          
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+          
+        
NameNumberDescription
IdFeature0
RawFeature1
TagFeature2
ComboFeature3
LookupFeature4
SequenceFeature5
+ +

WideOrDeep

+

+        
+          
+            
+          
+          
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+          
+        
NameNumberDescription
DEEP0
WIDE1
WIDE_AND_DEEP2
+ + + + + + + +
+

easy_rec/python/protos/fm.proto

Top +
+

+
+      
+        

FM

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
l2_regularizationfloatoptional
 Default: 0.0001
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/hyperparams.proto

Top +
+

+
+      
+        

ConstantInitializer

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
constsfloatrepeated
 
+ + + + + +

GlorotNormalInitializer

+

+
+        
+
+        
+      
+        

Initializer

+
Proto with one-of field for initializers.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
truncated_normal_initializerTruncatedNormalInitializeroptional
 
random_normal_initializerRandomNormalInitializeroptional
 
glorot_normal_initializerGlorotNormalInitializeroptional
 
constant_initializerConstantInitializeroptional
 
+ + + + + +

L1L2Regularizer

+
Configuration proto for L2 Regularizer.
+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
scale_l1floatoptional
 Default: 1
scale_l2floatoptional
 Default: 1
+ + + + + +

L1Regularizer

+
Configuration proto for L1 Regularizer.
+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
scalefloatoptional
 Default: 1
+ + + + + +

L2Regularizer

+
Configuration proto for L2 Regularizer.
+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
scalefloatoptional
 Default: 1
+ + + + + +

RandomNormalInitializer

+
Configuration proto for random normal initializer. See
https://www.tensorflow.org/api_docs/python/tf/random_normal_initializer
+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meanfloatoptional
 Default: 0
stddevfloatoptional
 Default: 1
+ + + + + +

Regularizer

+
Proto with one-of field for regularizers.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
l1_regularizerL1Regularizeroptional
 
l2_regularizerL2Regularizeroptional
 
l1_l2_regularizerL1L2Regularizeroptional
 
+ + + + + +

TruncatedNormalInitializer

+
Configuration proto for truncated normal initializer. See
https://www.tensorflow.org/api_docs/python/tf/truncated_normal_initializer
+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meanfloatoptional
 Default: 0
stddevfloatoptional
 Default: 1
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/loss.proto

Top +
+

+
+      
+
+      
+        

LossType

+

+        
+          
+            
+          
+          
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+          
+        
NameNumberDescription
CLASSIFICATION0
L2_LOSS1
SIGMOID_L2_LOSS2
CROSS_ENTROPY_LOSS3
crossentropy loss/log loss
+ + + + + + + +
+

easy_rec/python/protos/mind.proto

Top +
+

+
+      
+        

Capsule

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
max_kuint32optional
max number of high capsules Default: 5
max_seq_lenuint32required
max behaviour sequence length 
high_dimuint32required
high capsule embedding vector dimension 
num_itersuint32optional
number EM iterations Default: 3
routing_logits_scalefloatoptional
routing logits scale Default: 20
routing_logits_stddevfloatoptional
routing logits initial stddev Default: 1
+ + + + + +

MIND

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
pre_capsule_dnnDNNoptional
preprocessing dnn before entering capsule layer 
user_dnnDNNrequired
dnn layers applied on concated results of
+capsule output and user_context(none sequence features) 
user_seq_combineMIND.UserSeqCombineMethodoptional
method to combine several user sequences
+such as item_ids, category_ids Default: SUM
item_dnnDNNrequired
dnn layers applied on item features 
capsule_configCapsulerequired
 
simi_powfloatoptional
similarity power, the paper says that the big
+the better Default: 10
simi_funcSimilarityoptional
 Default: COSINE
l2_regularizationfloatrequired
 Default: 0.0001
+ + + + + + + +

MIND.UserSeqCombineMethod

+

+        
+          
+            
+          
+          
+            
+              
+                
+                
+                
+              
+            
+              
+                
+                
+                
+              
+            
+          
+        
NameNumberDescription
CONCAT0
SUM1
+ + + + + + + +
+

easy_rec/python/protos/mmoe.proto

Top +
+

+
+      
+        

ExpertTower

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
expert_namestringrequired
 
dnnDNNrequired
 
+ + + + + +

MMoE

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
expertsExpertTowerrepeated
deprecated: original mmoe experts config 
expert_dnnDNNoptional
mmoe expert dnn layer definition 
num_expertuint32optional
number of mmoe experts Default: 0
task_towersTaskTowerrepeated
task tower 
l2_regularizationfloatrequired
l2 regularization Default: 0.0001
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/multi_tower.proto

Top +
+

+
+      
+        

BSTTower

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
inputstringrequired
 
seq_lenuint32required
 Default: 5
multi_head_sizeuint32required
 Default: 4
+ + + + + +

DINTower

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
inputstringrequired
 
dnnDNNrequired
 
+ + + + + +

MultiTower

+

+
+        
+          
+            
+              
+            
+            
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+                
+                  
+                  
+                  
+                  
+                
+              
+            
+          
FieldTypeLabelDescription
towersTowerrepeated
 
final_dnnDNNrequired
 
l2_regularizationfloatrequired
 Default: 0.0001
din_towersDINTowerrepeated
 
bst_towersBSTTowerrepeated
 
+ + + + + + + + + + + + + +
+

easy_rec/python/protos/optimizer.proto

AdagradOptimizer

Configuration message for the AdagradOptimizer
See: https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | LearningRate | optional |  |

AdamAsyncOptimizer

Only available on pai-tf, which has better performance than AdamOptimizer.

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | LearningRate | optional |  |
| beta1 | float | optional | Default: 0.9 |
| beta2 | float | optional | Default: 0.999 |

AdamAsyncWOptimizer

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | LearningRate | optional |  |
| weight_decay | float | optional | Default: 1e-06 |
| beta1 | float | optional | Default: 0.9 |
| beta2 | float | optional | Default: 0.999 |

AdamOptimizer

Configuration message for the AdamOptimizer
See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | LearningRate | optional |  |
| beta1 | float | optional | Default: 0.9 |
| beta2 | float | optional | Default: 0.999 |

AdamWOptimizer

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | LearningRate | optional |  |
| weight_decay | float | optional | Default: 1e-06 |
| beta1 | float | optional | Default: 0.9 |
| beta2 | float | optional | Default: 0.999 |

ConstantLearningRate

Configuration message for a constant learning rate.

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | float | optional | Default: 0.002 |

CosineDecayLearningRate

Configuration message for a cosine decaying learning rate as defined in utils/learning_schedules.py.

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate_base | float | optional | Default: 0.002 |
| total_steps | uint32 | optional | Default: 4000000 |
| warmup_learning_rate | float | optional | Default: 0.0002 |
| warmup_steps | uint32 | optional | Default: 10000 |
| hold_base_rate_steps | uint32 | optional | Default: 0 |

ExponentialDecayLearningRate

Configuration message for an exponentially decaying learning rate.
See https://www.tensorflow.org/versions/master/api_docs/python/train/decaying_the_learning_rate#exponential_decay

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| initial_learning_rate | float | optional | Default: 0.002 |
| decay_steps | uint32 | optional | Default: 4000000 |
| decay_factor | float | optional | Default: 0.95 |
| staircase | bool | optional | Default: true |
| burnin_learning_rate | float | optional | Default: 0 |
| burnin_steps | uint32 | optional | Default: 0 |
| min_learning_rate | float | optional | Default: 0 |

LearningRate

Configuration message for optimizer learning rate.

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| constant_learning_rate | ConstantLearningRate | optional |  |
| exponential_decay_learning_rate | ExponentialDecayLearningRate | optional |  |
| manual_step_learning_rate | ManualStepLearningRate | optional |  |
| cosine_decay_learning_rate | CosineDecayLearningRate | optional |  |
| poly_decay_learning_rate | PolyDecayLearningRate | optional |  |
| transformer_learning_rate | TransformerLearningRate | optional |  |

ManualStepLearningRate

Configuration message for a manually defined learning rate schedule.

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| initial_learning_rate | float | optional | Default: 0.002 |
| schedule | ManualStepLearningRate.LearningRateSchedule | repeated |  |
| warmup | bool | optional | Whether to linearly interpolate learning rates for steps in [0, schedule[0].step]. Default: false |

ManualStepLearningRate.LearningRateSchedule

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| step | uint32 | optional |  |
| learning_rate | float | optional | Default: 0.002 |

MomentumOptimizer

Configuration message for the MomentumOptimizer
See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | LearningRate | optional |  |
| momentum_optimizer_value | float | optional | Default: 0.9 |

MomentumWOptimizer

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | LearningRate | optional |  |
| weight_decay | float | optional | Default: 1e-06 |
| momentum_optimizer_value | float | optional | Default: 0.9 |

Optimizer

Top level optimizer message.

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| rms_prop_optimizer | RMSPropOptimizer | optional |  |
| momentum_optimizer | MomentumOptimizer | optional |  |
| adam_optimizer | AdamOptimizer | optional |  |
| momentumw_optimizer | MomentumWOptimizer | optional |  |
| adamw_optimizer | AdamWOptimizer | optional |  |
| adam_async_optimizer | AdamAsyncOptimizer | optional |  |
| adagrad_optimizer | AdagradOptimizer | optional |  |
| adam_asyncw_optimizer | AdamAsyncWOptimizer | optional |  |
| use_moving_average | bool | optional | Default: false |
| moving_average_decay | float | optional | Default: 0.9999 |
| embedding_learning_rate_multiplier | float | optional |  |

PolyDecayLearningRate

Configuration message for a poly decaying learning rate.
See https://www.tensorflow.org/api_docs/python/tf/train/polynomial_decay.

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate_base | float | required |  |
| total_steps | int64 | required |  |
| power | float | required |  |
| end_learning_rate | float | optional | Default: 0 |

RMSPropOptimizer

Configuration message for the RMSPropOptimizer
See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate | LearningRate | optional |  |
| momentum_optimizer_value | float | optional | Default: 0.9 |
| decay | float | optional | Default: 0.9 |
| epsilon | float | optional | Default: 1 |

TransformerLearningRate

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| learning_rate_base | float | required |  |
| hidden_size | int32 | required |  |
| warmup_steps | int32 | required |  |
| step_scaling_rate | float | optional | Default: 1 |
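Taken together, these messages form the `optimizer_config` field of `train_config` (see train.proto below). A minimal sketch, with values chosen only for illustration:

```protobuf
optimizer_config {
  adam_optimizer {
    learning_rate {
      exponential_decay_learning_rate {
        initial_learning_rate: 0.0001
        decay_steps: 100000
        decay_factor: 0.5
        min_learning_rate: 0.0000001
      }
    }
    beta1: 0.9
    beta2: 0.999
  }
  use_moving_average: false
}
```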

easy_rec/python/protos/pipeline.proto

EasyRecConfig

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| train_input_path | string | optional |  |
| kafka_train_input | KafkaServer | optional |  |
| eval_input_path | string | optional |  |
| kafka_eval_input | KafkaServer | optional |  |
| model_dir | string | required |  |
| train_config | TrainConfig | optional | train config, including optimizer, weight decay, num_steps and so on |
| eval_config | EvalConfig | optional |  |
| data_config | DatasetConfig | optional |  |
| feature_configs | FeatureConfig | repeated |  |
| model_config | EasyRecModel | required | recommendation model config |
| export_config | ExportConfig | optional |  |
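EasyRecConfig is the root message of a pipeline config file. A skeleton showing only the top-level fields from the table; paths are placeholders and the comments describing sub-message contents are assumptions, not taken from this section:

```protobuf
train_input_path: "data/train.csv"     # placeholder path
eval_input_path: "data/test.csv"       # placeholder path
model_dir: "experiments/demo"          # placeholder path

data_config {
  # input schema and batching options (DatasetConfig)
}
feature_configs {
  # one block per feature (FeatureConfig)
}
train_config {
  # optimizer_config, num_steps, ... (see train.proto)
}
eval_config {
}
model_config {
  # the model definition, e.g. the MultiTower / MMoE messages above
}
export_config {
}
```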

easy_rec/python/protos/simi.proto

Similarity

| Name | Number | Description |
| ---- | ------ | ----------- |
| COSINE | 0 |  |
| INNER_PRODUCT | 1 |  |
| EUCLID | 2 |  |
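Enum values are written by name in prototxt. The field name below is hypothetical; this section only defines the enum itself:

```protobuf
# hypothetical field of type Similarity inside a model's config block
simi_func: COSINE
```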

easy_rec/python/protos/simple_multi_task.proto

SimpleMultiTask

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| task_towers | TaskTower | repeated |  |
| l2_regularization | float | required | Default: 0.0001 |
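A minimal sketch, reusing TaskTower fields documented under tower.proto below; the `model_config { simple_multi_task { ... } }` wrapper name is an assumption:

```protobuf
model_config {
  simple_multi_task {              # assumed wrapper field name
    task_towers {
      tower_name: "ctr"
      label_name: "click"
    }
    task_towers {
      tower_name: "like"
      label_name: "like"
    }
    l2_regularization: 0.0001
  }
}
```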

easy_rec/python/protos/tower.proto

BayesTaskTower

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| tower_name | string | required | task name for the task tower |
| label_name | string | optional | label for the task, default is label_fields by order |
| metrics_set | EvalMetrics | repeated | metrics for the task |
| loss_type | LossType | optional | loss for the task. Default: CLASSIFICATION |
| num_class | uint32 | optional | num_class for multi-class classification loss. Default: 1 |
| dnn | DNN | optional | task specific dnn |
| relation_tower_names | string | repeated | related tower names |
| relation_dnn | DNN | optional | relation dnn |
| weight | float | optional | training loss weights. Default: 1 |
| task_space_indicator_label | string | optional | label name for indicating the sample space for the task tower |
| in_task_space_weight | float | optional | the loss weight for sample in the task space. Default: 1 |
| out_task_space_weight | float | optional | the loss weight for sample out the task space; level for prediction: `required uint32 prediction_level = 13;` prediction weights: `optional float prediction_weight = 14 [default = 1.0];` Default: 1 |

TaskTower

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| tower_name | string | required | task name for the task tower |
| label_name | string | optional | label for the task, default is label_fields by order |
| metrics_set | EvalMetrics | repeated | metrics for the task |
| loss_type | LossType | optional | loss for the task. Default: CLASSIFICATION |
| num_class | uint32 | optional | num_class for multi-class classification loss. Default: 1 |
| dnn | DNN | optional | task specific dnn |
| weight | float | optional | training loss weights. Default: 1 |
| task_space_indicator_label | string | optional | label name for indicating the sample space for the task tower |
| in_task_space_weight | float | optional | the loss weight for sample in the task space. Default: 1 |
| out_task_space_weight | float | optional | the loss weight for sample out the task space. Default: 1 |

Tower

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| input | string | required |  |
| dnn | DNN | required |  |
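BayesTaskTower extends TaskTower with relation_tower_names / relation_dnn, so one task can condition on another task's output. A minimal sketch of two related task towers; the enclosing multi-task model block and the DNN `hidden_units` field are assumptions not documented here:

```protobuf
task_towers {
  tower_name: "ctr"
  label_name: "click"
  dnn { hidden_units: [128, 64] }     # hidden_units assumed
}
task_towers {
  tower_name: "cvr"
  label_name: "buy"
  dnn { hidden_units: [128, 64] }
  relation_tower_names: ["ctr"]       # this task depends on the ctr tower
  relation_dnn { hidden_units: [32] }
  weight: 1.0
}
```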

easy_rec/python/protos/train.proto

TrainConfig

Message for configuring EasyRec training jobs (train.py).
Next id: 25

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| optimizer_config | Optimizer | optional | optimizer options |
| gradient_clipping_by_norm | float | optional | If greater than 0, clips gradients by this value. Default: 0 |
| num_steps | uint32 | optional | Number of steps to train the model: if 0, will train the model indefinitely. Default: 0 |
| fine_tune_checkpoint | string | optional | Checkpoint to restore variables from. |
| fine_tune_ckpt_var_map | string | optional |  |
| sync_replicas | bool | optional | Whether to synchronize replicas during training. If so, a SyncReplicasOptimizer is built. Default: true |
| startup_delay_steps | float | optional | Number of training steps between replica startup. This flag must be set to 0 if sync_replicas is set to true. Default: 15 |
| save_checkpoints_steps | uint32 | optional | Step interval for saving checkpoint. Default: 1000 |
| save_checkpoints_secs | uint32 | optional | Seconds interval for saving checkpoint |
| keep_checkpoint_max | uint32 | optional | Max checkpoints to keep. Default: 10 |
| save_summary_steps | uint32 | optional | Save summaries every this many steps. Default: 1000 |
| log_step_count_steps | uint32 | optional | The frequency at which global step/sec and the loss will be logged during training. Default: 10 |
| is_profiling | bool | optional | profiling or not. Default: false |
| force_restore_shape_compatible | bool | optional | if variable shape is incompatible, clip or pad variables in checkpoint. Default: false |
| train_distribute | DistributionStrategy | optional | DistributionStrategy, available values include mirrored (MirroredStrategy, single machine and multiple devices), collective (CollectiveAllReduceStrategy, multiple machines and multiple devices) and ess. Default: NoStrategy |
| num_gpus_per_worker | int32 | optional | Number of gpus per machine. Default: 1 |
| separate_save | bool | optional | Default: false |
| summary_model_vars | bool | optional | summary model variables or not. Default: false |
| protocol | string | optional | distribute training protocol, grpc++ (https://help.aliyun.com/document_detail/173157.html) or star_server (https://help.aliyun.com/document_detail/173154.html) |
| inter_op_parallelism_threads | int32 | optional | inter_op_parallelism_threads. Default: 0 |
| intra_op_parallelism_threads | int32 | optional | intra_op_parallelism_threads. Default: 0 |
| tensor_fuse | bool | optional | tensor fusion on PAI-TF. Default: false |

DistributionStrategy

| Name | Number | Description |
| ---- | ------ | ----------- |
| NoStrategy | 0 | use old SyncReplicasOptimizer for ParameterServer training |
| PSStrategy | 1 | PSStrategy with multiple gpus on one node does not work on pai-tf; works only on TF >= 1.15 |
| MirroredStrategy | 2 | single worker multiple gpu mode; works only on PAI-TF or TF >= 1.15 |
| CollectiveAllReduceStrategy | 3 | Deprecated |
| ExascaleStrategy | 4 | currently not working well |
| MultiWorkerMirroredStrategy | 5 | multi worker multi gpu mode; see tf.distribute.experimental.MultiWorkerMirroredStrategy |
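A minimal `train_config` sketch combining fields from the table above; values are illustrative and the optimizer block is covered under optimizer.proto:

```protobuf
train_config {
  optimizer_config {
    # see optimizer.proto above for the optimizer / learning rate messages
  }
  num_steps: 100000
  sync_replicas: true              # only valid when train_distribute is NoStrategy
  train_distribute: NoStrategy
  num_gpus_per_worker: 1
  save_checkpoints_steps: 1000
  keep_checkpoint_max: 10
  save_summary_steps: 1000
  log_step_count_steps: 100
}
```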

easy_rec/python/protos/wide_and_deep.proto

WideAndDeep

| Field | Type | Label | Description |
| ----- | ---- | ----- | ----------- |
| wide_output_dim | uint32 | required | Default: 1 |
| dnn | DNN | required |  |
| final_dnn | DNN | optional | if set, the output of dnn and wide part are concatenated and passed to the final_dnn; otherwise, they are summed |
| l2_regularization | float | optional | Default: 0.0001 |
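A minimal sketch of this message; the `model_config { wide_and_deep { ... } }` wrapper name and the DNN `hidden_units` field are assumptions not documented here:

```protobuf
model_config {
  wide_and_deep {                            # assumed wrapper field name
    wide_output_dim: 16
    dnn { hidden_units: [256, 128, 64] }     # hidden_units assumed
    final_dnn { hidden_units: [64, 32] }     # optional: combine wide and deep outputs
    l2_regularization: 0.0001
  }
}
```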

Scalar Value Types

| .proto Type | Notes | C++ Type | Java Type | Python Type |
| ----------- | ----- | -------- | --------- | ----------- |
| double |  | double | double | float |
| float |  | float | float | float |
| int32 | Uses variable-length encoding. Inefficient for encoding negative numbers – if your field is likely to have negative values, use sint32 instead. | int32 | int | int |
| int64 | Uses variable-length encoding. Inefficient for encoding negative numbers – if your field is likely to have negative values, use sint64 instead. | int64 | long | int/long |
| uint32 | Uses variable-length encoding. | uint32 | int | int/long |
| uint64 | Uses variable-length encoding. | uint64 | long | int/long |
| sint32 | Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int32s. | int32 | int | int |
| sint64 | Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int64s. | int64 | long | int/long |
| fixed32 | Always four bytes. More efficient than uint32 if values are often greater than 2^28. | uint32 | int | int |
| fixed64 | Always eight bytes. More efficient than uint64 if values are often greater than 2^56. | uint64 | long | int/long |
| sfixed32 | Always four bytes. | int32 | int | int |
| sfixed64 | Always eight bytes. | int64 | long | int/long |
| bool |  | bool | boolean | boolean |
| string | A string must always contain UTF-8 encoded or 7-bit ASCII text. | string | String | str/unicode |
| bytes | May contain any arbitrary sequence of bytes. | string | ByteString | str |
+ + + diff --git a/docs/source/quick_start/emr_tutorial.md b/docs/source/quick_start/emr_tutorial.md index 9b395e6a4..f44e70409 100644 --- a/docs/source/quick_start/emr_tutorial.md +++ b/docs/source/quick_start/emr_tutorial.md @@ -5,8 +5,8 @@ 输入一般是csv格式的文件。 如下所示,列之间用,分割 - 示例数据: - - train: [dwd\_avazu\_ctr\_deepmodel\_train.csv](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/dwd_avazu_ctr_deepmodel_train.csv) - - test: [dwd\_avazu\_ctr\_deepmodel\_test.csv](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/dwd_avazu_ctr_deepmodel_test.csv) + - train: [dwd_avazu_ctr_deepmodel_train.csv](http://easyrec.oss-cn-beijing.aliyuncs.com/data/dwd_avazu_ctr_deepmodel_train.csv) + - test: [dwd_avazu_ctr_deepmodel_test.csv](http://easyrec.oss-cn-beijing.aliyuncs.com/data/dwd_avazu_ctr_deepmodel_test.csv) - 示例: ``` @@ -17,7 +17,7 @@ ## 创建DataScience集群: -[DataScience集群](https://help.aliyun.com/document_detail/170836.html?spm=a2c4g.11186623.6.867.352e53c5yP1ecR)参考 +[DataScience集群](https://help.aliyun.com/document_detail/170836.html)参考 ## Copy data to HDFS @@ -29,8 +29,8 @@ hadoop fs -put dwd_avazu_ctr_deepmodel_test.csv hdfs://emr-header-1:9000/user/ea ## 训练: -- 配置文件: [dwd\_avazu\_ctr\_deepmodel.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/emr/dwd_avazu_ctr_deepmodel.config) \*\* \*\* 配置文件采用prototxt格式,内容解析见[配置文件](#Qgqxc) -- 使用el\_submit提交训练任务,**el\_submit**相关参数请参考[**tf\_on\_yarn**](https://help.aliyun.com/document_detail/93031.html?spm=a2c4g.11186623.6.769.586c73a4PXSmHi) +- 配置文件: [dwd_avazu_ctr_deepmodel.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/emr/dwd_avazu_ctr_deepmodel.config) \*\* \*\* 配置文件采用prototxt格式,内容解析见[配置文件](#Qgqxc) +- 使用el_submit提交训练任务,**el_submit**相关参数请参考[**tf_on_yarn**](../tf_on_yarn.md) ### 开源TF模式 @@ -93,11 +93,11 @@ resource: ``` -- [查看任务日志](https://yuque.antfin-inc.com/pai/arch/muywnl) +- [查看任务日志](../emr_yarn_log.md) ## 评估: -- 使用el\_submit提交评估任务,**el\_submit**相关参数请参考[**tf\_on\_yarn**](https://help.aliyun.com/document_detail/93031.html?spm=a2c4g.11186623.6.769.586c73a4PXSmHi) +- 使用el_submit提交评估任务,**el_submit**相关参数请参考[**tf_on_yarn**](../tf_on_yarn.md) - **Note: 本示例仅仅展示流程,效果无参考价值。** ### 开源TF模式 @@ -155,11 +155,11 @@ resource: ## 导出: -- 使用el\_submit提交导出任务, **el\_submit**相关参数请参考[**tf\_on\_yarn**](https://help.aliyun.com/document_detail/93031.html?spm=a2c4g.11186623.6.769.586c73a4PXSmHi) +- 使用el_submit提交导出任务, **el_submit**相关参数请参考[**tf_on_yarn**](https://help.aliyun.com/document_detail/93031.html) -\--pipeline\_config\_path: EasyRec配置文件 -\--export\_dir: 导出模型目录  -\--checkpoint\_path: 指定checkpoint,默认不指定,不指定则使用model\_dir下面最新的checkpoint +--pipeline_config_path: EasyRec配置文件 +--export_dir: 导出模型目录  +--checkpoint_path: 指定checkpoint,默认不指定,不指定则使用model_dir下面最新的checkpoint ### 开源TF模式 @@ -265,17 +265,17 @@ pmml.json配置文件内容如下, easyrec是基于tensorflow/paitf的, 因此 ### 5. 
构造服务请求 -参考 [https://help.aliyun.com/document\_detail/111055.html?spm=a2c4g.11174283.6.772.58971987yxDYC5](https://help.aliyun.com/document_detail/111055.html?spm=a2c4g.11174283.6.772.58971987yxDYC5) +参考 [https://help.aliyun.com/document_detail/111055.html](https://help.aliyun.com/document_detail/111055.html) #### 1) 获取模型input output信息 ``` -curl http://pai-eas-vpc.cn-shanghai.aliyuncs.com/api/predict/mnist_saved_model_example | python -mjson.tool +curl http://pai-eas-vpc.cn-beijing.aliyuncs.com/api/predict/mnist_saved_model_example | python -mjson.tool ``` #### 2) python版 -参考 [https://github.com/pai-eas/eas-python-sdk?spm=a2c4g.11186623.2.16.29357867eNkrjw](https://github.com/pai-eas/eas-python-sdk?spm=a2c4g.11186623.2.16.29357867eNkrjw) +参考 [https://github.com/pai-eas/eas-python-sdk](https://github.com/pai-eas/eas-python-sdk) ``` #!/usr/bin/env python @@ -285,7 +285,7 @@ from eas_prediction import StringRequest from eas_prediction import TFRequest if __name__ == '__main__': - client = PredictClient('http://1828488879222746.cn-shanghai.pai-eas.aliyuncs.com', 'mnist_saved_model_example') + client = PredictClient('http://1828488879222746.cn-beijing.pai-eas.aliyuncs.com', 'mnist_saved_model_example') client.set_token('AAAAAAAAAAAAAAABBBBBBBBBBBBBBB==') client.init() @@ -300,7 +300,7 @@ if __name__ == '__main__': #### 3) 其他语言版 -参考 [https://help.aliyun.com/document\_detail/111055.html?spm=a2c4g.11186623.6.772.29357867eNkrjw](https://help.aliyun.com/document_detail/111055.html?spm=a2c4g.11186623.6.772.29357867eNkrjw) +参考 [https://help.aliyun.com/document_detail/111055.html](https://help.aliyun.com/document_detail/111055.html) ### 配置文件: @@ -499,17 +499,21 @@ model_config:{ #### Config下载 -[dwd\_avazu\_ctr\_deepmodel.config](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/emr/dwd_avazu_ctr_deepmodel.config) +[dwd_avazu_ctr_deepmodel.config](http://easyrec.oss-cn-beijing.aliyuncs.com/config/emr/dwd_avazu_ctr_deepmodel.config) #### ExcelConfig下载 ExcelConfig比Config更加简明 -- [dwd\_avazu\_ctr\_deepmodel.xls](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/dwd_avazu_ctr_deepmodel.xls) -- [ExcelConfig 转 Config](https://yuque.antfin-inc.com/pai/arch/ocumlg#BXDJh) +- [dwd_avazu_ctr_deepmodel.xls](http://easyrec.oss-cn-beijing.aliyuncs.com/data/dwd_avazu_ctr_deepmodel.xls) +- [ExcelConfig 转 Config](../feature/excel_config.md) ### 参考手册 -[EasyRecConfig参考手册](../proto.html) -[TF on EMR参考手册](https://help.aliyun.com/document_detail/93031.html?spm=a2c4g.11186623.6.769.586c73a4PXSmHi) -[DataScience集群手册](https://help.aliyun.com/document_detail/170836.html?spm=a2c4g.11186623.6.867.352e53c5yP1ecR) +- [EasyRecConfig参考手册](../reference.md) + +- [TF on EMR参考手册](../tf_on_yarn.md) + +- [DataScience集群手册](https://help.aliyun.com/document_detail/170836.html) + +- [EMR Tensorboard](../emr_tensorboard.md) diff --git a/docs/source/quick_start/local_tutorial.md b/docs/source/quick_start/local_tutorial.md index 12ba4645b..45216de7a 100644 --- a/docs/source/quick_start/local_tutorial.md +++ b/docs/source/quick_start/local_tutorial.md @@ -6,8 +6,8 @@ #### 示例数据 -- train: [dwd\_avazu\_ctr\_deepmodel\_train.csv](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/dwd_avazu_ctr_deepmodel_train.csv) -- test: [dwd\_avazu\_ctr\_deepmodel\_test.csv](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/dwd_avazu_ctr_deepmodel_test.csv) +- train: [dwd_avazu_ctr_deepmodel_train.csv](http://easyrec.oss-cn-beijing.aliyuncs.com/data/dwd_avazu_ctr_deepmodel_train.csv) +- test: 
[dwd_avazu_ctr_deepmodel_test.csv](http://easyrec.oss-cn-beijing.aliyuncs.com/data/dwd_avazu_ctr_deepmodel_test.csv) - 示例: ``` @@ -19,14 +19,14 @@ ### 安装包 ```bash -pip install http://easy-rec.oss-cn-hangzhou.aliyuncs.com/releases/easy_rec-20200922-py2.py3-none-any.whl +pip install -U https://easyrec.oss-cn-beijing.aliyuncs.com/releases/easy_rec-0.1.0-py2.py3-none-any.whl ``` ### 启动命令: #### 配置文件: -[dwd\_avazu\_ctr\_deepmodel\_local.config](https://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/DeepFM/dwd_avazu_ctr_deepmodel_local.config), 配置文件采用prototxt格式 +[dwd_avazu_ctr_deepmodel_local.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/DeepFM/dwd_avazu_ctr_deepmodel_local.config), 配置文件采用prototxt格式 #### GPU单机单卡: @@ -34,8 +34,8 @@ pip install http://easy-rec.oss-cn-hangzhou.aliyuncs.com/releases/easy_rec-20200 CUDA_VISIBLE_DEVICES=0 python -m easy_rec.python.train_eval --pipeline_config_path dwd_avazu_ctr_deepmodel_local.config ``` -- \--pipeline\_config\_path: 训练用的配置文件 -- \--continue\_train: 是否继续训 +- --pipeline_config_path: 训练用的配置文件 +- --continue_train: 是否继续训 #### GPU PS训练 @@ -45,7 +45,7 @@ CUDA_VISIBLE_DEVICES=0 python -m easy_rec.python.train_eval --pipeline_config_pa - Note: 本地只支持ps, master, worker模式,不支持ps, chief, worker, evaluator模式 ```bash -wget https://easy-rec.oss-cn-hangzhou.aliyuncs.com/scripts/train_2gpu.sh +wget https://easyrec.oss-cn-beijing.aliyuncs.com/scripts/train_2gpu.sh sh train_2gpu.sh dwd_avazu_ctr_deepmodel_local.config ``` @@ -280,4 +280,4 @@ model_config:{ #### 参考手册 -[EasyRecConfig参考手册](../proto.html) +[EasyRecConfig参考手册](../reference.md) diff --git a/docs/source/quick_start/mc_tutorial.md b/docs/source/quick_start/mc_tutorial.md index 0121714be..69039ef2f 100644 --- a/docs/source/quick_start/mc_tutorial.md +++ b/docs/source/quick_start/mc_tutorial.md @@ -4,67 +4,71 @@ 输入一般是odps表: -- train: pai\_online\_project.dwd\_avazu\_ctr\_deepmodel\_train -- test: pai\_online\_project.dwd\_avazu\_ctr\_deepmodel\_test +- train: pai_online_project.dwd_avazu_ctr_deepmodel_train +- test: pai_online_project.dwd_avazu_ctr_deepmodel_test 说明:原则上这两张表是自己odps的表,为了方便,以上提供case的两张表在任何地方都可以访问。两个表可以带分区,也可以不带分区。 ### 训练: -- 配置文件: [dwd\_avazu\_ctr\_deepmodel\_ext.config](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config), 配置文件采用prototxt格式,内容解析见[配置文件](#Qgqxc) - - 修改配置文件里面的**model\_dir**字段为: 自己的实验oss目录 +- 配置文件: [dwd_avazu_ctr_deepmodel_ext.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config), 配置文件采用prototxt格式,内容解析见[配置文件](#Qgqxc) + - 修改配置文件里面的**model_dir**字段为: 自己的实验oss目录 ```sql pai -name easy_rec_ext -project algo_public --Dconfig=oss://easy-rec/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config -Dcmd=train +-Dconfig=oss://easyrec/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config -Dtables=odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_train,odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_test -Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}' +-Dwith_evaluator=1 +-Dmodel_dir=oss://easyrec/ckpt/MultiTower -Darn=acs:ram::xxx:role/xxx --Dbuckets=oss://easy-rec/ --DossHost=oss-cn-hangzhou-internal.aliyuncs.com --Dwith_evaluator=1; +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com; ``` -- \-Dtables: 定义训练表和测试表,默认最后一个表示测试表。 -- \-Dcluster: 定义PS的数目和worker的数目,如果设置了--with\_evaluator,有一个worker将被用于做评估 -- \-Dconfig: 训练用的配置文件 -- \-Dcmd: train 模型训练 -- \-Dwith\_evaluator: 训练时需要评估 -- \-Darn: rolearn 
注意这个的arn要替换成客户自己的。可以从dataworks的设置中查看arn。 -- \-Dbuckets: config所在的bucket和保存模型的bucket; 如果有多个bucket,逗号分割 -- \-DossHost: ossHost地址 -- \-Dmodel\_dir: 如果指定了model\_dir将会覆盖config里面的model\_dir,一般在周期性调度的时候使用。 +- -Dcmd: train 模型训练 +- -Dconfig: 训练用的配置文件 +- -Dtables: 定义训练表和测试表,默认最后一个表示测试表。 +- -Dcluster: 定义PS的数目和worker的数目。具体见:[PAI-TF任务参数介绍](https://help.aliyun.com/document_detail/154186.html?spm=a2c4g.11186623.4.3.e56f1adb7AJ9T5) +- -Dwith_evaluator,训练时定义一个worker将被用于做评估 +- -Dmodel_dir: 如果指定了model_dir将会覆盖config里面的model_dir,一般在周期性调度的时候使用。 +- -Darn: rolearn 注意这个的arn要替换成客户自己的。可以从dataworks的设置中查看arn。 +- -Dbuckets: config所在的bucket和保存模型的bucket; 如果有多个bucket,逗号分割 +- -DossHost: ossHost地址 ### 注意: -- dataworks和pai的project 一样,案例都是pai\_online\_project,用户需要根据自己的环境修改。如果需要使用gpu,PAI的project需要设置开通GPU。链接:[https://pai.data.aliyun.com/console?projectId=®ionId=cn-shanghai\#/visual](https://pai.data.aliyun.com/console?projectId=%C2%AEionId=cn-shanghai#/visual) ,其中regionId可能不一致。 +- dataworks和pai的project 一样,案例都是pai_online_project,用户需要根据自己的环境修改。如果需要使用gpu,PAI的project需要设置开通GPU。链接:[https://pai.data.aliyun.com/console?projectId=®ionId=cn-beijing#/visual](https://pai.data.aliyun.com/console?projectId=%C2%AEionId=cn-beijing#/visual) ,其中regionId可能不一致。 ![mc_gpu](../../images/quick_start/mc_gpu.png) -- oss的bucket需要提前开通好,案例中bucket名称是easy-rec。参考:[https://help.aliyun.com/document\_detail/154186.html?spm=a2c4g.11186623.4.3.e56f1adb7AJ9T5](https://help.aliyun.com/document_detail/154186.html?spm=a2c4g.11186623.4.3.e56f1adb7AJ9T5) +- oss的bucket需要提前开通好,案例中bucket名称是easyrec。创建bucket请参考:[创建存储空间](https://help.aliyun.com/document_detail/31885.html) -- arn需要在PAI-studio的project(当前案例中的project是pai\_online\_project)的OSS访问授权设置页面查看和创建,如下图: +- arn需要在PAI-studio的project(当前案例中的project是pai_online_project)的OSS访问授权设置页面查看和创建,如下图: -![image.png](https://cdn.nlark.com/yuque/0/2020/png/2764402/1603677843509-0e114d07-387e-469e-9b1e-2cbe9421edbf.png#align=left&display=inline&height=287&margin=%5Bobject%20Object%5D&name=image.png&originHeight=862&originWidth=952&size=266204&status=done&style=none&width=317.3333333333333) +![image.png](../../images/quick_start/image.png) ### 评估: -```protobuf +```sql pai -name easy_rec_ext -project algo_public --Dconfig=oss://easy-rec/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config -Dcmd=evaluate +-Dconfig=oss://easyrec/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config -Dtables=odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_test -Dcluster='{"worker" : {"count":1, "cpu":1000, "gpu":100, "memory":40000}}' +-Dmodel_dir=oss://easyrec/ckpt/MultiTower -Darn=acs:ram::xxx:role/xxx --Dbuckets=oss://easy-rec/ --DossHost=oss-cn-hangzhou-internal.aliyuncs.com; +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com; ``` -- \-Dconfig: 同训练 -- \-Dcmd: evaluate 模型评估 -- \-Dtables: 只需要指定测试 tables -- \-Dcheckpoint\_path: 使用指定的checkpoint\_path +- -Dcmd: evaluate 模型评估 +- -Dconfig: 同训练 +- -Dtables: 只需要指定测试 tables +- -Dcluster: 评估不需要PS节点,指定一个worker节点即可 +- -Dmodel_dir: 如果指定了model_dir将会覆盖config里面的model_dir,一般在周期性调度的时候使用 +- -Dcheckpoint_path: 使用指定的checkpoint_path,如oss://easyrec/ckpt/MultiTower/model.ckpt-1000。不指定的话,默认model_dir中最新的ckpt文件。 ### 导出: @@ -74,19 +78,22 @@ pai -name easy_rec_ext -project algo_public ```sql pai -name easy_rec_ext -project algo_public --Dconfig=oss://easy-rec/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config -Dcmd=export --Dexport_dir=oss://easy-rec/easy_rec_test/export +-Dconfig=oss://easyrec/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config +-Dmodel_dir=oss://easyrec/ckpt/MultiTower 
+-Dexport_dir=oss://easyrec/ckpt/MultiTower/export -Dcluster='{"worker" : {"count":1, "cpu":1000, "memory":40000}}' -Darn=acs:ram::xxx:role/xxx --Dbuckets=oss://easy-rec/ --DossHost=oss-cn-hangzhou-internal.aliyuncs.com +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com ``` -- \-Dconfig: 同训练 -- \-Dcmd: export 模型导出 -- \-Dexport\_dir: 导出的目录 -- \-Dcheckpoint\_path: 使用指定的checkpoint\_path +- -Dcmd: export 模型导出 +- -Dconfig: 同训练 +- -Dmodel_dir: 同训练 +- -Dexport_dir: 导出的目录 +- -Dcluster: 评估不需要PS节点,指定一个worker节点即可 +- -Dcheckpoint_path: 同评估 ### 配置文件: @@ -97,7 +104,7 @@ pai -name easy_rec_ext -project algo_public train_input_path: "" eval_input_path: "" # 模型保存路径 -model_dir: "oss://easy-rec/easy_rec_test/experiment/dwd_avazu_ctr" +model_dir: "oss://easyrec/easy_rec_test/experiment/dwd_avazu_ctr" ``` #### 数据相关 @@ -279,8 +286,8 @@ model_config:{ ``` -配置文件下载:[dwd\_avazu\_ctr\_deepmodel\_ext.config](http://easy-rec.oss-cn-hangzhou.aliyuncs.com/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config) +配置文件下载:[dwd_avazu_ctr_deepmodel_ext.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/MultiTower/dwd_avazu_ctr_deepmodel_ext.config) #### 配置参考手册 -[EasyRecConfig参考手册](../proto.html) +[EasyRecConfig参考手册](../reference.md) diff --git a/docs/source/release.md b/docs/source/release.md index b36e0554b..c732681ee 100644 --- a/docs/source/release.md +++ b/docs/source/release.md @@ -2,14 +2,17 @@ ### Release Notes -| **Version** | **URL** | **Desc** | -| \-\-\-\-\-\-\-\-\--- | \-\-\-\-\-\-- | \-\-\-\-\-\--- | -| | | | +| **Version** | **URL** | **Desc** | +| ----------- | -------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| 20200922 | [Download](https://easyrec.oss-cn-beijing.aliyuncs.com/releases/easy_rec-20200922-py2.py3-none-any.whl) | add support for hpo | +| 20201102 | [Download](https://easyrec.oss-cn-beijing.aliyuncs.com/releases/easy_rec-20201102-py2.py3-none-any.whl) | add support for json config file; add dropout and custom activation function for dnn; add support for dssm/mmoe | +| 20201221 | [Download](https://easyrec.oss-cn-beijing.aliyuncs.com/releases/easy_rec-0.1.0-py2.py3-none-any.whl) | add new models: DCN, DBMTL, AUTOINT | +| 20210315 | [Download](https://easyrec.oss-cn-beijing.aliyuncs.com/release/whls/easy_rec-0.1.3-py2.py3-none-any.whl) | add support for knowledge distillation, MIND models | ### 本地升级 ```bash -pip install -U https://easy-rec.oss-cn-hangzhou.aliyuncs.com/releases/easy_rec-0.1.0-py2.py3-none-any.whl +pip install -U https://easyrec.oss-cn-beijing.aliyuncs.com/releases/easy_rec-0.1.0-py2.py3-none-any.whl ``` ### EMR EasyRec升级 @@ -17,7 +20,31 @@ pip install -U https://easy-rec.oss-cn-hangzhou.aliyuncs.com/releases/easy_rec-0 ```bash su hadoop cd /home/hadoop -wget https://easy-rec.oss-cn-hangzhou.aliyuncs.com/releases/releases/upgrade_easy_rec.sh -O upgrade_easy_rec.sh +wget https://easyrec.oss-cn-beijing.aliyuncs.com/releases/releases/upgrade_easy_rec.sh -O upgrade_easy_rec.sh chmod a+rx upgrade_easy_rec.sh -sh upgrade_easy_rec.sh https://easy-rec.oss-cn-hangzhou.aliyuncs.com/releases/easy_rec-0.1.0-py2.py3-none-any.whl +sh upgrade_easy_rec.sh https://easyrec.oss-cn-beijing.aliyuncs.com/releases/easy_rec-0.1.0-py2.py3-none-any.whl +``` + +### PAI(Max Compute) EasyRec升级 + +如果有xflow的部署权限 + +```bash +sh pai_jobs/deploy_ext.sh -V ${VERSION} +``` + +如果没有xflow的部署权限 + +```bash +sh 
pai_jobs/deploy_ext.sh -V ${VERSION} -O +``` + +执行的时候需要加上 + +``` +pai -name easy_rec_ext +-Dres_project=my_project +-Dversion=${VERSION} +... +; ``` diff --git a/docs/source/tf_on_yarn.md b/docs/source/tf_on_yarn.md new file mode 100644 index 000000000..ba0f22972 --- /dev/null +++ b/docs/source/tf_on_yarn.md @@ -0,0 +1,62 @@ +# TF ON YARN + +## 一、说明 + +TensorFlow是google开源的用于人工智能的学习系统,大大的降低了机器学习、深度学开发成本,分析人员、开发人员可以使用TensorFlow提供的多种算法实现自己的想法验证、模型设计等。 +TensorFlow on YARN是EMR推出的结合EMR Hadoop大数据处理能力以及TensorFlow深度学习能力,提供用户调度TensorFlow程序在EMR Hadoop之上,进行分布式处理的功能。 + +## 二、使用说明 + +说先需要创建[Data Science集群](https://help.aliyun.com/document_detail/170836.html),目前EMR 3.13.0版本开始支持创建Data Science集群。 +Data Science版本的EMR集群支持GPU调度,所以在Core节点,推荐用户选取GPU机器类型。 +目前TensorFlow支持的版本是1.8,用户选择想要安装的驱动以及cuDNN版本,EMR管控会将对应的驱动和cuDNN进行自动安装。 + +## 三、任务提交 + +目前任务提交还需要通过命令行提交,或者通过EMR-Flow提交任务(正在开发中)。 +如果采用命令行提交,提交命令为el_submit, 如下图: + +参数说明: + +- -t APP_TYPE  提交的任务类型,支持三种类型的任务类型\[tensorflow-ps, tensorflow-mpi, standalone\],三种类型要配合后面运行模式使用 + +> tensorflow-ps使用的是原生TensorFlow ps 类型 + +> tensorflow-mpi使用的是 uber 开源的基于MPI架构的horovod +> standalone模式是用户将任务调度到YARN集群中启动单机任务,类似于单机运行 +> tensorflow-worker多worker模式,适用于MultiWorkerMirroredStrategy + +- -a APP_NAME 提交的任务名称,用户可以根据需要起名 +- -m MODE 提交的运行时环境,目前支持四种类型运行时环境\[local, virtual-env,docker\] + +> local 使用的是emr-worker上面的python运行环境,所以如果要使用一些第三方python包需要手动在所有机器上进行安装 + +> docker 使用的是emr-worker上面的docker运行时,tensorflow运行在docker container内 + +> virtual-env 使用用户上传的python环境,可以在python环境中安装一些不同于worker节点的python库 + +- -m_arg MODE_ARG 提交的运行时补充参数,如果运行时是docker,则设置为docker镜像名称,如果是virtual-env,则指定python环境文件名称,tar.gz打包 +- -x Exit 分布式TensorFlow有些API需要用户手动退出PS,在这种情况下用户可以指定-x选项,当所有worker完成训练并成功后,PS节点自动退出 +- -enable_tensorboard 是否在启动训练任务的同时启动TensorBoard +- -log_tensorboard 如果训练同时启动TensorBoard,需要指定TensorBoard日志位置,需要时HDFS上的位置 +- -conf CONF Hadoop conf位置,默认可以不设,使用EMR默认配置 +- -f FILES 运行TensorFlow所有依赖的文件和文件夹,包含执行脚本,如果设置virtual-env 执行的virtual-env文件。用户可以将所有依赖放置到一个文件夹中,脚本会自动将文件夹按照文件夹层次关系上传到HDFS中 +- -pn TensorFlow启动的参数服务器个数 +- -pc 每个参数服务器申请的CPU核数 +- -pm 每个参数服务器申请的内存大小 +- -wn TensorFlow启动的Worker节点个数 +- -wc 每个Worker申请的CPU核数 +- -wg 每个Worker申请的GPU核数 +- -wm 每个Worker申请的内存个数 +- -wait_time  获取资源最大等待时间,单位分钟,比如-wait_time 1指的是启动master后最多等待一分钟获取所有资源,否则master失败 +- -c COMMAND 执行的命令,如python census.py + +进阶选项,用户需要谨慎使用进阶选项,可能造成任务失败 + +- -wnpg 每个GPU核上同时跑的worker数量(针对tensorflow-ps) +- -ppn 每个GPU核上同时跑的worker数量(针对horovod) + 以上两个选项指的是单卡多进程操作,由于共用一张显卡,需要在程序上进行一定限制,否则会造成显卡OOM。 + +## DEMO + +[Mnist Demo](./mnist_demo.md) diff --git a/docs/source/train.md b/docs/source/train.md new file mode 100644 index 000000000..f2276639d --- /dev/null +++ b/docs/source/train.md @@ -0,0 +1,155 @@ +# 训练 + +### train_config + +- log_step_count_steps: 200 # 每200轮打印一行log + +- optimizer_config # 优化器相关的参数 + + ```protobuf + { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + ``` + +- sync_replicas: true # 是否同步训练,默认是false + + - 使用SyncReplicasOptimizer进行分布式训练(同步模式) + - 仅在train_distribute为NoStrategy时可以设置成true,其它情况应该设置为false + - PS异步训练也设置为false + +- train_distribute: 默认不开启Strategy(NoStrategy), strategy确定分布式执行的方式 + + - NoStrategy 不使用Strategy + - PSStrategy 异步ParameterServer模式 + - MirroredStrategy 单机多卡模式,仅在PAI上可以使用,本地和EMR上不能使用 + - MultiWorkerMirroredStrategy 多机多卡模式,在TF版本>=1.15时可以使用 + +- num_gpus_per_worker: 仅在MirrorredStrategy, MultiWorkerMirroredStrategy, PSStrategy的时候有用 + +- num_steps: 1000 + + - 总共训练多少轮 + - num_steps = total_sample_num * 
num_epochs / batch_size / num_workers + - **分布式训练时一定要设置num_steps,否则评估任务会结束不了** + +- fine_tune_checkpoint: 需要restore的checkpoint路径,也可以是包含checkpoint的目录,如果目录里面有多个checkpoint,将使用最新的checkpoint + +- fine_tune_ckpt_var_map: 需要restore的参数列表文件路径,文件的每一行是{variable_name in current model ckpt}\\t{variable name in old model ckpt} + + - 需要设置fine_tune_ckpt_var_map的情形: + - current ckpt和old ckpt不完全匹配, 如embedding的名字不一样: + - old: input_layer/shopping_level_embedding/embedding_weights + - new: input_layer/shopping_embedding/embedding_weights + - 仅需要restore old ckpt里面的部分variable, 如embedding_weights + - 可以通过下面的文件查看参数列表 + + ```python + import tensorflow as tf + import os, sys + + ckpt_reader = tf.train.NewCheckpointReader('experiments/model.ckpt-0') + ckpt_var2shape_map = ckpt_reader.get_variable_to_shape_map() + for key in ckpt_var2shape_map: + print(key) + ``` + +- save_checkpoints_steps: 每隔多少轮保存一次checkpoint, 默认是1000 + +- save_checkpoints_secs: 每隔多少s保存一次checkpoint, 不可以和save_checkpoints_steps同时指定 + +- keep_checkpoint_max: 最多保存多少个checkpoint, 默认是10 + +- log_step_count_steps: 每隔多少轮,打印一次训练信息,默认是10 + +- save_summary_steps: 每隔多少轮,保存一次summary信息,默认是1000 + +- 更多参数请参考[easy_rec/python/protos/train.proto](./reference.md) + +### 训练命令 + +#### Local + +```bash +python -m easy_rec.python.train_eval --pipeline_config_path dwd_avazu_ctr_deepmodel.config +``` + +- --pipeline_config_path: config文件路径 +- --continue_train: restore之前的checkpoint,继续训练 +- --model_dir: 如果指定了model_dir将会覆盖config里面的model_dir,一般在周期性调度的时候使用 +- --edit_config_json: 使用json的方式对config的一些字段进行修改,如: + ```sql + --edit_config_json='{"train_config.fine_tune_checkpoint": "oss://easyrec/model.ckpt-50"}' + ``` + +#### On PAI + +```sql +pai -name easy_rec_ext -project algo_public +-Dconfig=oss://easyrec/easy_rec_test/dwd_avazu_ctr_deepmodel_ext.config +-Dcmd=train +-Dtables=odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_train,odps://pai_online_project/tables/dwd_avazu_ctr_deepmodel_test +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}' +-Darn=acs:ram::xxx:role/ev-ext-test-oss +-Dbuckets=oss://easyrec/ +-DossHost=oss-cn-beijing-internal.aliyuncs.com +-Dwith_evaluator=1; +``` + +- -Dtrain_tables: 训练表,可以指定多个,逗号分隔 +- -Deval_tables: 评估表,可以指定多个,逗号分隔 +- -Dcluster: 定义PS的数目和worker的数目,如果设置了--eval_method=separate,有一个worker将被用于做评估 +- -Dconfig: 训练用的配置文件 +- -Dcmd: train   模型训练 +- -Deval_method: 训练时需要评估, 可选参数: + - separate: 有一个worker被单独用来做评估(不参与训练) + - none: 不需要评估 + - master: 在master结点上做评估,master结点也参与训练 +- -Darn: rolearn  注意这个的arn要替换成客户自己的。可以从dataworks的设置中查看arn。 +- -DossHost: ossHost地址 +- -Dbuckets: config所在的bucket和保存模型的bucket; 如果有多个bucket,逗号分割 +- -Dmodel_dir: 如果指定了model_dir将会覆盖config里面的model_dir,一般在周期性调度的时候使用。 +- -Dedit_config_json: 使用json的方式对config的一些字段进行修改,如: + ```sql + -Dedit_config_json='{"train_config.fine_tune_checkpoint": "oss://easyrec/model.ckpt-50"}' + ``` +- 如果是pai内部版,则不需要指定arn和ossHost, arn和ossHost放在-Dbuckets里面 + - -Dbuckets=oss://easyrec/?role_arn=acs:ram::xxx:role/ev-ext-test-oss&host=oss-cn-beijing-internal.aliyuncs.com + +#### On EMR + +单机单卡模式: + +```bash +el_submit -t standalone -a easy_rec_train -f dwd_avazu_ctr_deepmodel.config -m local -wn 1 -wc 6 -wm 20000 -wg 1 -c "python -m easy_rec.python.train_eval --pipeline_config_path dwd_avazu_ctr_deepmodel.config --continue_train" +``` + +- 参数同Local模式 + +多worker模式: + +- 需要在配置文件中设置train_config.train_distribute为MultiWorkerMirroredStrategy + +```bash +el_submit -t standalone -a easy_rec_train -f dwd_avazu_ctr_deepmodel.config -m local -wn 1 -wc 6 -wm 20000 -wg 2 -c 
"python -m easy_rec.python.train_eval --pipeline_config_path dwd_avazu_ctr_deepmodel.config --continue_train" +``` + +- 参数同Local模式 + +PS模式: + +- 需要在配置文件中设置train_config.sync_replicas为true + +```bash +el_submit -t tensorflow-ps -a easy_rec_train -f dwd_avazu_ctr_deepmodel.config -m local -pn 1 -pc 4 -pm 20000 -wn 3 -wc 6 -wm 20000 -c "python -m easy_rec.python.train_eval --pipeline_config_path dwd_avazu_ctr_deepmodel.config --continue_train" +``` + +- 参数同Local模式 diff --git a/easy_rec/__init__.py b/easy_rec/__init__.py index ebe1d26d1..f25fff05f 100644 --- a/easy_rec/__init__.py +++ b/easy_rec/__init__.py @@ -4,6 +4,8 @@ import os import sys +import tensorflow as tf + from easy_rec.version import __version__ curr_dir, _ = os.path.split(__file__) @@ -18,11 +20,22 @@ from easy_rec.python.main import export # isort:skip # noqa: E402 from easy_rec.python.main import train_and_evaluate # isort:skip # noqa: E402 +try: + import tensorflow_io.oss +except Exception: + pass + print('easy_rec version: %s' % __version__) print('Usage: easy_rec.help()') _global_config = {} +ops_dir = os.path.join(curr_dir, 'python/ops') +if tf.__version__.startswith('1.12'): + ops_dir = os.path.join(ops_dir, '1.12') +elif tf.__version__.startswith('1.15'): + ops_dir = os.path.join(ops_dir, '1.15') + def help(): print(""" diff --git a/easy_rec/python/builders/hyperparams_builder.py b/easy_rec/python/builders/hyperparams_builder.py index 0bc0fb5f2..24013098d 100644 --- a/easy_rec/python/builders/hyperparams_builder.py +++ b/easy_rec/python/builders/hyperparams_builder.py @@ -69,4 +69,7 @@ def build_initializer(initializer): stddev=initializer.random_normal_initializer.stddev) if initializer_oneof == 'glorot_normal_initializer': return tf.glorot_normal_initializer() + if initializer_oneof == 'constant_initializer': + return tf.constant_initializer( + [x for x in initializer.constant_initializer.consts]) raise ValueError('Unknown initializer function: {}'.format(initializer_oneof)) diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py new file mode 100644 index 000000000..d984eb08f --- /dev/null +++ b/easy_rec/python/builders/loss_builder.py @@ -0,0 +1,84 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +import tensorflow as tf + +from easy_rec.python.protos.loss_pb2 import LossType + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +def build(loss_type, label, pred, loss_weight=1.0, num_class=1): + if loss_type == LossType.CLASSIFICATION: + if num_class == 1: + return tf.losses.sigmoid_cross_entropy( + label, logits=pred, weights=loss_weight) + else: + return tf.losses.sparse_softmax_cross_entropy( + labels=label, logits=pred, weights=loss_weight) + elif loss_type == LossType.CROSS_ENTROPY_LOSS: + return tf.losses.log_loss(label, pred, weights=loss_weight) + elif loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: + logging.info('%s is used' % LossType.Name(loss_type)) + return tf.losses.mean_squared_error( + labels=label, predictions=pred, weights=loss_weight) + else: + raise ValueError('invalid loss type: %s' % LossType.Name(loss_type)) + + +def build_kd_loss(kds, prediction_dict, label_dict): + """Build knowledge distillation loss. + + Args: + kds: list of knowledge distillation object of type KD. + prediction_dict: dict of predict_name to predict tensors. + label_dict: ordered dict of label_name to label tensors. + + Return: + knowledge distillation loss will be add to loss_dict with key: kd_loss. 
+ """ + loss_dict = {} + for kd in kds: + assert kd.pred_name in prediction_dict, \ + 'invalid predict_name: %s available ones: %s' % ( + kd.pred_name, ','.join(prediction_dict.keys())) + + loss_name = kd.loss_name + if not loss_name: + loss_name = 'kd_loss_' + kd.pred_name.replace('/', '_') + loss_name += '_' + kd.soft_label_name.replace('/', '_') + + label = label_dict[kd.soft_label_name] + pred = prediction_dict[kd.pred_name] + + if kd.loss_type == LossType.CROSS_ENTROPY_LOSS: + if not kd.label_is_logits: + label = tf.math.log(label + 1e-7) + if not kd.pred_is_logits: + pred = tf.math.log(pred + 1e-7) + + if kd.temperature > 0 and kd.loss_type == LossType.CROSS_ENTROPY_LOSS: + label = label / kd.temperature + pred = pred / kd.temperature + + if kd.loss_type == LossType.CROSS_ENTROPY_LOSS: + num_class = 1 if len(pred.get_shape()) < 2 else pred.get_shape()[-1] + if num_class > 1: + label = tf.nn.softmax(label) + pred = tf.nn.softmax(pred) + elif num_class == 1: + label = tf.nn.sigmoid(label) + pred = tf.nn.sigmoid(label) + + if kd.loss_type == LossType.CROSS_ENTROPY_LOSS: + loss_dict[loss_name] = tf.losses.log_loss( + label, pred, weights=kd.loss_weight) + elif kd.loss_type == LossType.L2_LOSS: + loss_dict[loss_name] = tf.losses.mean_squared_error( + labels=label, predictions=pred, weights=kd.loss_weight) + else: + assert False, 'unsupported loss type for kd: %s' % LossType.Name( + kd.loss_type) + return loss_dict diff --git a/easy_rec/python/builders/optimizer_builder.py b/easy_rec/python/builders/optimizer_builder.py index cbb1c4599..dfc52d9ea 100644 --- a/easy_rec/python/builders/optimizer_builder.py +++ b/easy_rec/python/builders/optimizer_builder.py @@ -14,8 +14,11 @@ # limitations under the License. # ============================================================================== """Functions to build training optimizers.""" +import logging + import tensorflow as tf +from easy_rec.python.compat import weight_decay_optimizers from easy_rec.python.core import learning_schedules if tf.__version__ >= '2.0': @@ -62,6 +65,40 @@ def build(optimizer_config): optimizer = tf.train.AdamOptimizer( learning_rate, beta1=config.beta1, beta2=config.beta2) + if optimizer_type == 'adamw_optimizer': + config = optimizer_config.adamw_optimizer + learning_rate = _create_learning_rate(config.learning_rate) + summary_vars.append(learning_rate) + logging.info('adamw_optimizer weight_decay = %.8f' % config.weight_decay) + optimizer = weight_decay_optimizers.AdamWOptimizer( + weight_decay=config.weight_decay, + learning_rate=learning_rate, + beta1=config.beta1, + beta2=config.beta2) + + if optimizer_type == 'adam_asyncw_optimizer': + config = optimizer_config.adam_asyncw_optimizer + learning_rate = _create_learning_rate(config.learning_rate) + summary_vars.append(learning_rate) + logging.info('adam_asyncw_optimizer weight_decay = %.8f' % + config.weight_decay) + optimizer = weight_decay_optimizers.AdamAsyncWOptimizer( + weight_decay=config.weight_decay, + learning_rate=learning_rate, + beta1=config.beta1, + beta2=config.beta2) + + if optimizer_type == 'momentumw_optimizer': + config = optimizer_config.momentumw_optimizer + learning_rate = _create_learning_rate(config.learning_rate) + summary_vars.append(learning_rate) + logging.info('momentumw_optimizer weight_decay = %.8f' % + config.weight_decay) + optimizer = weight_decay_optimizers.MomentumWOptimizer( + weight_decay=config.weight_decay, + learning_rate=learning_rate, + momentum=config.momentum_optimizer_value) + if optimizer_type == 'adagrad_optimizer': 
config = optimizer_config.adagrad_optimizer learning_rate = _create_learning_rate(config.learning_rate) @@ -78,6 +115,10 @@ def build(optimizer_config): if optimizer is None: raise ValueError('Optimizer %s not supported.' % optimizer_type) + if optimizer_config.use_moving_average: + optimizer = tf.contrib.opt.MovingAverageOptimizer( + optimizer, average_decay=optimizer_config.moving_average_decay) + return optimizer, summary_vars diff --git a/easy_rec/python/builders/strategy_builder.py b/easy_rec/python/builders/strategy_builder.py index c38969373..66c1a8b73 100644 --- a/easy_rec/python/builders/strategy_builder.py +++ b/easy_rec/python/builders/strategy_builder.py @@ -34,5 +34,8 @@ def build(train_config): num_gpus_per_worker=train_config.num_gpus_per_worker) # works under tf1.15 and tf2.x elif train_config.train_distribute == DistributionStrategy.PSStrategy: - distribution = tf.distribute.experimental.ParameterServerStrategy() + if tf.__version__ <= '1.15': + distribution = tf.contrib.distribute.ParameterServerStrategy() + else: + distribution = tf.distribute.experimental.ParameterServerStrategy() return distribution diff --git a/easy_rec/python/compat/exporter.py b/easy_rec/python/compat/exporter.py index a1f184638..d8e3ed418 100644 --- a/easy_rec/python/compat/exporter.py +++ b/easy_rec/python/compat/exporter.py @@ -31,6 +31,8 @@ from tensorflow.python.platform import tf_logging from tensorflow.python.summary import summary_iterator +from easy_rec.python.utils import io_util + def _loss_smaller(best_eval_result, current_eval_result): """Compares two evaluation results and returns true if the 2nd one is smaller. @@ -76,6 +78,13 @@ def _verify_compare_fn_args(compare_fn): (compare_fn, non_valid_args)) +def _get_ckpt_version(path): + _, tmp_name = os.path.split(path) + tmp_name, _ = os.path.splitext(tmp_name) + ver = tmp_name.split('-')[-1] + return int(ver) + + class BestExporter(Exporter): """This class exports the serving graph and checkpoints of the best models. @@ -215,18 +224,54 @@ def export(self, estimator, export_path, checkpoint_path, eval_result, self._garbage_collect_exports(export_path) # cp best checkpoints to best folder model_dir, _ = os.path.split(checkpoint_path) - best_dir = os.path.join(model_dir, 'best_ckpt') + # add / is to be compatiable with oss + best_dir = os.path.join(model_dir, 'best_ckpt/') tf_logging.info('Copy best checkpoint %s to %s' % (checkpoint_path, best_dir)) - if gfile.Exists(best_dir): - gfile.DeleteRecursively(best_dir) - gfile.MakeDirs(best_dir) + if not gfile.Exists(best_dir): + gfile.MakeDirs(best_dir) for tmp_file in gfile.Glob(checkpoint_path + '.*'): _, file_name = os.path.split(tmp_file) - gfile.Copy(tmp_file, os.path.join(best_dir, file_name)) + # skip temporary files + if 'tempstate' in file_name: + continue + dst_path = os.path.join(best_dir, file_name) + tf_logging.info('Copy file %s to %s' % (tmp_file, dst_path)) + try: + gfile.Copy(tmp_file, dst_path) + except Exception as ex: + tf_logging.warn('Copy file %s to %s failed: %s' % + (tmp_file, dst_path, str(ex))) + self._garbage_collect_ckpts(best_dir) return export_result + def _garbage_collect_ckpts(self, best_dir): + """Deletes older best ckpts, retaining only a given number of the most recent. + + Args: + best_dir: the directory where the n best ckpts are saved. 
+ """ + if self._exports_to_keep is None: + return + + # delete older checkpoints + tmp_files = gfile.Glob(os.path.join(best_dir, 'model.ckpt-*.meta')) + if len(tmp_files) <= self._exports_to_keep: + return + + tmp_steps = [_get_ckpt_version(x) for x in tmp_files] + tmp_steps = sorted(tmp_steps) + drop_num = len(tmp_steps) - self._exports_to_keep + tf_logging.info( + 'garbage_collect_ckpts: steps: %s export_to_keep: %d drop num: %d' % + (str(tmp_steps), self._exports_to_keep, drop_num)) + for ver in tmp_steps[:drop_num]: + tmp_prefix = os.path.join(best_dir, 'model.ckpt-%d.*' % ver) + for tmp_file in gfile.Glob(tmp_prefix): + tf_logging.info('Remove ckpt file: ' + tmp_file) + gfile.Remove(tmp_file) + def _garbage_collect_exports(self, export_dir_base): """Deletes older exports, retaining only a given number of the most recent. @@ -253,7 +298,7 @@ def _export_version_parser(path): for p in delete_filter( gc._get_paths(export_dir_base, parser=_export_version_parser)): try: - gfile.DeleteRecursively(p.path) + gfile.DeleteRecursively(io_util.fix_oss_dir(p.path)) except errors_impl.NotFoundError as e: tf_logging.warn('Can not delete %s recursively: %s', p.path, e) # pylint: enable=protected-access @@ -422,7 +467,7 @@ def _export_version_parser(path): for p in delete_filter( gc._get_paths(export_dir_base, parser=_export_version_parser)): try: - gfile.DeleteRecursively(p.path) + gfile.DeleteRecursively(io_util.fix_oss_dir(p.path)) except errors_impl.NotFoundError as e: tf_logging.warn('Can not delete %s recursively: %s', p.path, e) # pylint: enable=protected-access diff --git a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index 6123649f4..dff24efe4 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -896,7 +896,7 @@ def model_fn(features, ...): categorical_column.name)) if initializer is None: initializer = init_ops.truncated_normal_initializer( - mean=0.0, stddev=1 / math.sqrt(dimension)) + mean=0.0, stddev=0.01 / math.sqrt(dimension)) embedding_shape = categorical_column._num_buckets, dimension # pylint: disable=protected-access @@ -2583,10 +2583,19 @@ def _get_dense_tensor_internal(self, partitioner=self.partitioner, collections=weight_collections) else: + # at eval or inference time, it is necessary to set + # the initializers to zeros, so that new key will + # get zero embedding + import os + if os.environ.get('tf.estimator.mode', '') != \ + os.environ.get('tf.estimator.ModeKeys.TRAIN', 'train'): + initializer = init_ops.zeros_initializer() + else: + initializer = self.initializer embedding_weights = variable_scope.get_embedding_variable( name='embedding_weights', embedding_dim=self.dimension, - initializer=self.initializer, + initializer=initializer, trainable=self.trainable and trainable, partitioner=self.partitioner, collections=weight_collections) diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index 0dd0ef28c..57a1e85e9 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -898,7 +898,7 @@ def model_fn(features, ...): categorical_column.name)) if initializer is None: initializer = init_ops.truncated_normal_initializer( - mean=0.0, stddev=1 / math.sqrt(dimension)) + mean=0.0, stddev=0.01 / math.sqrt(dimension)) return EmbeddingColumn( 
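# Illustrative arithmetic (not part of this patch) for the initializer changes
# above: training now uses a much narrower truncated normal, while eval/inference
# falls back to zeros so that unseen embedding keys read back as zero vectors.
# dimension=16 is an assumed value.
import math
dimension = 16
train_stddev = 0.01 / math.sqrt(dimension)  # -> 0.0025 (previously 1 / sqrt(dim) = 0.25)
eval_init = [0.0] * dimension               # zeros_initializer outside of TRAIN mode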
categorical_column=categorical_column, @@ -1034,7 +1034,7 @@ def model_fn(features, ...): raise ValueError('initializer must be callable if specified.') if initializer is None: initializer = init_ops.truncated_normal_initializer( - mean=0.0, stddev=1. / math.sqrt(dimension)) + mean=0.0, stddev=0.01 / math.sqrt(dimension)) # Sort the columns so the default collection name is deterministic even if the # user passes columns from an unsorted collection, such as dict.values(). @@ -1207,7 +1207,7 @@ def model_fn(features, ...): raise ValueError('initializer must be callable if specified.') if initializer is None: initializer = init_ops.truncated_normal_initializer( - mean=0.0, stddev=1. / math.sqrt(dimension)) + mean=0.0, stddev=0.01 / math.sqrt(dimension)) # Sort the columns so the default collection name is deterministic even if the # user passes columns from an unsorted collection, such as dict.values(). @@ -3087,10 +3087,19 @@ def _old_get_dense_tensor_internal(self, sparse_tensors, weight_collections, partitioner=self.partitioner, collections=weight_collections) else: + # at eval or inference time, it is necessary to set + # the initializers to zeros, so that new key will + # get zero embedding + import os + if os.environ.get('tf.estimator.mode', '') != \ + os.environ.get('tf.estimator.ModeKeys.TRAIN', 'train'): + initializer = init_ops.zeros_initializer() + else: + initializer = self.initializer embedding_weights = variable_scope.get_embedding_variable( name='embedding_weights', embedding_dim=self.dimension, - initializer=self.initializer, + initializer=initializer, trainable=self.trainable and trainable, partitioner=self.partitioner, collections=weight_collections) diff --git a/easy_rec/python/compat/optimizers.py b/easy_rec/python/compat/optimizers.py index 5796033bd..21fede4b8 100644 --- a/easy_rec/python/compat/optimizers.py +++ b/easy_rec/python/compat/optimizers.py @@ -74,6 +74,7 @@ def optimize_loss(loss, name=None, summaries=None, colocate_gradients_with_ops=False, + not_apply_grad_after_first_step=False, increment_global_step=True): """Given loss and parameters for optimizer, returns a training op. @@ -139,6 +140,8 @@ def optimize_loss(loss, OPTIMIZER_SUMMARIES. colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. + not_apply_grad_after_first_step: If true, do not apply gradients + after the first step, for chief_redundant. increment_global_step: Whether to increment `global_step`. If your model calls `optimize_loss` multiple times per training step (e.g. to optimize different parts of the model), use this arg to avoid incrementing @@ -296,13 +299,19 @@ def optimize_loss(loss, clip_ops.global_norm(list(zip(*gradients))[0])) # Create gradient updates. - grad_updates = opt.apply_gradients( - gradients, - global_step=global_step if increment_global_step else None, - name='train') - - # Ensure the train_tensor computes grad_updates. - train_tensor = control_flow_ops.with_dependencies([grad_updates], loss) + def _apply_grad(): + grad_updates = opt.apply_gradients( + gradients, + global_step=global_step if increment_global_step else None, + name='train') + return control_flow_ops.with_dependencies([grad_updates], loss) + + if not_apply_grad_after_first_step: + train_tensor = control_flow_ops.cond(global_step > 0, lambda: loss, + _apply_grad) + else: + # Ensure the train_tensor computes grad_updates.
+ train_tensor = _apply_grad() return train_tensor diff --git a/easy_rec/python/compat/weight_decay_optimizers.py b/easy_rec/python/compat/weight_decay_optimizers.py new file mode 100755 index 000000000..d29dce5bb --- /dev/null +++ b/easy_rec/python/compat/weight_decay_optimizers.py @@ -0,0 +1,475 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Base class to make optimizers weight decay ready.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import adam +from tensorflow.python.training import momentum as momentum_opt +from tensorflow.python.training import optimizer +from tensorflow.python.util.tf_export import tf_export + + +class DecoupledWeightDecayExtension(object): + """This class allows to extend optimizers with decoupled weight decay. + + It implements the decoupled weight decay described by Loshchilov & Hutter + (https://arxiv.org/pdf/1711.05101.pdf), in which the weight decay is + decoupled from the optimization steps w.r.t. to the loss function. + For SGD variants, this simplifies hyperparameter search since it decouples + the settings of weight decay and learning rate. + For adaptive gradient algorithms, it regularizes variables with large + gradients more than L2 regularization would, which was shown to yield better + training loss and generalization error in the paper above. + + This class alone is not an optimizer but rather extends existing + optimizers with decoupled weight decay. We explicitly define the two examples + used in the above paper (SGDW and AdamW), but in general this can extend + any OptimizerX by using + `extend_with_weight_decay(OptimizerX, weight_decay=weight_decay)`. + In order for it to work, it must be the first class the Optimizer with + weight decay inherits from, e.g. + + ```python + class AdamWOptimizer(DecoupledWeightDecayExtension, adam.AdamOptimizer): + def __init__(self, weight_decay, *args, **kwargs): + super(AdamWOptimizer, self).__init__(weight_decay, *args, **kwargs). + ``` + + Note that this extension decays weights BEFORE applying the update based + on the gradient, i.e. this extension only has the desired behaviour for + optimizers which do not depend on the value of'var' in the update step! + + Note: when applying a decay to the learning rate, be sure to manually apply + the decay to the `weight_decay` as well. For example: + + ```python + schedule = + tf.compat.v1.train.piecewise_constant(tf.compat.v1.train.get_global_step(), + [10000, 15000], [1e-0, 1e-1, 1e-2]) + lr = 1e-1 * schedule() + wd = lambda: 1e-4 * schedule() + + # ... 
+ + optimizer = tf.contrib.opt.MomentumWOptimizer(learning_rate=lr, + weight_decay=wd, + momentum=0.9, + use_nesterov=True) + ``` + """ + + def __init__(self, weight_decay, **kwargs): + """Construct the extension class that adds weight decay to an optimizer. + + Args: + weight_decay: A `Tensor` or a floating point value, the factor by which a + variable is decayed in the update step. + **kwargs: Optional list or tuple or set of `Variable` objects to decay. + """ + self._decay_var_list = None # is set in minimize or apply_gradients + self._weight_decay = weight_decay + # The tensors are initialized in call to _prepare + self._weight_decay_tensor = None + super(DecoupledWeightDecayExtension, self).__init__(**kwargs) + + def minimize(self, + loss, + global_step=None, + var_list=None, + gate_gradients=optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + name=None, + grad_loss=None, + decay_var_list=None): + """Add operations to minimize `loss` by updating `var_list` with decay. + + This function is the same as Optimizer.minimize except that it allows to + specify the variables that should be decayed using decay_var_list. + If decay_var_list is None, all variables in var_list are decayed. + + For more information see the documentation of Optimizer.minimize. + + Args: + loss: A `Tensor` containing the value to minimize. + global_step: Optional `Variable` to increment by one after the variables + have been updated. + var_list: Optional list or tuple of `Variable` objects to update to + minimize `loss`. Defaults to the list of variables collected in the + graph under the key `GraphKeys.TRAINABLE_VARIABLES`. + gate_gradients: How to gate the computation of gradients. Can be + `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. + aggregation_method: Specifies the method used to combine gradient terms. + Valid values are defined in the class `AggregationMethod`. + colocate_gradients_with_ops: If True, try colocating gradients with the + corresponding op. + name: Optional name for the returned operation. + grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`. + decay_var_list: Optional list of decay variables. + + Returns: + An Operation that updates the variables in `var_list`. If `global_step` + was not `None`, that operation also increments `global_step`. + """ + self._decay_var_list = set(decay_var_list) if decay_var_list else False + return super(DecoupledWeightDecayExtension, self).minimize( + loss, + global_step=global_step, + var_list=var_list, + gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + name=name, + grad_loss=grad_loss) + + def apply_gradients(self, + grads_and_vars, + global_step=None, + name=None, + decay_var_list=None): + """Apply gradients to variables and decay the variables. + + This function is the same as Optimizer.apply_gradients except that it + allows to specify the variables that should be decayed using + decay_var_list. If decay_var_list is None, all variables in var_list + are decayed. + + For more information see the documentation of Optimizer.apply_gradients. + + Args: + grads_and_vars: List of (gradient, variable) pairs as returned by + `compute_gradients()`. + global_step: Optional `Variable` to increment by one after the variables + have been updated. + name: Optional name for the returned operation. Default to the name + passed to the `Optimizer` constructor. + decay_var_list: Optional list of decay variables. 
+ + Returns: + An `Operation` that applies the specified gradients. If `global_step` + was not None, that operation also increments `global_step`. + """ + self._decay_var_list = set(decay_var_list) if decay_var_list else False + return super(DecoupledWeightDecayExtension, self).apply_gradients( + grads_and_vars, global_step=global_step, name=name) + + def _prepare(self): + weight_decay = self._weight_decay + if callable(weight_decay): + weight_decay = weight_decay() + self._weight_decay_tensor = ops.convert_to_tensor( + weight_decay, name='weight_decay') + # Call the optimizers _prepare function. + super(DecoupledWeightDecayExtension, self)._prepare() + + def _decay_weights_op(self, var): + if not self._decay_var_list or var in self._decay_var_list: + return var.assign_sub(self._weight_decay * var, self._use_locking) + return control_flow_ops.no_op() + + def _decay_weights_sparse_op(self, var, indices, scatter_add): + if not self._decay_var_list or var in self._decay_var_list: + update = -self._weight_decay * array_ops.gather(var, indices) + return scatter_add(var, indices, update, self._use_locking) + return control_flow_ops.no_op() + + # Here, we overwrite the apply functions that the base optimizer calls. + # super().apply_x resolves to the apply_x function of the BaseOptimizer. + def _apply_dense(self, grad, var): + with ops.control_dependencies([self._decay_weights_op(var)]): + return super(DecoupledWeightDecayExtension, self)._apply_dense(grad, var) + + def _resource_apply_dense(self, grad, var): + with ops.control_dependencies([self._decay_weights_op(var)]): + return super(DecoupledWeightDecayExtension, + self)._resource_apply_dense(grad, var) + + def _apply_sparse(self, grad, var): + scatter_add = state_ops.scatter_add + decay_op = self._decay_weights_sparse_op(var, grad.indices, scatter_add) + with ops.control_dependencies([decay_op]): + return super(DecoupledWeightDecayExtension, self)._apply_sparse(grad, var) + + def _resource_scatter_add(self, x, i, v, _=None): + # last argument allows for one overflow argument, to have the same function + # signature as state_ops.scatter_add + with ops.control_dependencies( + [resource_variable_ops.resource_scatter_add(x.handle, i, v)]): + return x.value() + + def _resource_apply_sparse(self, grad, var, indices): + scatter_add = self._resource_scatter_add + decay_op = self._decay_weights_sparse_op(var, indices, scatter_add) + with ops.control_dependencies([decay_op]): + return super(DecoupledWeightDecayExtension, + self)._resource_apply_sparse(grad, var, indices) + + +def extend_with_decoupled_weight_decay(base_optimizer): + """Factory function returning an optimizer class with decoupled weight decay. + + Returns an optimizer class. An instance of the returned class computes the + update step of `base_optimizer` and additionally decays the weights. + E.g., the class returned by + `extend_with_decoupled_weight_decay(tf.compat.v1.train.AdamOptimizer)` is + equivalent to + `tf.contrib.opt.AdamWOptimizer`. + + The API of the new optimizer class slightly differs from the API of the + base optimizer: + - The first argument to the constructor is the weight decay rate. + - `minimize` and `apply_gradients` accept the optional keyword argument + `decay_var_list`, which specifies the variables that should be decayed. + If `None`, all variables that are optimized are decayed. 
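# Illustrative arithmetic (not part of this patch): the decoupled decay above is
# applied to the variable before the base optimizer step, which is not the same
# as adding an L2 term to the loss. Plain SGD with assumed values:
w, lr, wd, grad = 1.0, 0.1, 0.01, 0.5
decoupled = (w - wd * w) - lr * grad   # 0.99 - 0.05      = 0.9400
l2_in_loss = w - lr * (grad + wd * w)  # 1.0 - 0.1 * 0.51 = 0.9490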
+ + Usage example: + ```python + # MyAdamW is a new class + MyAdamW = extend_with_decoupled_weight_decay(tf.compat.v1.train.AdamOptimizer) + # Create a MyAdamW object + optimizer = MyAdamW(weight_decay=0.001, learning_rate=0.001) + sess.run(optimizer.minimize(loss, decay_variables=[var1, var2])) + + Note that this extension decays weights BEFORE applying the update based + on the gradient, i.e. this extension only has the desired behaviour for + optimizers which do not depend on the value of'var' in the update step! + ``` + + Args: + base_optimizer: An optimizer class that inherits from tf.train.Optimizer. + + Returns: + A new optimizer class that inherits from DecoupledWeightDecayExtension + and base_optimizer. + """ + + class OptimizerWithDecoupledWeightDecay(DecoupledWeightDecayExtension, + base_optimizer): + """Base_optimizer with decoupled weight decay. + + This class computes the update step of `base_optimizer` and + additionally decays the variable with the weight decay being decoupled from + the optimization steps w.r.t. to the loss function, as described by + Loshchilov & Hutter (https://arxiv.org/pdf/1711.05101.pdf). + For SGD variants, this simplifies hyperparameter search since + it decouples the settings of weight decay and learning rate. + For adaptive gradient algorithms, it regularizes variables with large + gradients more than L2 regularization would, which was shown to yield + better training loss and generalization error in the paper above. + """ + + def __init__(self, weight_decay, *args, **kwargs): + # super delegation is necessary here + # pylint: disable=useless-super-delegation + super(OptimizerWithDecoupledWeightDecay, + self).__init__(weight_decay, *args, **kwargs) + # pylint: enable=useless-super-delegation + + return OptimizerWithDecoupledWeightDecay + + +@tf_export('contrib.opt.MomentumWOptimizer') +class MomentumWOptimizer(DecoupledWeightDecayExtension, + momentum_opt.MomentumOptimizer): + """Optimizer that implements the Momentum algorithm with weight_decay. + + This is an implementation of the SGDW optimizer described in "Fixing + Weight Decay Regularization in Adam" by Loshchilov & Hutter + (https://arxiv.org/abs/1711.05101) + ([pdf])(https://arxiv.org/pdf/1711.05101.pdf). + It computes the update step of `train.MomentumOptimizer` and additionally + decays the variable. Note that this is different from adding + L2 regularization on the variables to the loss. Decoupling the weight decay + from other hyperparameters (in particular the learning rate) simplifies + hyperparameter search. + + For further information see the documentation of the Momentum Optimizer. + + Note that this optimizer can also be instantiated as + ```python + extend_with_weight_decay(tf.compat.v1.train.MomentumOptimizer, + weight_decay=weight_decay) + ``` + """ + + def __init__(self, + weight_decay, + learning_rate, + momentum, + use_locking=False, + name='MomentumW', + use_nesterov=False): + """Construct a new MomentumW optimizer. + + For further information see the documentation of the Momentum Optimizer. + + Args: + weight_decay: A `Tensor` or a floating point value. The weight decay. + learning_rate: A `Tensor` or a floating point value. The learning rate. + momentum: A `Tensor` or a floating point value. The momentum. + use_locking: If `True` use locks for update operations. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "Momentum". + use_nesterov: If `True` use Nesterov Momentum. 
See [Sutskever et al., + 2013]( + http://jmlr.org/proceedings/papers/v28/sutskever13.pdf). This + implementation always computes gradients at the value of the + variable(s) passed to the optimizer. Using Nesterov Momentum makes the + variable(s) track the values called `theta_t + mu*v_t` in the paper. + @compatibility(eager) When eager execution is enabled, learning_rate, + weight_decay and momentum can each be a callable that takes no + arguments and returns the actual value to use. This can be useful for + changing these values across different invocations of optimizer + functions. @end_compatibility + """ + super(MomentumWOptimizer, self).__init__( + weight_decay, + learning_rate=learning_rate, + momentum=momentum, + use_locking=use_locking, + name=name, + use_nesterov=use_nesterov) + + +@tf_export('contrib.opt.AdamWOptimizer') +class AdamWOptimizer(DecoupledWeightDecayExtension, adam.AdamOptimizer): + """Optimizer that implements the Adam algorithm with weight decay. + + This is an implementation of the AdamW optimizer described in ["Fixing + Weight Decay Regularization in Adam" by Loshchilov & Hutter] + (https://arxiv.org/abs/1711.05101) + ([pdf](https://arxiv.org/pdf/1711.05101.pdf)). + + It computes the update step of `train.AdamOptimizer` and additionally decays + the variable. Note that this is different from adding L2 regularization on + the variables to the loss: it regularizes variables with large + gradients more than L2 regularization would, which was shown to yield better + training loss and generalization error in the paper above. + + For further information see the documentation of the Adam Optimizer. + + Note that this optimizer can also be instantiated as + ```python + extend_with_weight_decay(tf.compat.v1.train.AdamOptimizer, + weight_decay=weight_decay) + ``` + """ + + def __init__(self, + weight_decay, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + use_locking=False, + name='AdamW'): + """Construct a new AdamW optimizer. + + For further information see the documentation of the Adam Optimizer. + + Args: + weight_decay: A `Tensor` or a floating point value. The weight decay. + learning_rate: A Tensor or a floating point value. The learning rate. + beta1: A float value or a constant float tensor. The exponential decay + rate for the 1st moment estimates. + beta2: A float value or a constant float tensor. The exponential decay + rate for the 2nd moment estimates. + epsilon: A small constant for numerical stability. This epsilon is + "epsilon hat" in the Kingma and Ba paper (in the formula just before + Section 2.1), not the epsilon in Algorithm 1 of the paper. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients. + Defaults to "Adam". + """ + super(AdamWOptimizer, self).__init__( + weight_decay, + learning_rate=learning_rate, + beta1=beta1, + beta2=beta2, + epsilon=epsilon, + use_locking=use_locking, + name=name) + + +try: + from tensorflow.python.training import AdamAsyncOptimizer + + @tf_export('contrib.opt.AdamAsyncWOptimizer') + class AdamAsyncWOptimizer(DecoupledWeightDecayExtension, AdamAsyncOptimizer): + """Optimizer that implements the Adam algorithm with weight decay. + + This is an implementation of the AdamW optimizer described in ["Fixing + Weight Decay Regularization in Adam" by Loshchilov & Hutter] + (https://arxiv.org/abs/1711.05101) + ([pdf](https://arxiv.org/pdf/1711.05101.pdf)). 
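# Illustrative usage sketch (not part of this patch) for the AdamWOptimizer
# defined above, assuming TF1-style graph mode; the variable, loss and
# hyper-parameters are made up.
import tensorflow as tf
from easy_rec.python.compat import weight_decay_optimizers
w = tf.get_variable('w', shape=[4], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))
opt = weight_decay_optimizers.AdamWOptimizer(weight_decay=1e-5, learning_rate=1e-3)
train_op = opt.minimize(loss, global_step=tf.train.get_or_create_global_step())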
+ + It computes the update step of `train.AdamOptimizer` and additionally decays + the variable. Note that this is different from adding L2 regularization on + the variables to the loss: it regularizes variables with large + gradients more than L2 regularization would, which was shown to yield better + training loss and generalization error in the paper above. + + For further information see the documentation of the Adam Optimizer. + + Note that this optimizer can also be instantiated as + ```python + extend_with_weight_decay(tf.compat.v1.train.AdamAsyncWOptimizer, + weight_decay=weight_decay) + ``` + """ + + def __init__(self, + weight_decay, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + use_locking=False, + name='AdamAsyncW'): + """Construct a new AdamW optimizer. + + For further information see the documentation of the Adam Optimizer. + + Args: + weight_decay: A `Tensor` or a floating point value. The weight decay. + learning_rate: A Tensor or a floating point value. The learning rate. + beta1: A float value or a constant float tensor. The exponential decay + rate for the 1st moment estimates. + beta2: A float value or a constant float tensor. The exponential decay + rate for the 2nd moment estimates. + epsilon: A small constant for numerical stability. This epsilon is + "epsilon hat" in the Kingma and Ba paper (in the formula just before + Section 2.1), not the epsilon in Algorithm 1 of the paper. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients. + Defaults to "Adam". + """ + super(AdamAsyncWOptimizer, self).__init__( + weight_decay, + learning_rate=learning_rate, + beta1=beta1, + beta2=beta2, + epsilon=epsilon, + use_locking=use_locking, + name=name) +except ImportError: + pass diff --git a/easy_rec/python/core/metrics.py b/easy_rec/python/core/metrics.py new file mode 100644 index 000000000..ee5acd2d6 --- /dev/null +++ b/easy_rec/python/core/metrics.py @@ -0,0 +1,129 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +from collections import defaultdict + +import numpy as np +import tensorflow as tf +from sklearn import metrics as sklearn_metrics + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +def max_f1(label, predictions): + """Calculate the largest F1 metric under different thresholds. + + Args: + label: Ground truth (correct) target values. + predictions: Estimated targets as returned by a model. + """ + num_thresholds = 200 + kepsilon = 1e-7 + thresholds = [ + (i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2) + ] + thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] + + f1_scores = [] + precision_update_ops = [] + recall_update_ops = [] + for threshold in thresholds: + pred = (predictions > threshold) + precision, precision_update_op = tf.metrics.precision( + labels=label, predictions=pred, name='precision_%s' % threshold) + recall, recall_update_op = tf.metrics.recall( + labels=label, predictions=pred, name='recall_%s' % threshold) + f1_score = (2 * precision * recall) / (precision + recall + 1e-12) + precision_update_ops.append(precision_update_op) + recall_update_ops.append(recall_update_op) + f1_scores.append(f1_score) + + f1 = tf.math.reduce_max(tf.stack(f1_scores)) + f1_update_op = tf.group(precision_update_ops + recall_update_ops) + return f1, f1_update_op + + +def _separated_auc_impl(labels, predictions, keys, reduction='mean'): + """Computes the AUC group by the key separately. 
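# Illustrative sketch (not part of this patch): the quantity max_f1 above tracks,
# computed offline with sklearn; the labels and predictions are made up.
import numpy as np
from sklearn.metrics import precision_recall_curve
labels = np.array([0, 0, 1, 1, 1])
preds = np.array([0.1, 0.4, 0.35, 0.8, 0.9])
precision, recall, _ = precision_recall_curve(labels, preds)
best_f1 = np.max(2 * precision * recall / (precision + recall + 1e-12))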
+ + Args: + labels: A `Tensor` whose shape matches `predictions`. Will be cast to + `bool`. + predictions: A floating point `Tensor` of arbitrary shape and whose values + are in the range `[0, 1]`. + keys: keys to be group by, A int or string `Tensor` whose shape matches `predictions`. + reduction: reduction metric for auc of different keys + * "mean": simple mean of different keys + * "mean_by_sample_num": weighted mean with sample num of different keys + * "mean_by_positive_num": weighted mean with positive sample num of different keys + """ + assert reduction in ['mean', 'mean_by_sample_num', 'mean_by_positive_num'], \ + 'reduction method must in mean | mean_by_sample_num | mean_by_positive_num' + separated_label = defaultdict(list) + separated_prediction = defaultdict(list) + separated_weights = defaultdict(int) + + def update_pyfunc(labels, predictions, keys): + for label, prediction, key in zip(labels, predictions, keys): + separated_label[key].append(label) + separated_prediction[key].append(prediction) + if reduction == 'mean': + separated_weights[key] = 1 + elif reduction == 'mean_by_sample_num': + separated_weights[key] += 1 + elif reduction == 'mean_by_positive_num': + separated_weights[key] += label + + def value_pyfunc(): + metrics = [] + weights = [] + for key in separated_label.keys(): + per_label = np.asarray(separated_label[key]).reshape([-1]) + per_prediction = np.asarray(separated_prediction[key]).reshape([-1]) + if np.all(per_label == 1) or np.all(per_label == 0): + continue + metric = sklearn_metrics.roc_auc_score(per_label, per_prediction) + metrics.append(metric) + weights.append(separated_weights[key]) + if len(metrics) > 0: + return np.average(metrics, weights=weights).astype(np.float32) + else: + return np.float32(0.0) + + update_op = tf.py_func(update_pyfunc, [labels, predictions, keys], []) + value_op = tf.py_func(value_pyfunc, [], tf.float32) + return value_op, update_op + + +def gauc(labels, predictions, uids, reduction='mean'): + """Computes the AUC group by user separately. + + Args: + labels: A `Tensor` whose shape matches `predictions`. Will be cast to + `bool`. + predictions: A floating point `Tensor` of arbitrary shape and whose values + are in the range `[0, 1]`. + uids: user ids, A int or string `Tensor` whose shape matches `predictions`. + reduction: reduction method for auc of different users + * "mean": simple mean of different users + * "mean_by_sample_num": weighted mean with sample num of different users + * "mean_by_positive_num": weighted mean with positive sample num of different users + """ + return _separated_auc_impl(labels, predictions, uids, reduction) + + +def session_auc(labels, predictions, session_ids, reduction='mean'): + """Computes the AUC group by session separately. + + Args: + labels: A `Tensor` whose shape matches `predictions`. Will be cast to + `bool`. + predictions: A floating point `Tensor` of arbitrary shape and whose values + are in the range `[0, 1]`. + session_ids: session ids, A int or string `Tensor` whose shape matches `predictions`. 
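# Illustrative sketch (not part of this patch): the grouped AUC that gauc /
# session_auc above compute, written with plain numpy/sklearn for
# reduction='mean'; the sample values are made up.
import numpy as np
from sklearn import metrics as sklearn_metrics
uids = np.array([1, 1, 1, 2, 2, 2])
label = np.array([0, 1, 1, 0, 0, 1])
pred = np.array([0.2, 0.7, 0.6, 0.4, 0.3, 0.9])
aucs = []
for uid in np.unique(uids):
  mask = uids == uid
  if label[mask].min() == label[mask].max():
    continue  # groups with only positives or only negatives are skipped
  aucs.append(sklearn_metrics.roc_auc_score(label[mask], pred[mask]))
gauc_value = float(np.mean(aucs))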
+ reduction: reduction method for auc of different sessions + * "mean": simple mean of different sessions + * "mean_by_sample_num": weighted mean with sample num of different sessions + * "mean_by_positive_num": weighted mean with positive sample num of different sessions + """ + return _separated_auc_impl(labels, predictions, session_ids, reduction) diff --git a/easy_rec/python/core/sampler.py b/easy_rec/python/core/sampler.py new file mode 100644 index 000000000..d5c857ced --- /dev/null +++ b/easy_rec/python/core/sampler.py @@ -0,0 +1,576 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +from __future__ import division +from __future__ import print_function + +import json +import logging +import math +import os +import threading + +import numpy as np +import tensorflow as tf + +from easy_rec.python.protos.dataset_pb2 import DatasetConfig + +try: + import graphlearn as gl +except Exception: + logging.info( + 'GraphLearn is not installed. You can install it by "pip install http://odps-release.cn-hangzhou.oss-cdn.aliyun-inc.com/graphlearn/tunnel/graphlearn-0.7-cp27-cp27mu-linux_x86_64.whl."' # noqa: E501 + ) + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +def _get_gl_type(field_type): + type_map = { + DatasetConfig.INT32: 'int', + DatasetConfig.INT64: 'int', + DatasetConfig.STRING: 'string', + DatasetConfig.BOOL: 'int', + DatasetConfig.FLOAT: 'float', + DatasetConfig.DOUBLE: 'float' + } + assert field_type in type_map, 'invalid type: %s' % field_type + return type_map[field_type] + + +def _get_np_type(field_type): + type_map = { + DatasetConfig.INT32: np.int32, + DatasetConfig.INT64: np.int64, + DatasetConfig.STRING: np.str, + DatasetConfig.BOOL: np.bool, + DatasetConfig.FLOAT: np.float32, + DatasetConfig.DOUBLE: np.double + } + assert field_type in type_map, 'invalid type: %s' % field_type + return type_map[field_type] + + +def _get_tf_type(field_type): + type_map = { + DatasetConfig.INT32: tf.int32, + DatasetConfig.INT64: tf.int64, + DatasetConfig.STRING: tf.string, + DatasetConfig.BOOL: tf.bool, + DatasetConfig.FLOAT: tf.float32, + DatasetConfig.DOUBLE: tf.double + } + assert field_type in type_map, 'invalid type: %s' % field_type + return type_map[field_type] + + +class BaseSampler(object): + _instance_lock = threading.Lock() + + def __init__(self, fields, num_sample, num_eval_sample=None): + self._g = None + self._sampler = None + # TODO(hongsheng.jhs): check eval mode or not? + self._num_sample = num_sample + self._num_eval_sample = num_eval_sample if num_eval_sample else num_sample + self._build_field_types(fields) + + def _init_graph(self): + if 'TF_CONFIG' in os.environ: + tf_config = json.loads(os.environ['TF_CONFIG']) + if 'ps' in tf_config['cluster']: + # ps mode + tf_config = json.loads(os.environ['TF_CONFIG']) + ps_count = len(tf_config['cluster']['ps']) + task_count = len(tf_config['cluster']['worker']) + 2 + cluster = {'server_count': ps_count, 'client_count': task_count} + if tf_config['task']['type'] in ['chief', 'master']: + self._g.init(cluster=cluster, job_name='client', task_index=0) + elif tf_config['task']['type'] == 'worker': + self._g.init( + cluster=cluster, + job_name='client', + task_index=tf_config['task']['index'] + 2) + # TODO(hongsheng.jhs): check cluster has evaluator or not? 
+ elif tf_config['task']['type'] == 'evaluator': + self._g.init( + cluster=cluster, + job_name='client', + task_index=tf_config['task']['index'] + 1) + if self._num_eval_sample is not None and self._num_eval_sample > 0: + self._num_sample = self._num_eval_sample + elif tf_config['task']['type'] == 'ps': + self._g.init( + cluster=cluster, + job_name='server', + task_index=tf_config['task']['index']) + else: + # worker mode + task_count = len(tf_config['cluster']['worker']) + 1 + if tf_config['task']['type'] in ['chief', 'master']: + self._g.init(task_index=0, task_count=task_count) + elif tf_config['task']['type'] == 'worker': + self._g.init( + task_index=tf_config['task']['index'] + 1, task_count=task_count) + # TODO(hongsheng.jhs): check cluster has evaluator or not? + else: + # local mode + self._g.init() + + def _build_field_types(self, fields): + self._attr_names = [] + self._attr_types = [] + self._attr_gl_types = [] + self._attr_np_types = [] + self._attr_tf_types = [] + for i, field in enumerate(fields): + self._attr_names.append(field.input_name) + self._attr_types.append(field.input_type) + self._attr_gl_types.append(_get_gl_type(field.input_type)) + self._attr_np_types.append(_get_np_type(field.input_type)) + self._attr_tf_types.append(_get_tf_type(field.input_type)) + + @classmethod + def instance(cls, *args, **kwargs): + with cls._instance_lock: + if not hasattr(cls, '_instance'): + cls._instance = cls(*args, **kwargs) + return cls._instance + + def __del__(self): + self._g.close() + + def _parse_nodes(self, nodes): + features = [] + int_idx = 0 + float_idx = 0 + string_idx = 0 + for attr_gl_type, attr_np_type in zip(self._attr_gl_types, + self._attr_np_types): + if attr_gl_type == 'int': + feature = nodes.int_attrs[:, :, int_idx] + int_idx += 1 + elif attr_gl_type == 'float': + feature = nodes.float_attrs[:, :, float_idx] + float_idx += 1 + elif attr_gl_type == 'string': + feature = nodes.string_attrs[:, :, string_idx] + string_idx += 1 + else: + raise ValueError('Unknown attr type %s' % attr_gl_type) + feature = np.reshape(feature, + [-1])[:self._num_sample].astype(attr_np_type) + features.append(feature) + return features + + def _parse_sparse_nodes(self, nodes): + features = [] + int_idx = 0 + float_idx = 0 + string_idx = 0 + for attr_gl_type, attr_np_type in zip(self._attr_gl_types, + self._attr_np_types): + if attr_gl_type == 'int': + feature = nodes.int_attrs[:, int_idx] + int_idx += 1 + elif attr_gl_type == 'float': + feature = nodes.float_attrs[:, float_idx] + float_idx += 1 + elif attr_gl_type == 'string': + feature = nodes.string_attrs[:, string_idx] + string_idx += 1 + else: + raise ValueError('Unknown attr type %s' % attr_gl_type) + feature = feature.astype(attr_np_type) + features.append(feature) + return features, nodes.indices + + +class NegativeSampler(BaseSampler): + """Negative Sampler. + + Weighted random sampling items not in batch. + + Args: + data_path: item feature data path. id:int64 | weight:float | attrs:string. + fields: item input fields. + num_sample: number of negative samples. + batch_size: mini-batch size. + attr_delimiter: delimiter of feature string. + num_eval_sample: number of negative samples for evaluator. 
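# Illustrative arithmetic (not part of this patch): the expand_factor the
# samplers compute below; num_sample and batch_size are assumed values.
import math
num_sample, batch_size = 1024, 256
expand_factor = int(math.ceil(num_sample / batch_size))  # -> 4 negatives drawn per id
# _parse_nodes then flattens the [batch_size, expand_factor] block of sampled
# items and keeps the first num_sample of them.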
+ """ + + def __init__(self, + data_path, + fields, + num_sample, + batch_size, + attr_delimiter=':', + num_eval_sample=None): + super(NegativeSampler, self).__init__(fields, num_sample, num_eval_sample) + self._batch_size = batch_size + self._g = gl.Graph().node( + tf.compat.as_str(data_path), + node_type='item', + decoder=gl.Decoder( + attr_types=self._attr_gl_types, + weighted=True, + attr_delimiter=attr_delimiter)) + self._init_graph() + + expand_factor = int(math.ceil(self._num_sample / batch_size)) + self._sampler = self._g.negative_sampler( + 'item', expand_factor, strategy='node_weight') + + def _get_impl(self, ids): + # assert len(ids) == self._batch_size + # tf.logging.info("ids: %s", len(ids)) + ids = np.array(ids, dtype=np.int64) + nodes = self._sampler.get(ids) + features = self._parse_nodes(nodes) + return features + + def get(self, ids): + """Sampling method. + + Args: + ids: item id tensor. + + Returns: + Negative sampled feature dict. + """ + sampled_values = tf.py_func(self._get_impl, [ids], self._attr_tf_types) + result_dict = {} + for k, t, v in zip(self._attr_names, self._attr_tf_types, sampled_values): + if t == tf.string: + # string convert from np array to tensor will be padded with \000, we need remove it + v = tf.regex_replace(v, '\000', '') + v.set_shape([self._num_sample]) + result_dict[k] = v + return result_dict + + +class NegativeSamplerV2(BaseSampler): + """Negative Sampler V2. + + Weighted random sampling items which do not have positive edge with the user. + + Args: + user_data_path: user node data path. id:int64 | weight:float. + item_data_path: item feature data path. id:int64 | weight:float | attrs:string. + edge_data_path: positive edge data path. userid:int64 | itemid:int64 | weight:float + fields: item input fields. + num_sample: number of negative samples. + batch_size: mini-batch size. + attr_delimiter: delimiter of feature string. + num_eval_sample: number of negative samples for evaluator. + """ + + def __init__(self, + user_data_path, + item_data_path, + edge_data_path, + fields, + num_sample, + batch_size, + attr_delimiter=':', + num_eval_sample=None): + super(NegativeSamplerV2, self).__init__(fields, num_sample, num_eval_sample) + self._batch_size = batch_size + self._g = gl.Graph() \ + .node(tf.compat.as_str(user_data_path), + node_type='user', + decoder=gl.Decoder(weighted=True)) \ + .node(tf.compat.as_str(item_data_path), + node_type='item', + decoder=gl.Decoder( + attr_types=self._attr_gl_types, + weighted=True, + attr_delimiter=attr_delimiter)) \ + .edge(tf.compat.as_str(edge_data_path), + edge_type=('user', 'item', 'edge'), + decoder=gl.Decoder(weighted=True)) + self._init_graph() + + expand_factor = int(math.ceil(self._num_sample / batch_size)) + self._sampler = self._g.negative_sampler( + 'edge', expand_factor, strategy='random', conditional=True) + + def _get_impl(self, src_ids, dst_ids): + src_ids = np.array(src_ids, dtype=np.int64) + dst_ids = np.array(dst_ids, dtype=np.int64) + nodes = self._sampler.get(src_ids, dst_ids) + features = self._parse_nodes(nodes) + return features + + def get(self, src_ids, dst_ids): + """Sampling method. + + Args: + src_ids: user id tensor. + dst_ids: item id tensor. + + Returns: + Negative sampled feature dict. 
+ """ + sampled_values = tf.py_func(self._get_impl, [src_ids, dst_ids], + self._attr_tf_types) + result_dict = {} + for k, t, v in zip(self._attr_names, self._attr_tf_types, sampled_values): + if t == tf.string: + # string convert from np array to tensor will be padded with \000, we need remove it + v = tf.regex_replace(v, '\000', '') + v.set_shape([self._num_sample]) + result_dict[k] = v + return result_dict + + +class HardNegativeSampler(BaseSampler): + """HardNegativeSampler. + + Weighted random sampling items not in batch as negative samples, and sampling + destination nodes in hard_neg_edge as hard negative samples + + Args: + user_data_path: user node data path. id:int64 | weight:float. + item_data_path: item feature data path. id:int64 | weight:float | attrs:string. + hard_neg_edge_data_path: hard negative edge data path. userid:int64 | itemid:int64 | weight:float + fields: item input fields. + num_sample: number of negative samples. + num_hard_sample: maximum number of hard negative samples. + batch_size: mini-batch size. + attr_delimiter: delimiter of feature string. + num_eval_sample: number of negative samples for evaluator. + """ + + def __init__(self, + user_data_path, + item_data_path, + hard_neg_edge_data_path, + fields, + num_sample, + num_hard_sample, + batch_size, + attr_delimiter=':', + num_eval_sample=None): + super(HardNegativeSampler, self).__init__(fields, num_sample, + num_eval_sample) + self._batch_size = batch_size + self._g = gl.Graph() \ + .node(tf.compat.as_str(user_data_path), + node_type='user', + decoder=gl.Decoder(weighted=True)) \ + .node(tf.compat.as_str(item_data_path), + node_type='item', + decoder=gl.Decoder( + attr_types=self._attr_gl_types, + weighted=True, + attr_delimiter=attr_delimiter)) \ + .edge(tf.compat.as_str(hard_neg_edge_data_path), + edge_type=('user', 'item', 'hard_neg_edge'), + decoder=gl.Decoder(weighted=True)) + self._init_graph() + + expand_factor = int(math.ceil(self._num_sample / batch_size)) + self._neg_sampler = self._g.negative_sampler( + 'item', expand_factor, strategy='node_weight') + self._hard_neg_sampler = self._g.neighbor_sampler(['hard_neg_edge'], + num_hard_sample, + strategy='full') + + def _get_impl(self, src_ids, dst_ids): + src_ids = np.array(src_ids, dtype=np.int64) + dst_ids = np.array(dst_ids, dtype=np.int64) + nodes = self._neg_sampler.get(dst_ids) + neg_features = self._parse_nodes(nodes) + sparse_nodes = self._hard_neg_sampler.get(src_ids).layer_nodes(1) + hard_neg_features, hard_neg_indices = self._parse_sparse_nodes(sparse_nodes) + + results = [] + for i, v in enumerate(hard_neg_features): + results.append(np.concatenate([neg_features[i], v], axis=-1)) + results.append(hard_neg_indices) + return results + + def get(self, src_ids, dst_ids): + """Sampling method. + + Args: + src_ids: user id tensor. + dst_ids: item id tensor. + + Returns: + Sampled feature dict. 
The first batch_size is negative samples, remainder is hard negative samples + """ + output_types = self._attr_tf_types + [tf.int64] + output_values = tf.py_func(self._get_impl, [src_ids, dst_ids], output_types) + result_dict = {} + for k, t, v in zip(self._attr_names, self._attr_tf_types, + output_values[:-1]): + if t == tf.string: + # string convert from np array to tensor will be padded with \000, we need remove it + v = tf.regex_replace(v, '\000', '') + v.set_shape([None]) + result_dict[k] = v + + hard_neg_indices = output_values[-1] + hard_neg_indices.set_shape([None, 2]) + result_dict['hard_neg_indices'] = hard_neg_indices + return result_dict + + +class HardNegativeSamplerV2(BaseSampler): + """HardNegativeSampler. + + Weighted random sampling items which do not have positive edge with the user., and sampling + destination nodes in hard_neg_edge as hard negative samples + + Args: + user_data_path: user node data path. id:int64 | weight:float. + item_data_path: item feature data path. id:int64 | weight:float | attrs:string. + edge_data_path: positive edge data path. userid:int64 | itemid:int64 | weight:float + hard_neg_edge_data_path: hard negative edge data path. userid:int64 | itemid:int64 | weight:float + fields: item input fields. + num_sample: number of negative samples. + num_hard_sample: maximum number of hard negative samples. + batch_size: mini-batch size. + attr_delimiter: delimiter of feature string. + num_eval_sample: number of negative samples for evaluator. + """ + + def __init__(self, + user_data_path, + item_data_path, + edge_data_path, + hard_neg_edge_data_path, + fields, + num_sample, + num_hard_sample, + batch_size, + attr_delimiter=':', + num_eval_sample=None): + super(HardNegativeSamplerV2, self).__init__(fields, num_sample, + num_eval_sample) + self._batch_size = batch_size + self._g = gl.Graph() \ + .node(tf.compat.as_str(user_data_path), + node_type='user', + decoder=gl.Decoder(weighted=True)) \ + .node(tf.compat.as_str(item_data_path), + node_type='item', + decoder=gl.Decoder( + attr_types=self._attr_gl_types, + weighted=True, + attr_delimiter=attr_delimiter)) \ + .edge(tf.compat.as_str(edge_data_path), + edge_type=('user', 'item', 'edge'), + decoder=gl.Decoder(weighted=True)) \ + .edge(tf.compat.as_str(hard_neg_edge_data_path), + edge_type=('user', 'item', 'hard_neg_edge'), + decoder=gl.Decoder(weighted=True)) + self._init_graph() + + expand_factor = int(math.ceil(self._num_sample / batch_size)) + self._neg_sampler = self._g.negative_sampler( + 'edge', expand_factor, strategy='random', conditional=True) + self._hard_neg_sampler = self._g.neighbor_sampler(['hard_neg_edge'], + num_hard_sample, + strategy='full') + + def _get_impl(self, src_ids, dst_ids): + src_ids = np.array(src_ids, dtype=np.int64) + dst_ids = np.array(dst_ids, dtype=np.int64) + nodes = self._neg_sampler.get(src_ids, dst_ids) + neg_features = self._parse_nodes(nodes) + sparse_nodes = self._hard_neg_sampler.get(src_ids).layer_nodes(1) + hard_neg_features, hard_neg_indices = self._parse_sparse_nodes(sparse_nodes) + + results = [] + for i, v in enumerate(hard_neg_features): + results.append(np.concatenate([neg_features[i], v], axis=-1)) + results.append(hard_neg_indices) + return results + + def get(self, src_ids, dst_ids): + """Sampling method. + + Args: + src_ids: user id tensor. + dst_ids: item id tensor. + + Returns: + Sampled feature dict. 
The first batch_size is negative samples, remainder is hard negative samples + """ + output_types = self._attr_tf_types + [tf.int64] + output_values = tf.py_func(self._get_impl, [src_ids, dst_ids], output_types) + result_dict = {} + for k, t, v in zip(self._attr_names, self._attr_tf_types, + output_values[:-1]): + if t == tf.string: + # string convert from np array to tensor will be padded with \000, we need remove it + v = tf.regex_replace(v, '\000', '') + v.set_shape([None]) + result_dict[k] = v + + hard_neg_indices = output_values[-1] + hard_neg_indices.set_shape([None, 2]) + result_dict['hard_neg_indices'] = hard_neg_indices + return result_dict + + +def build(data_config): + if not data_config.HasField('sampler'): + return None + sampler_type = data_config.WhichOneof('sampler') + sampler_config = getattr(data_config, sampler_type) + if sampler_type == 'negative_sampler': + input_fields = {f.input_name: f for f in data_config.input_fields} + attr_fields = [input_fields[name] for name in sampler_config.attr_fields] + return NegativeSampler.instance( + data_path=sampler_config.input_path, + fields=attr_fields, + num_sample=sampler_config.num_sample, + batch_size=data_config.batch_size, + attr_delimiter=sampler_config.attr_delimiter, + num_eval_sample=sampler_config.num_eval_sample) + elif sampler_type == 'negative_sampler_v2': + input_fields = {f.input_name: f for f in data_config.input_fields} + attr_fields = [input_fields[name] for name in sampler_config.attr_fields] + return NegativeSamplerV2.instance( + user_data_path=sampler_config.user_input_path, + item_data_path=sampler_config.item_input_path, + edge_data_path=sampler_config.pos_edge_input_path, + fields=attr_fields, + num_sample=sampler_config.num_sample, + batch_size=data_config.batch_size, + attr_delimiter=sampler_config.attr_delimiter, + num_eval_sample=sampler_config.num_eval_sample) + elif sampler_type == 'hard_negative_sampler': + input_fields = {f.input_name: f for f in data_config.input_fields} + attr_fields = [input_fields[name] for name in sampler_config.attr_fields] + return HardNegativeSampler.instance( + user_data_path=sampler_config.user_input_path, + item_data_path=sampler_config.item_input_path, + hard_neg_edge_data_path=sampler_config.hard_neg_edge_input_path, + fields=attr_fields, + num_sample=sampler_config.num_sample, + num_hard_sample=sampler_config.num_hard_sample, + batch_size=data_config.batch_size, + attr_delimiter=sampler_config.attr_delimiter, + num_eval_sample=sampler_config.num_eval_sample) + elif sampler_type == 'hard_negative_sampler_v2': + input_fields = {f.input_name: f for f in data_config.input_fields} + attr_fields = [input_fields[name] for name in sampler_config.attr_fields] + return HardNegativeSamplerV2.instance( + user_data_path=sampler_config.user_input_path, + item_data_path=sampler_config.item_input_path, + edge_data_path=sampler_config.pos_edge_input_path, + hard_neg_edge_data_path=sampler_config.hard_neg_edge_input_path, + fields=attr_fields, + num_sample=sampler_config.num_sample, + num_hard_sample=sampler_config.num_hard_sample, + batch_size=data_config.batch_size, + attr_delimiter=sampler_config.attr_delimiter, + num_eval_sample=sampler_config.num_eval_sample) + else: + raise ValueError('Unknown sampler %s' % sampler_type) diff --git a/easy_rec/python/eval.py b/easy_rec/python/eval.py index 5093ace64..920b3b001 100644 --- a/easy_rec/python/eval.py +++ b/easy_rec/python/eval.py @@ -2,9 +2,11 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import logging +import os import six import tensorflow as tf +from tensorflow.python.lib.io import file_io from easy_rec.python.main import evaluate @@ -22,15 +24,29 @@ 'checkpoint_path', None, 'checkpoint to be evaled ' ' if not specified, use the latest checkpoint in ' 'train_config.model_dir') -tf.app.flags.DEFINE_string( +tf.app.flags.DEFINE_multi_string( 'eval_input_path', None, 'eval data path, if specified will ' 'override pipeline_config.eval_input_path') -tf.app.flags.mark_flag_as_required('pipeline_config_path') +tf.app.flags.DEFINE_string('model_dir', None, help='will update the model_dir') +tf.app.flags.DEFINE_string('odps_config', None, help='odps config path') FLAGS = tf.app.flags.FLAGS def main(argv): - eval_result = evaluate(FLAGS.pipeline_config_path, FLAGS.checkpoint_path, + if FLAGS.odps_config: + os.environ['ODPS_CONFIG_FILE_PATH'] = FLAGS.odps_config + + assert FLAGS.model_dir or FLAGS.pipeline_config_path, 'At least one of model_dir and pipeline_config_path exists.' + if FLAGS.model_dir: + pipeline_config_path = os.path.join(FLAGS.model_dir, 'pipeline.config') + if file_io.file_exists(pipeline_config_path): + logging.info('update pipeline_config_path to %s' % pipeline_config_path) + else: + pipeline_config_path = FLAGS.pipeline_config_path + else: + pipeline_config_path = FLAGS.pipeline_config_path + + eval_result = evaluate(pipeline_config_path, FLAGS.checkpoint_path, FLAGS.eval_input_path) for key in sorted(eval_result): # skip logging binary data diff --git a/easy_rec/python/export.py b/easy_rec/python/export.py index e4c1f2810..bd4842f8b 100644 --- a/easy_rec/python/export.py +++ b/easy_rec/python/export.py @@ -1,8 +1,10 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. import logging +import os import tensorflow as tf +from tensorflow.python.lib.io import file_io from easy_rec.python.main import export @@ -18,14 +20,58 @@ tf.app.flags.DEFINE_string('checkpoint_path', '', 'checkpoint to be exported') tf.app.flags.DEFINE_string('export_dir', None, 'directory where model should be exported to') +tf.app.flags.DEFINE_string('redis_url', None, 'export to redis url, host:port') +tf.app.flags.DEFINE_string('redis_passwd', None, 'export to redis passwd') +tf.app.flags.DEFINE_integer('redis_threads', 0, 'export to redis threads') +tf.app.flags.DEFINE_integer('redis_batch_size', 256, + 'export to redis batch_size') +tf.app.flags.DEFINE_integer('redis_timeout', 600, + 'export to redis time_out in seconds') +tf.app.flags.DEFINE_integer('redis_expire', 24, + 'export to redis expire time in hour') +tf.app.flags.DEFINE_string('redis_embedding_version', '', + 'redis embedding version') +tf.app.flags.DEFINE_integer('redis_write_kv', 1, + 'whether to write embedding to redis') +tf.app.flags.DEFINE_string('asset_files', '', 'more files to add to asset') +tf.app.flags.DEFINE_bool('verbose', False, 'print more debug information') -tf.app.flags.mark_flag_as_required('pipeline_config_path') +tf.app.flags.DEFINE_string('model_dir', None, help='will update the model_dir') tf.app.flags.mark_flag_as_required('export_dir') FLAGS = tf.app.flags.FLAGS def main(argv): - export(FLAGS.export_dir, FLAGS.pipeline_config_path, FLAGS.checkpoint_path) + redis_params = {} + if FLAGS.redis_url: + redis_params['redis_url'] = FLAGS.redis_url + if FLAGS.redis_passwd: + redis_params['redis_passwd'] = FLAGS.redis_passwd + if FLAGS.redis_threads > 0: + redis_params['redis_threads'] = FLAGS.redis_threads + if FLAGS.redis_batch_size > 0: + redis_params['redis_batch_size'] = 
FLAGS.redis_batch_size + if FLAGS.redis_expire > 0: + redis_params['redis_expire'] = FLAGS.redis_expire + if FLAGS.redis_embedding_version: + redis_params['redis_embedding_version'] = FLAGS.redis_embedding_version + if FLAGS.redis_write_kv == 0: + redis_params['redis_write_kv'] = False + else: + redis_params['redis_write_kv'] = True + + assert FLAGS.model_dir or FLAGS.pipeline_config_path, 'At least one of model_dir and pipeline_config_path exists.' + if FLAGS.model_dir: + pipeline_config_path = os.path.join(FLAGS.model_dir, 'pipeline.config') + if file_io.file_exists(pipeline_config_path): + logging.info('update pipeline_config_path to %s' % pipeline_config_path) + else: + pipeline_config_path = FLAGS.pipeline_config_path + else: + pipeline_config_path = FLAGS.pipeline_config_path + + export(FLAGS.export_dir, pipeline_config_path, FLAGS.checkpoint_path, + FLAGS.asset_files, FLAGS.verbose, **redis_params) if __name__ == '__main__': diff --git a/easy_rec/python/feature_column/feature_column.py b/easy_rec/python/feature_column/feature_column.py index 9e2be6468..4ce335fd4 100644 --- a/easy_rec/python/feature_column/feature_column.py +++ b/easy_rec/python/feature_column/feature_column.py @@ -13,6 +13,7 @@ if tf.__version__ >= '2.0': min_max_variable_partitioner = tf.compat.v1.min_max_variable_partitioner + tf = tf.compat.v1 else: min_max_variable_partitioner = tf.min_max_variable_partitioner @@ -47,6 +48,7 @@ def __init__(self, self._wide_deep_dict = wide_deep_dict self._deep_columns = {} self._wide_columns = {} + self._sequence_columns = {} self._share_embed_names = {} self._share_embed_infos = {} @@ -166,6 +168,10 @@ def wide_columns(self): def deep_columns(self): return self._deep_columns + @property + def sequence_columns(self): + return self._sequence_columns + def is_wide(self, config): if config.HasField('feature_name'): feature_name = config.feature_name @@ -245,6 +251,17 @@ def parse_tag_feature(self, config): if config.HasField('hash_bucket_size'): tag_fc = feature_column.categorical_column_with_hash_bucket( config.input_names[0], hash_bucket_size, dtype=tf.string) + elif config.vocab_list: + tag_fc = feature_column.categorical_column_with_vocabulary_list( + config.input_names[0], + default_value=0, + vocabulary_list=config.vocab_list) + elif config.vocab_file: + tag_fc = feature_column.categorical_column_with_vocabulary_file( + config.input_names[0], + default_value=0, + vocabulary_file=config.vocab_file, + vocabulary_size=self._get_vocab_size(config.vocab_file)) else: tag_fc = feature_column.categorical_column_with_identity( config.input_names[0], config.num_buckets, default_value=0) @@ -252,6 +269,10 @@ def parse_tag_feature(self, config): if len(config.input_names) > 1: tag_fc = feature_column.weighted_categorical_column( tag_fc, weight_feature_key=config.input_names[1], dtype=tf.float32) + elif config.HasField('kv_separator'): + wgt_name = config.input_names[0] + '_WEIGHT' + tag_fc = feature_column.weighted_categorical_column( + tag_fc, weight_feature_key=wgt_name, dtype=tf.float32) if self.is_wide(config): self._add_wide_embedding_column(tag_fc, config) @@ -268,7 +289,8 @@ def parse_raw_feature(self, config): """ feature_name = config.feature_name if config.HasField('feature_name') \ else config.input_names[0] - fc = feature_column.numeric_column(config.input_names[0]) + fc = feature_column.numeric_column( + config.input_names[0], shape=(config.raw_input_dim,)) bounds = None if config.boundaries: @@ -290,7 +312,9 @@ def parse_raw_feature(self, config): 
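# Illustrative sketch (not part of this patch): what the kv_separator branch
# above builds. A tag input such as "cate1:0.5|cate2:1.2" is split into ids and
# weights, and the weight column is assumed to be named <input_name>_WEIGHT as
# in the code above; only standard tf.feature_column calls are used here.
import tensorflow as tf
tag_fc = tf.feature_column.categorical_column_with_vocabulary_list(
    'tags', vocabulary_list=['cate1', 'cate2', 'cate3'], default_value=0)
weighted_fc = tf.feature_column.weighted_categorical_column(
    tag_fc, weight_feature_key='tags_WEIGHT', dtype=tf.float32)
tag_emb = tf.feature_column.embedding_column(weighted_fc, dimension=8, combiner='sum')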
self._add_deep_embedding_column(fc, config) else: tmp_id_col = feature_column.categorical_column_with_identity( - config.input_names[0] + '_raw_proj_id', 1, default_value=0) + config.input_names[0] + '_raw_proj_id', + config.raw_input_dim, + default_value=0) wgt_fc = feature_column.weighted_categorical_column( tmp_id_col, weight_feature_key=config.input_names[0] + '_raw_proj_val', @@ -344,10 +368,24 @@ def parse_sequence_feature(self, config): """ feature_name = config.feature_name if config.HasField('feature_name') \ else config.input_names[0] - assert config.HasField('hash_bucket_size') - hash_bucket_size = config.hash_bucket_size - fc = sequence_feature_column.sequence_categorical_column_with_hash_bucket( - feature_name, hash_bucket_size, dtype=tf.string) + if config.HasField('hash_bucket_size'): + hash_bucket_size = config.hash_bucket_size + fc = sequence_feature_column.sequence_categorical_column_with_hash_bucket( + config.input_names[0], hash_bucket_size, dtype=tf.string) + elif config.vocab_list: + fc = sequence_feature_column.sequence_categorical_column_with_vocabulary_list( + config.input_names[0], + default_value=0, + vocabulary_list=config.vocab_list) + elif config.vocab_file: + fc = sequence_feature_column.sequence_categorical_column_with_vocabulary_file( + config.input_names[0], + default_value=0, + vocabulary_file=config.vocab_file, + vocabulary_size=self._get_vocab_size(config.vocab_file)) + else: + fc = sequence_feature_column.sequence_categorical_column_with_identity( + config.input_names[0], config.num_buckets, default_value=0) assert config.embedding_dim > 0 initializer = None @@ -359,11 +397,18 @@ def parse_sequence_feature(self, config): combiner=config.combiner, initializer=initializer, partitioner=self._build_partitioner(config.max_partitions)) + fc.sequence_combiner = config.sequence_combiner if config.HasField( + 'sequence_combiner') else None self._deep_columns[feature_name] = fc + self._sequence_columns[feature_name] = fc def _build_partitioner(self, max_partitions): if max_partitions > 1: - return min_max_variable_partitioner(max_partitions=max_partitions) + if self._use_embedding_variable: + # pai embedding_variable should use fixed_size_partitioner + return tf.fixed_size_partitioner(num_shards=max_partitions) + else: + return min_max_variable_partitioner(max_partitions=max_partitions) else: return None diff --git a/easy_rec/python/feature_column/feature_group.py b/easy_rec/python/feature_column/feature_group.py index 2cac52db3..b04a635ad 100644 --- a/easy_rec/python/feature_column/feature_group.py +++ b/easy_rec/python/feature_column/feature_group.py @@ -30,9 +30,18 @@ def feature_names(self): return self._config.feature_names def select_columns(self, fc): - columns = fc.wide_columns if self._config.wide_deep == WideOrDeep.WIDE \ - else fc.deep_columns - return [columns[x] for x in self._config.feature_names] + if self._config.wide_deep == WideOrDeep.WIDE: + wide_columns = [fc.wide_columns[x] for x in self._config.feature_names] + return wide_columns, [] + else: + sequence_columns = [] + deep_columns = [] + for x in self._config.feature_names: + if x in fc.sequence_columns: + sequence_columns.append(fc.sequence_columns[x]) + else: + deep_columns.append(fc.deep_columns[x]) + return deep_columns, sequence_columns def _auto_expand_feature_name(self): features = [x for x in self._config.feature_names] diff --git a/easy_rec/python/hpo/generate_hpo_sql.py b/easy_rec/python/hpo/generate_hpo_sql.py index a38ca4249..ee8262050 100644 --- 
a/easy_rec/python/hpo/generate_hpo_sql.py +++ b/easy_rec/python/hpo/generate_hpo_sql.py @@ -12,6 +12,10 @@ '--config_path', type=str, help='config path', default=None) parser.add_argument( '--tables', type=str, help='train_table and test_table', default=None) + parser.add_argument( + '--train_tables', type=str, help='train_tables', default=None) + parser.add_argument( + '--eval_tables', type=str, help='eval_tables', default=None) parser.add_argument( '--cluster', type=str, @@ -33,15 +37,28 @@ type=str, help='algorithm project name', default='algo_public') + parser.add_argument( + '--algo_res_proj', type=str, help='algo resource project', default=None) + parser.add_argument( + '--algo_version', type=str, help='algo version', default=None) args = parser.parse_args() with open(args.sql_path, 'w') as fout: fout.write('pai -name easy_rec_ext -project %s\n' % args.algo_proj_name) - fout.write(' -Dres_project=%s\n' % args.algo_proj_name) + if args.algo_res_proj: + fout.write(' -Dres_project=%s\n' % args.algo_res_proj) + else: + fout.write(' -Dres_project=%s\n' % args.algo_proj_name) + if args.algo_version: + fout.write(' -Dversion=%s\n' % args.algo_version) fout.write(' -Dconfig=%s\n' % args.config_path) fout.write(' -Dcmd=train\n') - fout.write(' -Dtables=%s\n' % args.tables) + if args.tables: + fout.write(' -Dtables=%s\n' % args.tables) + else: + fout.write(' -Dtrain_tables=%s\n' % args.train_tables) + fout.write(' -Deval_tables=%s\n' % args.eval_tables) fout.write(' -Dcluster=\'%s\'\n' % args.cluster) fout.write(' -Darn=%s\n' % args.role_arn) fout.write(' -Dbuckets=%s\n' % args.bucket) diff --git a/easy_rec/python/hpo/pai_hpo.py b/easy_rec/python/hpo/pai_hpo.py index c4d60e699..12db919ad 100644 --- a/easy_rec/python/hpo/pai_hpo.py +++ b/easy_rec/python/hpo/pai_hpo.py @@ -70,13 +70,10 @@ def get_tuner(data, max_parallel, max_trial_num): return tuner -def hpo_config(config_path, hyperparams, environment, exp_dir, tables, cluster, - algo_proj_name, metric_name, odps_config_path): - earlystop = { - 'type': 'large_is_better', - 'threshold': 0.99, - 'max_runtime': 2400 - } +def hpo_config(config_path, hyperparams, environment, exp_dir, tables, + train_tables, eval_tables, cluster, algo_proj_name, + algo_res_proj, algo_version, metric_name, odps_config_path): + earlystop = {'type': 'large_is_better', 'max_runtime': 3600 * 12} algorithm = { 'type': 'gp', 'initial_trials_num': 4, @@ -109,25 +106,39 @@ def _add_prefix(table_name): else: return table_name - tables = [_add_prefix(x) for x in tables.split(',') if x != ''] - tables = ','.join(tables) - logging.info('will tune on data: %s' % tables) + if tables: + tables = [_add_prefix(x) for x in tables.split(',') if x != ''] + tables = ','.join(tables) + logging.info('will tune on data: %s' % tables) + else: + train_tables = [_add_prefix(x) for x in train_tables.split(',') if x != ''] + train_tables = ','.join(train_tables) + eval_tables = [_add_prefix(x) for x in eval_tables.split(',') if x != ''] + eval_tables = ','.join(eval_tables) sql_path = '%s/train_ext_hpo_{{ trial.id }}.sql' % tmp_dir - prepare_sql_task = { - 'type': - 'BashTask', - 'cmd': [ - 'python', '-m', 'easy_rec.python.hpo.generate_hpo_sql', '--sql_path', - sql_path, '--config_path', config_path, '--tables', tables, - '--cluster', cluster, '--bucket', bucket, '--hpo_param_path', - os.path.join(bucket, param_path), '--hpo_metric_save_path', - os.path.join(bucket, metric_path), '--model_dir', - os.path.join(bucket, model_path), '--oss_host', - environment['oss_endpoint'], '--role_arn', 
environment['role_arn'], - '--algo_proj_name', algo_proj_name - ] - } + cmd_args = [ + 'python', '-m', 'easy_rec.python.hpo.generate_hpo_sql', '--sql_path', + sql_path, '--config_path', config_path, '--cluster', cluster, '--bucket', + bucket, '--hpo_param_path', + os.path.join(bucket, param_path), '--hpo_metric_save_path', + os.path.join(bucket, metric_path), '--model_dir', + os.path.join(bucket, + model_path), '--oss_host', environment['oss_endpoint'], + '--role_arn', environment['role_arn'], '--algo_proj_name', algo_proj_name + ] + + if tables: + cmd_args.extend(['--tables', tables]) + if train_tables and eval_tables: + cmd_args.extend( + ['--train_tables', train_tables, '--eval_tables', eval_tables]) + + if algo_res_proj: + cmd_args.extend(['--algo_res_proj', algo_res_proj]) + if algo_version: + cmd_args.extend(['--algo_version', algo_version]) + prepare_sql_task = {'type': 'BashTask', 'cmd': cmd_args} train_task = { 'type': 'BashTask', @@ -167,6 +178,10 @@ def _add_prefix(table_name): '--config_path', type=str, help='pipeline config', default=None) parser.add_argument( '--tables', type=str, help='train table and test table', default=None) + parser.add_argument( + '--train_tables', type=str, help='train tables', default=None) + parser.add_argument( + '--eval_tables', type=str, help='eval tables', default=None) parser.add_argument( '--exp_dir', type=str, help='hpo experiment directory', default=None) parser.add_argument( @@ -180,6 +195,10 @@ def _add_prefix(table_name): type=str, help='algo project name', default='algo_public') + parser.add_argument( + '--algo_version', type=str, help='algo version', default=None) + parser.add_argument( + '--algo_res_proj', type=str, help='algo resource project', default=None) parser.add_argument( '--metric_name', type=str, help='evaluate metric name', default='auc') parser.add_argument( @@ -236,6 +255,7 @@ def _add_prefix(table_name): if args.bucket.startswith('oss://'): args.bucket = args.bucket[len('oss://'):] + args.bucket = args.bucket.strip('/') environment = { 'access_id': odps_config['access_id'], @@ -256,11 +276,14 @@ def _add_prefix(table_name): assert args.config_path is not None assert args.exp_dir is not None - assert args.tables is not None + assert args.tables is not None or (args.train_tables is not None and + args.eval_tables is not None) data, tmp_dir = hpo_config(args.config_path, hyperparams, environment, - args.exp_dir, args.tables, args.cluster, - args.algo_proj_name, args.metric_name, + args.exp_dir, args.tables, args.train_tables, + args.eval_tables, args.cluster, + args.algo_proj_name, args.algo_res_proj, + args.algo_version, args.metric_name, args.odps_config) hpo_util.kill_old_proc(tmp_dir, platform='pai') diff --git a/easy_rec/python/inference/predictor.py b/easy_rec/python/inference/predictor.py index 2bbb34d21..9c2248c96 100644 --- a/easy_rec/python/inference/predictor.py +++ b/easy_rec/python/inference/predictor.py @@ -166,12 +166,23 @@ def _build_model(self): # each input_info is a tuple of input_id, name, data_type input_info = [] if len(inputs.items()) > 1: - for name, tensor in inputs.items(): + for gid, item in enumerate(inputs.items()): + name, tensor = item logging.info('Load input binding: %s -> %s' % (name, tensor.name)) input_name = tensor.name input_name, _ = input_name.split(':') - input_id = input_name.split('_')[-1] - input_id = int(input_id) + try: + input_id = input_name.split('_')[-1] + input_id = int(input_id) + except Exception: + # support for models that are not exported by easy_rec + # in which case, 
the order of inputs may not be the + # same as they are defined, therefore, list input + # cannot be supported; only dict input is supported + logging.warning( + 'could not determine input_id from input_name: %s' % + input_name) + input_id = gid input_info.append((input_id, name, tensor.dtype)) self._inputs_map[name] = self._graph.get_tensor_by_name( tensor.name) @@ -455,6 +466,9 @@ def batch(self, data_list): for key in data: batch_input[key].append(data[key]) elif isinstance(data, list): + assert len(self._predictor_impl.input_names) == len(data), \ + 'input field number incorrect, expected %d, got %d' \ + % (len(self._predictor_impl.input_names), len(data)) for key, v in zip(self._predictor_impl.input_names, data): if key != '': batch_input[key].append(v) diff --git a/easy_rec/python/input/batch_tfrecord_input.py b/easy_rec/python/input/batch_tfrecord_input.py new file mode 100644 index 000000000..620b91a59 --- /dev/null +++ b/easy_rec/python/input/batch_tfrecord_input.py @@ -0,0 +1,109 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +import tensorflow as tf + +from easy_rec.python.input.input import Input + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class BatchTFRecordInput(Input): + """BatchTFRecordInput is used for batched reads from TFRecord files. + + For example, for a TFRecord in which one feature (key) + corresponds to n data values, + batch_size needs to be a multiple of n. + """ + + def __init__(self, + data_config, + feature_config, + input_path, + task_index=0, + task_num=1): + super(BatchTFRecordInput, self).__init__(data_config, feature_config, + input_path, task_index, task_num) + assert data_config.HasField( + 'n_data_batch_tfrecord'), 'Need to set n_data_batch_tfrecord in config.' + self._input_shapes = [x.input_shape for x in data_config.input_fields] + self.feature_desc = {} + for x, t, d, s in zip(self._input_fields, self._input_field_types, + self._input_field_defaults, self._input_shapes): + d = self.get_type_defaults(t, d) + t = self.get_tf_type(t) + self.feature_desc[x] = tf.io.FixedLenSequenceFeature( + dtype=t, shape=s, allow_missing=True) + + def _parse_tfrecord(self, example): + try: + _, features, _ = tf.parse_sequence_example( + example, sequence_features=self.feature_desc) + except AttributeError: + _, features, _ = tf.io.parse_sequence_example( + example, sequence_features=self.feature_desc) + # The code below reduces one dimension when the data dimension > 2.
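+    # parse_sequence_example returns tensors of shape [batch, n, ...];
+    # the reshape below merges the first two dimensions into [batch * n, ...],
+    # so downstream preprocessing sees one example per row.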
+ features = dict( + (key, + tf.reshape(value, [ + -1, + ] + [x for i, x in enumerate(value.shape) if i not in (0, 1)])) for ( + key, value) in features.items()) + return features + + def _build(self, mode, params): + file_paths = tf.gfile.Glob(self._input_path) + assert len(file_paths) > 0, 'match no files with %s' % self._input_path + + num_parallel_calls = self._data_config.num_parallel_calls + data_compression_type = self._data_config.data_compression_type + if mode == tf.estimator.ModeKeys.TRAIN: + logging.info('train files[%d]: %s' % + (len(file_paths), ','.join(file_paths))) + dataset = tf.data.Dataset.from_tensor_slices(file_paths) + if self._data_config.shuffle: + # shuffle input files + dataset = dataset.shuffle(len(file_paths)) + # too many readers read the same file will cause performance issues + # as the same data will be read multiple times + parallel_num = min(num_parallel_calls, len(file_paths)) + dataset = dataset.interleave( + lambda x: tf.data.TFRecordDataset( + x, compression_type=data_compression_type), + cycle_length=parallel_num, + num_parallel_calls=parallel_num) + dataset = dataset.shard(self._task_num, self._task_index) + if self._data_config.shuffle: + dataset = dataset.shuffle( + self._data_config.shuffle_buffer_size, + seed=2020, + reshuffle_each_iteration=True) + dataset = dataset.repeat(self.num_epochs) + else: + logging.info('eval files[%d]: %s' % + (len(file_paths), ','.join(file_paths))) + dataset = tf.data.TFRecordDataset( + file_paths, compression_type=data_compression_type) + dataset = dataset.repeat(1) + + # We read n data from tfrecord one time. + cur_batch = self._data_config.batch_size // self._data_config.n_data_batch_tfrecord + cur_batch = max(1, cur_batch) + dataset = dataset.batch(cur_batch) + dataset = dataset.map( + self._parse_tfrecord, num_parallel_calls=num_parallel_calls) + + dataset = dataset.prefetch(buffer_size=self._prefetch_size) + dataset = dataset.map( + map_func=self._preprocess, num_parallel_calls=num_parallel_calls) + + dataset = dataset.prefetch(buffer_size=self._prefetch_size) + + if mode != tf.estimator.ModeKeys.PREDICT: + dataset = dataset.map(lambda x: + (self._get_features(x), self._get_labels(x))) + else: + dataset = dataset.map(lambda x: (self._get_features(x))) + return dataset diff --git a/easy_rec/python/input/csv_input.py b/easy_rec/python/input/csv_input.py index 4af3624bb..2f4a2a88c 100644 --- a/easy_rec/python/input/csv_input.py +++ b/easy_rec/python/input/csv_input.py @@ -7,7 +7,10 @@ from easy_rec.python.input.input import Input if tf.__version__ >= '2.0': + ignore_errors = tf.data.experimental.ignore_errors() tf = tf.compat.v1 +else: + ignore_errors = tf.contrib.data.ignore_errors() class CSVInput(Input): @@ -52,7 +55,9 @@ def _check_data(line): return inputs def _build(self, mode, params): - file_paths = tf.gfile.Glob(self._input_path) + file_paths = [] + for x in self._input_path.split(','): + file_paths.extend(tf.gfile.Glob(x)) assert len(file_paths) > 0, 'match no files with %s' % self._input_path num_parallel_calls = self._data_config.num_parallel_calls @@ -70,7 +75,12 @@ def _build(self, mode, params): tf.data.TextLineDataset, cycle_length=parallel_num, num_parallel_calls=parallel_num) - dataset = dataset.shard(self._task_num, self._task_index) + + if self._data_config.chief_redundant: + dataset = dataset.shard( + max(self._task_num - 1, 1), max(self._task_index - 1, 0)) + else: + dataset = dataset.shard(self._task_num, self._task_index) if self._data_config.shuffle: dataset = dataset.shuffle( 
self._data_config.shuffle_buffer_size, @@ -86,6 +96,8 @@ def _build(self, mode, params): dataset = dataset.batch(self._data_config.batch_size) dataset = dataset.map( self._parse_csv, num_parallel_calls=num_parallel_calls) + if self._data_config.ignore_error: + dataset = dataset.apply(ignore_errors) dataset = dataset.prefetch(buffer_size=self._prefetch_size) dataset = dataset.map( map_func=self._preprocess, num_parallel_calls=num_parallel_calls) diff --git a/easy_rec/python/input/csv_input_ex.py b/easy_rec/python/input/csv_input_ex.py new file mode 100644 index 000000000..3be5c0d46 --- /dev/null +++ b/easy_rec/python/input/csv_input_ex.py @@ -0,0 +1,68 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import numpy as np +import tensorflow as tf + +from easy_rec.python.input.csv_input import CSVInput + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class CSVInputEx(CSVInput): + + def __init__(self, + data_config, + feature_config, + input_path, + task_index=0, + task_num=1): + super(CSVInputEx, self).__init__(data_config, feature_config, input_path, + task_index, task_num) + + def _parse_csv(self, line): + record_defaults = [ + self.get_type_defaults(t, v) + for t, v in zip(self._input_field_types, self._input_field_defaults) + ] + + def _check_data(line): + sep = self._data_config.separator + if type(sep) != type(str): + sep = sep.encode('utf-8') + field_num = len(line[0].split(sep)) + assert field_num == len(record_defaults), \ + 'sep[%s] maybe invalid: field_num=%d, required_num=%d' % \ + (sep, field_num, len(record_defaults)) + return True + + fields = tf.string_split( + line, self._data_config.separator, skip_empty=False) + tmp_fields = tf.reshape(fields.values, [-1, len(record_defaults)]) + fields = [] + for i in range(len(record_defaults)): + if type(record_defaults[i]) == int: + fields.append( + tf.string_to_number( + tmp_fields[:, i], tf.int64, name='field_as_int_%d' % i)) + elif type(record_defaults[i]) in [float, np.float32, np.float64]: + fields.append( + tf.string_to_number( + tmp_fields[:, i], tf.float32, name='field_as_flt_%d' % i)) + elif type(record_defaults[i]) in [str, type(u''), bytes]: + fields.append(tmp_fields[:, i]) + elif type(record_defaults[i]) == bool: + fields.append( + tf.logical_or( + tf.equal(tmp_fields[:, i], 'True'), + tf.equal(tmp_fields[:, i], 'true'))) + else: + assert 'invalid types: %s' % str(type(record_defaults[i])) + + keep_ids = [ + self._input_fields.index(x) + for x in self._label_fields + self._effective_fields + ] + inputs = {self._input_fields[x]: fields[x] for x in keep_ids} + + return inputs diff --git a/easy_rec/python/input/csv_input_v2.py b/easy_rec/python/input/csv_input_v2.py index fd96f1f85..80f80734c 100644 --- a/easy_rec/python/input/csv_input_v2.py +++ b/easy_rec/python/input/csv_input_v2.py @@ -40,7 +40,11 @@ def _build(self, mode, params): sloppy=is_train) if mode == tf.estimator.ModeKeys.TRAIN: - dataset = dataset.shard(self._task_num, self._task_index) + if self._data_config.chief_redundant: + dataset = dataset.shard( + max(self._task_num - 1, 1), max(self._task_index - 1, 0)) + else: + dataset = dataset.shard(self._task_num, self._task_index) else: dataset = dataset.repeat(1) diff --git a/easy_rec/python/input/datahub_input.py b/easy_rec/python/input/datahub_input.py new file mode 100644 index 000000000..d1c6af958 --- /dev/null +++ b/easy_rec/python/input/datahub_input.py @@ -0,0 +1,135 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import logging +import pdb +import sys +import time +import traceback + +import numpy as np +import tensorflow as tf + +from easy_rec.python.input.input import Input +from easy_rec.python.utils import odps_util + +try: + import common_io +except Exception: + common_io = None +try: + from datahub import DataHub + from datahub.exceptions import DatahubException + from datahub.models import FieldType, RecordSchema, TupleRecord, BlobRecord, CursorType, RecordType +except Exception: + DataHub = None  # keep the name that is checked in DataHubInput.__init__ + + +class DataHubInput(Input): + """DataHub input: reads streaming tuple records from an Aliyun DataHub topic.""" + + def __init__(self, + data_config, + feature_config, + datahub_config, + task_index=0, + task_num=1): + super(DataHubInput, self).__init__(data_config, feature_config, '', + task_index, task_num) + if DataHub is None: + logging.error('please install datahub: ' + 'pip install pydatahub; Python 3.6 recommended') + try: + self._datahub_config = datahub_config + if self._datahub_config is None: + pass + self._datahub = DataHub(self._datahub_config.akId, \ + self._datahub_config.akSecret, self._datahub_config.region) + self._num_epoch = 0 + except Exception: + pass + + def _parse_record(self, *fields): + fields = list(fields) + inputs = {self._input_fields[x]: fields[x] for x in self._effective_fids} + for x in self._label_fids: + inputs[self._input_fields[x]] = fields[x] + return inputs + + def _datahub_generator(self): + logging.info('start epoch[%d]' % self._num_epoch) + self._num_epoch += 1 + odps_util.check_input_field_and_types(self._data_config) + record_defaults = [ + self.get_type_defaults(x, v) + for x, v in zip(self._input_field_types, self._input_field_defaults) + ] + batch_defaults = [ + np.array([x] * self._data_config.batch_size) for x in record_defaults + ] + try: + self._datahub.wait_shards_ready(self._datahub_config.project, + self._datahub_config.topic) + topic_result = self._datahub.get_topic(self._datahub_config.project, + self._datahub_config.topic) + if topic_result.record_type != RecordType.TUPLE: + logging.error('topic type illegal!') + record_schema = topic_result.record_schema + shard_result = self._datahub.list_shard(self._datahub_config.project, + self._datahub_config.topic) + shards = shard_result.shards + for shard in shards: + shard_id = shard._shard_id + cursor_result = self._datahub.get_cursor(self._datahub_config.project, + self._datahub_config.topic, + shard_id, CursorType.OLDEST) + cursor = cursor_result.cursor + limit = self._data_config.batch_size + while True: + get_result = self._datahub.get_tuple_records(self._datahub_config.project \ + , self._datahub_config.topic, shard_id, record_schema, cursor, limit) + batch_data_np = [x.copy() for x in batch_defaults] + for row_id, record in enumerate(get_result.records): + for col_id in range(len(record_defaults)): + if record.values[col_id] not in ['', 'Null', None]: + batch_data_np[col_id][row_id] = record.values[col_id] + yield tuple(batch_data_np) + if 0 == get_result.record_count: + time.sleep(1) + cursor = get_result.next_cursor + except DatahubException as e: + logging.error(e) + + def _build(self, mode, params): + # get input type + list_type = [self.get_tf_type(x) for x in self._input_field_types] + list_type = tuple(list_type) + list_shapes = [tf.TensorShape([None]) for x in range(0, len(list_type))] + list_shapes = tuple(list_shapes) + # read datahub + dataset = tf.data.Dataset.from_generator( + self._datahub_generator, + output_types=list_type, + output_shapes=list_shapes) + if mode == 
tf.estimator.ModeKeys.TRAIN: + dataset = dataset.shuffle( + self._data_config.shuffle_buffer_size, + seed=2020, + reshuffle_each_iteration=True) + dataset = dataset.repeat(self.num_epochs) + else: + dataset = dataset.repeat(1) + dataset = dataset.map( + self._parse_record, + num_parallel_calls=self._data_config.num_parallel_calls) + # preprocess is necessary to transform data + # so that they could be feed into FeatureColumns + dataset = dataset.map( + map_func=self._preprocess, + num_parallel_calls=self._data_config.num_parallel_calls) + dataset = dataset.prefetch(buffer_size=self._prefetch_size) + if mode != tf.estimator.ModeKeys.PREDICT: + dataset = dataset.map(lambda x: + (self._get_features(x), self._get_labels(x))) + else: + dataset = dataset.map(lambda x: (self._get_features(x))) + return dataset diff --git a/easy_rec/python/input/dummy_input.py b/easy_rec/python/input/dummy_input.py index 889807779..bc95be436 100644 --- a/easy_rec/python/input/dummy_input.py +++ b/easy_rec/python/input/dummy_input.py @@ -20,9 +20,11 @@ def __init__(self, feature_config, input_path, task_index=0, - task_num=1): + task_num=1, + input_vals={}): super(DummyInput, self).__init__(data_config, feature_config, input_path, task_index, task_num) + self._input_vals = input_vals def _build(self, mode, params): """Build fake constant input. @@ -41,7 +43,10 @@ def _build(self, mode, params): self._input_field_defaults): tf_type = self.get_tf_type(field_type) def_val = self.get_type_defaults(field_type, default_val=def_val) - tensor = tf.constant([def_val] * self._batch_size, dtype=tf_type) + if field in self._input_vals: + tensor = self._input_vals[field] + else: + tensor = tf.constant([def_val] * self._batch_size, dtype=tf_type) features[field] = tensor parse_dict = self._preprocess(features) return self._get_features(parse_dict), self._get_labels(parse_dict) diff --git a/easy_rec/python/input/input.py b/easy_rec/python/input/input.py index 41588302d..65b5136ab 100644 --- a/easy_rec/python/input/input.py +++ b/easy_rec/python/input/input.py @@ -1,6 +1,7 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
import json +import logging import os from abc import abstractmethod from collections import OrderedDict @@ -10,8 +11,10 @@ import tensorflow as tf import easy_rec +from easy_rec.python.core import sampler as sampler_lib from easy_rec.python.protos.dataset_pb2 import DatasetConfig from easy_rec.python.utils import config_util +from easy_rec.python.utils import constant from easy_rec.python.utils.load_class import get_register_class_meta if tf.__version__ >= '2.0': @@ -31,6 +34,10 @@ def __init__(self, task_num=1): self._data_config = data_config + # tf.estimator.ModeKeys.*, only available before + # calling self._build + self._mode = None + if self._data_config.auto_expand_input_fields: input_fields = [x for x in self._data_config.input_fields] while len(self._data_config.input_fields) > 0: @@ -44,11 +51,17 @@ def __init__(self, self._data_config.input_fields.append(one_field) self._input_fields = [x.input_name for x in data_config.input_fields] + self._input_dims = [x.input_dim for x in data_config.input_fields] self._input_field_types = [x.input_type for x in data_config.input_fields] self._input_field_defaults = [ x.default_val for x in data_config.input_fields ] self._label_fields = list(data_config.label_fields) + self._label_sep = list(data_config.label_sep) + self._label_dim = list(data_config.label_dim) + if len(self._label_dim) < len(self._label_fields): + for x in range(len(self._label_fields) - len(self._label_dim)): + self._label_dim.append(1) self._batch_size = data_config.batch_size self._prefetch_size = data_config.prefetch_size @@ -60,20 +73,46 @@ def __init__(self, # findout effective fields self._effective_fields = [] + self._multi_value_types = {} + for fc in self._feature_configs: for input_name in fc.input_names: assert input_name in self._input_fields, 'invalid input_name in %s' % str( fc) - self._effective_fields.append(input_name) + if input_name not in self._effective_fields: + self._effective_fields.append(input_name) + + if fc.feature_type in [fc.TagFeature, fc.SequenceFeature]: + if fc.hash_bucket_size > 0: + self._multi_value_types[fc.input_names[0]] = tf.string + else: + self._multi_value_types[fc.input_names[0]] = tf.int64 + if len(fc.input_names) > 1: + self._multi_value_types[fc.input_names[1]] = tf.float32 + + if fc.feature_type == fc.RawFeature: + self._multi_value_types[fc.input_names[0]] = tf.float32 + + # add sample weight to effective fields + if self._data_config.HasField('sample_weight'): + self._effective_fields.append(self._data_config.sample_weight) + self._effective_fids = [ self._input_fields.index(x) for x in self._effective_fields ] self._label_fids = [self._input_fields.index(x) for x in self._label_fields] - # appended fields + # virtual fields generated by self._preprocess + # which will be inputs to feature columns self._appended_fields = [] + # sampler + self._sampler = None + if input_path is not None: + # build sampler only when train and eval + self._sampler = sampler_lib.build(data_config) + @property def num_epochs(self): if self._data_config.num_epochs > 0: @@ -105,8 +144,10 @@ def get_type_defaults(self, field_type, default_val=''): assert field_type in type_defaults, 'invalid type: %s' % field_type if default_val == '': default_val = type_defaults[field_type] - if field_type in [DatasetConfig.INT32, DatasetConfig.INT64]: + if field_type == DatasetConfig.INT32: return int(default_val) + elif field_type == DatasetConfig.INT64: + return np.int64(default_val) elif field_type == DatasetConfig.STRING: return default_val elif field_type 
== DatasetConfig.BOOL: @@ -118,36 +159,65 @@ return type_defaults[field_type] - def create_multi_placeholders(self): + def create_multi_placeholders(self, + placeholder_named_by_input, + export_fields_name=None): + """Create multiple placeholders on export. + + Args: + placeholder_named_by_input: If true, the placeholder is named after the input feature; + otherwise the placeholder name is input_XX. Default: false. + export_fields_name: TagFeature / SeqFeature list that needs to be converted into + 2D placeholders when exporting. + """ + self._mode = tf.estimator.ModeKeys.PREDICT + effective_fids = list(self._effective_fids) + if self._data_config.HasField('sample_weight'): + effective_fids = effective_fids[:-1] inputs = {} - for fid in self._effective_fids: - ftype = self._input_field_types[fid] - tf_type = self.get_tf_type(ftype) + for fid in effective_fids: input_name = self._input_fields[fid] - finput = tf.placeholder(tf_type, [None], name='input_%d' % fid) + if placeholder_named_by_input: + placeholder_name = input_name + else: + placeholder_name = 'input_%d' % fid + if export_fields_name and input_name in export_fields_name: + tf_type = self._multi_value_types[input_name] + logging.info('multi value input_name: %s, dtype: %s' % + (input_name, tf_type)) + finput = tf.placeholder(tf_type, [None, None], name=placeholder_name) + else: + ftype = self._input_field_types[fid] + tf_type = self.get_tf_type(ftype) + finput = tf.placeholder(tf_type, [None], name=placeholder_name) inputs[input_name] = finput - features = self._preprocess(inputs) + features = {x: inputs[x] for x in inputs} + features = self._preprocess(features) return inputs, features def create_placeholders(self): + self._mode = tf.estimator.ModeKeys.PREDICT inputs_placeholder = tf.placeholder(tf.string, [None], name='features') input_vals = tf.string_split( inputs_placeholder, self._data_config.separator, skip_empty=False).values + effective_fids = list(self._effective_fids) + if self._data_config.HasField('sample_weight'): + effective_fids = effective_fids[:-1] input_vals = tf.reshape( - input_vals, [-1, len(self._input_fields) - 1], name='input_reshape') + input_vals, [-1, len(effective_fids)], name='input_reshape') features = {} - for fid in self._effective_fids: + for tmp_id, fid in enumerate(effective_fids): ftype = self._input_field_types[fid] tf_type = self.get_tf_type(ftype) input_name = self._input_fields[fid] if tf_type in [tf.float32, tf.double, tf.int32, tf.int64]: features[input_name] = tf.string_to_number( - input_vals[:, fid - 1], + input_vals[:, tmp_id], tf_type, name='input_str_to_%s' % tf_type.name) else: - features[input_name] = input_vals[:, fid - 1] + features[input_name] = input_vals[:, tmp_id] features = self._preprocess(features) return {'features': inputs_placeholder}, features @@ -183,10 +253,18 @@ def _get_features(self, fields): field_dict = {x: fields[x] for x in self._effective_fields if x in fields} for k in self._appended_fields: field_dict[k] = fields[k] + if constant.SAMPLE_WEIGHT in fields: + logging.info('will use field %s as sample weight' % + self._data_config.sample_weight) + field_dict[constant.SAMPLE_WEIGHT] = fields[constant.SAMPLE_WEIGHT] return field_dict def _get_labels(self, fields): - return OrderedDict([(x, fields[x]) for x in self._label_fields]) + return OrderedDict([ + (x, tf.squeeze(fields[x], axis=1) if len(fields[x].get_shape()) == 2 and + fields[x].get_shape()[1] == 1 else fields[x]) + for x in self._label_fields + ]) def _preprocess(self, 
field_dict): """Preprocess the feature columns. @@ -197,13 +275,35 @@ def _preprocess(self, field_dict): Args: field_dict: string to tensor, tensors are dense, - could be of shape [batch_size], or of shape [] + could be of shape [batch_size], [batch_size, None], or of shape [] Returns: output_dict: some of the tensors are transformed into sparse tensors, such as input tensors of tag features and lookup features """ parsed_dict = {} + + if self._sampler is not None: + sampler_type = self._data_config.WhichOneof('sampler') + sampler_config = getattr(self._data_config, sampler_type) + item_ids = field_dict[sampler_config.item_id_field] + if sampler_type == 'negative_sampler': + sampled = self._sampler.get(item_ids) + elif sampler_type == 'negative_sampler_v2': + user_ids = field_dict[sampler_config.user_id_field] + sampled = self._sampler.get(user_ids, item_ids) + elif sampler_type.startswith('hard_negative_sampler'): + user_ids = field_dict[sampler_config.user_id_field] + sampled = self._sampler.get(user_ids, item_ids) + else: + raise ValueError('Unknown sampler %s' % sampler_type) + for k, v in sampled.items(): + if k in field_dict: + field_dict[k] = tf.concat([field_dict[k], v], axis=0) + else: + parsed_dict[k] = v + self._appended_fields.append(k) + for fc in self._feature_configs: feature_name = fc.feature_name feature_type = fc.feature_type @@ -211,33 +311,59 @@ def _preprocess(self, field_dict): if feature_type == fc.TagFeature: input_0 = fc.input_names[0] field = field_dict[input_0] - if len(field.get_shape()) == 0: - field = tf.expand_dims(field, axis=0) - parsed_dict[input_0] = tf.string_split(field, fc.separator) - if not fc.HasField('hash_bucket_size'): - vals = tf.string_to_number( - parsed_dict[input_0].values, - tf.int32, - name='tag_fea_%s' % input_0) - parsed_dict[input_0] = tf.sparse.SparseTensor( - parsed_dict[input_0].indices, vals, - parsed_dict[input_0].dense_shape) - if len(fc.input_names) > 1: - input_1 = fc.input_names[1] - field = field_dict[input_1] + # Construct the output of TagFeature according to the dimension of field_dict. + # When the input field exceeds 2 dimensions, convert TagFeature to 2D output. 
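+        # Inputs of rank < 2 (or of shape [N, 1]) are split by fc.separator into a SparseTensor,
+        # with an extra key:value split when kv_separator is set;
+        # inputs that already arrive as 2-D multi-value tensors are passed through unchanged.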
+ if len(field.get_shape()) < 2 or field.get_shape()[-1] == 1: if len(field.get_shape()) == 0: field = tf.expand_dims(field, axis=0) - field = tf.string_split(field, fc.separator) - field_vals = tf.string_to_number( - field.values, tf.float32, name='tag_wgt_%s' % input_1) - assert_op = tf.assert_equal( - tf.shape(field_vals)[0], - tf.shape(parsed_dict[input_0].values)[0], - message='tag_feature_kv_size_not_eq') - with tf.control_dependencies([assert_op]): - field = tf.sparse.SparseTensor(field.indices, field_vals, - field.dense_shape) - parsed_dict[input_1] = field + elif len(field.get_shape()) == 2: + field = tf.squeeze(field, axis=-1) + parsed_dict[input_0] = tf.string_split(field, fc.separator) + if fc.HasField('kv_separator'): + indices = parsed_dict[input_0].indices + tmp_kvs = parsed_dict[input_0].values + tmp_kvs = tf.string_split( + tmp_kvs, fc.kv_separator, skip_empty=False) + tmp_kvs = tf.reshape(tmp_kvs.values, [-1, 2]) + tmp_ks, tmp_vs = tmp_kvs[:, 0], tmp_kvs[:, 1] + tmp_vs = tf.string_to_number( + tmp_vs, tf.float32, name='kv_tag_wgt_str_2_flt_%s' % input_0) + parsed_dict[input_0] = tf.sparse.SparseTensor( + indices, tmp_ks, parsed_dict[input_0].dense_shape) + input_wgt = input_0 + '_WEIGHT' + parsed_dict[input_wgt] = tf.sparse.SparseTensor( + indices, tmp_vs, parsed_dict[input_0].dense_shape) + self._appended_fields.append(input_wgt) + if not fc.HasField('hash_bucket_size'): + vals = tf.string_to_number( + parsed_dict[input_0].values, + tf.int32, + name='tag_fea_%s' % input_0) + parsed_dict[input_0] = tf.sparse.SparseTensor( + parsed_dict[input_0].indices, vals, + parsed_dict[input_0].dense_shape) + if len(fc.input_names) > 1: + input_1 = fc.input_names[1] + field = field_dict[input_1] + if len(field.get_shape()) == 0: + field = tf.expand_dims(field, axis=0) + field = tf.string_split(field, fc.separator) + field_vals = tf.string_to_number( + field.values, tf.float32, name='tag_wgt_str_2_flt_%s' % input_1) + assert_op = tf.assert_equal( + tf.shape(field_vals)[0], + tf.shape(parsed_dict[input_0].values)[0], + message='tag_feature_kv_size_not_eq_%s' % input_0) + with tf.control_dependencies([assert_op]): + field = tf.sparse.SparseTensor(field.indices, + tf.identity(field_vals), + field.dense_shape) + parsed_dict[input_1] = field + else: + parsed_dict[input_0] = field_dict[input_0] + if len(fc.input_names) > 1: + input_1 = fc.input_names[1] + parsed_dict[input_1] = field_dict[input_1] elif feature_type == fc.LookupFeature: assert feature_name is not None and feature_name != '' assert len(fc.input_names) == 2 @@ -245,12 +371,50 @@ def _preprocess(self, field_dict): elif feature_type == fc.SequenceFeature: input_0 = fc.input_names[0] field = field_dict[input_0] - parsed_dict[input_0] = tf.string_split(field, fc.separator) + # Construct the output of SeqFeature according to the dimension of field_dict. + # When the input field exceeds 2 dimensions, convert SeqFeature to 2D output. 
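+        # Inputs of rank < 2 are split by fc.separator (and split again by seq_multi_sep
+        # into a 3-D SparseTensor when configured); values are cast to int64 when
+        # num_buckets > 0; 2-D inputs are passed through unchanged.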
+ if len(field.get_shape()) < 2: + parsed_dict[input_0] = tf.strings.split(field, fc.separator) + if fc.HasField('seq_multi_sep'): + indices = parsed_dict[input_0].indices + values = parsed_dict[input_0].values + multi_vals = tf.string_split(values, fc.seq_multi_sep) + indices_1 = multi_vals.indices + indices = tf.gather(indices, indices_1[:, 0]) + out_indices = tf.concat([indices, indices_1[:, 1:]], axis=1) + # 3 dimensional sparse tensor + out_shape = tf.concat( + [parsed_dict[input_0].dense_shape, multi_vals.dense_shape[1:]], + axis=0) + parsed_dict[input_0] = tf.sparse.SparseTensor( + out_indices, multi_vals.values, out_shape) + if fc.num_buckets > 0: + parsed_dict[input_0] = tf.sparse.SparseTensor( + parsed_dict[input_0].indices, + tf.string_to_number( + parsed_dict[input_0].values, + tf.int64, + name='sequence_str_2_int_%s' % input_0), + parsed_dict[input_0].dense_shape) + else: + parsed_dict[input_0] = field elif feature_type == fc.RawFeature: input_0 = fc.input_names[0] if field_dict[input_0].dtype == tf.string: - parsed_dict[input_0] = tf.string_to_number(field_dict[input_0], - tf.float32) + if fc.raw_input_dim > 1: + tmp_fea = tf.string_split(field_dict[input_0], fc.separator) + tmp_vals = tf.string_to_number( + tmp_fea.values, + tf.float32, + name='multi_raw_fea_to_flt_%s' % input_0) + parsed_dict[input_0] = tf.sparse_to_dense( + tmp_fea.indices, + [tf.shape(field_dict[input_0])[0], fc.raw_input_dim], + tmp_vals, + default_value=0) + else: + parsed_dict[input_0] = tf.string_to_number(field_dict[input_0], + tf.float32) elif field_dict[input_0].dtype in [ tf.int32, tf.int64, tf.double, tf.float32 ]: @@ -261,25 +425,30 @@ def _preprocess(self, field_dict): if fc.max_val > fc.min_val: parsed_dict[input_0] = (parsed_dict[input_0] - fc.min_val) /\ (fc.max_val - fc.min_val) - if not fc.boundaries and fc.num_buckets <= 1: + if not fc.boundaries and fc.num_buckets <= 1 and \ + self._data_config.sample_weight != input_0: # may need by wide model and deep model to project # raw values to a vector, it maybe better implemented # by a ProjectionColumn later sample_num = tf.to_int64(tf.shape(parsed_dict[input_0])[0]) indices_0 = tf.range(sample_num, dtype=tf.int64) - indices_1 = tf.zeros([sample_num], dtype=tf.int64) - indices_0 = tf.expand_dims(indices_0, axis=1) - indices_1 = tf.expand_dims(indices_1, axis=1) + indices_1 = tf.range(fc.raw_input_dim, dtype=tf.int64) + indices_0 = indices_0[:, None] + indices_1 = indices_1[None, :] + indices_0 = tf.tile(indices_0, [1, fc.raw_input_dim]) + indices_1 = tf.tile(indices_1, [sample_num, 1]) + indices_0 = tf.reshape(indices_0, [-1, 1]) + indices_1 = tf.reshape(indices_1, [-1, 1]) indices = tf.concat([indices_0, indices_1], axis=1) parsed_dict[input_0 + '_raw_proj_id'] = tf.SparseTensor( indices=indices, - values=tf.zeros_like(parsed_dict[input_0], dtype=tf.int32), - dense_shape=[sample_num, 1]) + values=indices_1[:, 0], + dense_shape=[sample_num, fc.raw_input_dim]) parsed_dict[input_0 + '_raw_proj_val'] = tf.SparseTensor( indices=indices, - values=parsed_dict[input_0], - dense_shape=[sample_num, 1]) + values=tf.reshape(parsed_dict[input_0], [-1]), + dense_shape=[sample_num, fc.raw_input_dim]) self._appended_fields.append(input_0 + '_raw_proj_id') self._appended_fields.append(input_0 + '_raw_proj_val') elif feature_type == fc.IdFeature: @@ -304,22 +473,39 @@ def _preprocess(self, field_dict): else: parsed_dict[input_0] = tf.as_string( field_dict[input_0], precision=precision) + elif fc.num_buckets > 0: + if parsed_dict[input_0].dtype == tf.string: + 
parsed_dict[input_0] = tf.string_to_number( + parsed_dict[input_0], tf.int32, name='%s_str_2_int' % input_0) else: for input_name in fc.input_names: parsed_dict[input_name] = field_dict[input_name] - for input_name in self._label_fields: + for input_id, input_name in enumerate(self._label_fields): if input_name not in field_dict: continue if field_dict[input_name].dtype == tf.string: + if self._label_dim[input_id] > 1: + logging.info('will split labels[%d]=%s' % (input_id, input_name)) + parsed_dict[input_name] = tf.string_split( + field_dict[input_name], self._label_sep[input_id]).values + parsed_dict[input_name] = tf.reshape(parsed_dict[input_name], + [-1, self._label_dim[input_id]]) + else: + parsed_dict[input_name] = field_dict[input_name] parsed_dict[input_name] = tf.string_to_number( - field_dict[input_name], tf.float32, name=input_name) + parsed_dict[input_name], tf.float32, name=input_name) else: assert field_dict[input_name].dtype in [ tf.float32, tf.double, tf.int32, tf.int64 ], 'invalid label dtype: %s' % str(field_dict[input_name].dtype) parsed_dict[input_name] = field_dict[input_name] + if self._data_config.HasField('sample_weight'): + if self._mode != tf.estimator.ModeKeys.PREDICT: + parsed_dict[constant.SAMPLE_WEIGHT] = field_dict[ + self._data_config.sample_weight] + return parsed_dict def _lookup_preprocess(self, fc, field_dict): @@ -410,6 +596,7 @@ def _input_fn(mode=None, params=None, config=None): if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT): # build dataset from self._config.input_path + self._mode = mode dataset = self._build(mode, params) return dataset elif mode is None: # serving_input_receiver_fn for export SavedModel @@ -418,7 +605,13 @@ def _input_fn(mode=None, params=None, config=None): return tf.estimator.export.ServingInputReceiver( embed_inputs, embed_inputs) elif export_config.multi_placeholder: - inputs, features = self.create_multi_placeholders() + if export_config.multi_value_fields: + export_fields_name = export_config.multi_value_fields.input_name + else: + export_fields_name = None + placeholder_named_by_input = export_config.placeholder_named_by_input + inputs, features = self.create_multi_placeholders( + placeholder_named_by_input, export_fields_name) return tf.estimator.export.ServingInputReceiver(features, inputs) else: inputs, features = self.create_placeholders() diff --git a/easy_rec/python/input/kafka_input.py b/easy_rec/python/input/kafka_input.py index 6ea71bbeb..63bf5a4d2 100644 --- a/easy_rec/python/input/kafka_input.py +++ b/easy_rec/python/input/kafka_input.py @@ -66,9 +66,12 @@ def _build(self, mode, params): train = self._kafka topics = [] i = self._task_index + assert len(train.offset) == 1 or len(train.offset) == train.partitions, \ + 'number of train.offset must be 1 or train.partitions' while i < train.partitions: - topics.append(train.topic + ':' + str(i) + ':' + str(train.offset) + - ':-1') + offset_i = train.offset[i] if i < len( + train.offset) else train.offset[-1] + topics.append(train.topic + ':' + str(i) + ':' + str(offset_i) + ':-1') i = i + self._task_num logging.info( @@ -86,7 +89,10 @@ def _build(self, mode, params): eval = self._kafka topics = [] i = 0 + assert len(eval.offset) == 1 or len(eval.offset) == eval.partitions, \ + 'number of eval.offset must be 1 or eval.partitions' while i < eval.partitions: + offset_i = eval.offset[i] if i < len(eval.offset) else eval.offset[-1] topics.append(eval.topic + ':' + str(i) + ':' + str(eval.offset) + ':-1') i = i + 1 diff 
--git a/easy_rec/python/input/odps_input.py b/easy_rec/python/input/odps_input.py index bded134dc..d70cbd42d 100644 --- a/easy_rec/python/input/odps_input.py +++ b/easy_rec/python/input/odps_input.py @@ -5,6 +5,11 @@ from easy_rec.python.input.input import Input from easy_rec.python.utils import odps_util +try: + import pai +except Exception: + pass + class OdpsInput(Input): @@ -22,19 +27,38 @@ def _build(self, mode, params): odps_util.check_input_field_and_types(self._data_config) selected_cols = ','.join(self._input_fields) - reader = tf.TableRecordReader( - csv_delimiter=self._data_config.separator, - selected_cols=selected_cols, - slice_count=self._task_num, - slice_id=self._task_index) + if self._data_config.chief_redundant and \ + mode == tf.estimator.ModeKeys.TRAIN: + reader = tf.TableRecordReader( + csv_delimiter=self._data_config.separator, + selected_cols=selected_cols, + slice_count=max(self._task_num - 1, 1), + slice_id=max(self._task_index - 1, 0)) + else: + reader = tf.TableRecordReader( + csv_delimiter=self._data_config.separator, + selected_cols=selected_cols, + slice_count=self._task_num, + slice_id=self._task_index) + if type(self._input_path) != list: self._input_path = [x for x in self._input_path.split(',')] if mode == tf.estimator.ModeKeys.TRAIN: - file_queue = tf.train.string_input_producer( - self._input_path, - num_epochs=self.num_epochs, - capacity=1000, - shuffle=self._data_config.shuffle) + if self._data_config.pai_worker_queue: + work_queue = pai.data.WorkQueue( + self._input_path, + num_epochs=self.num_epochs, + shuffle=self._data_config.shuffle, + num_slices=self._data_config.pai_worker_slice_num * self._task_num) + work_queue.add_summary() + file_queue = work_queue.input_producer() + reader = tf.TableRecordReader() + else: + file_queue = tf.train.string_input_producer( + self._input_path, + num_epochs=self.num_epochs, + capacity=1000, + shuffle=self._data_config.shuffle) else: file_queue = tf.train.string_input_producer( self._input_path, num_epochs=1, capacity=1000, shuffle=False) diff --git a/easy_rec/python/input/odps_input_v2.py b/easy_rec/python/input/odps_input_v2.py index 511a28c0e..60a2ae080 100644 --- a/easy_rec/python/input/odps_input_v2.py +++ b/easy_rec/python/input/odps_input_v2.py @@ -1,10 +1,17 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
+import logging + import tensorflow as tf from easy_rec.python.input.input import Input from easy_rec.python.utils import odps_util +try: + import pai +except Exception: + pass + class OdpsInputV2(Input): @@ -36,12 +43,36 @@ def _build(self, mode, params): self.get_type_defaults(x, v) for x, v in zip(self._input_field_types, self._input_field_defaults) ] - dataset = tf.data.TableRecordDataset( - self._input_path, - record_defaults=record_defaults, - selected_cols=selected_cols, - slice_id=self._task_index, - slice_count=self._task_num) + + if self._data_config.pai_worker_queue and \ + mode == tf.estimator.ModeKeys.TRAIN: + logging.info('pai_worker_slice_num = %d' % + self._data_config.pai_worker_slice_num) + work_queue = pai.data.WorkQueue( + self._input_path, + num_epochs=self.num_epochs, + shuffle=self._data_config.shuffle, + num_slices=self._data_config.pai_worker_slice_num * self._task_num) + que_paths = work_queue.input_dataset() + dataset = tf.data.TableRecordDataset( + que_paths, + record_defaults=record_defaults, + selected_cols=selected_cols) + elif self._data_config.chief_redundant and \ + mode == tf.estimator.ModeKeys.TRAIN: + dataset = tf.data.TableRecordDataset( + self._input_path, + record_defaults=record_defaults, + selected_cols=selected_cols, + slice_id=max(self._task_index - 1, 0), + slice_count=max(self._task_num - 1, 1)) + else: + dataset = tf.data.TableRecordDataset( + self._input_path, + record_defaults=record_defaults, + selected_cols=selected_cols, + slice_id=self._task_index, + slice_count=self._task_num) if mode == tf.estimator.ModeKeys.TRAIN: if self._data_config.shuffle: diff --git a/easy_rec/python/input/odps_input_v3.py b/easy_rec/python/input/odps_input_v3.py new file mode 100644 index 000000000..4a67049b2 --- /dev/null +++ b/easy_rec/python/input/odps_input_v3.py @@ -0,0 +1,126 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
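+# OdpsInputV3 reads MaxCompute (ODPS) tables through common_io.table.TableReader inside a
+# Python generator and feeds the batches to tf.data.Dataset.from_generator, so it can run
+# locally or on DataScience (see the class docstring).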
+ +import logging +import sys + +import numpy as np +import tensorflow as tf + +from easy_rec.python.input.input import Input +from easy_rec.python.utils import odps_util + +try: + import common_io +except Exception: + common_io = None + + +class OdpsInputV3(Input): + """Common IO based interface, could run at local or on data science.""" + + def __init__(self, + data_config, + feature_config, + input_path, + task_index=0, + task_num=1): + super(OdpsInputV3, self).__init__(data_config, feature_config, input_path, + task_index, task_num) + self._num_epoch = 0 + if common_io is None: + logging.error("""please install common_io pip install + https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/common_io-0.1.0-cp37-cp37m-linux_x86_64.whl""" + ) + sys.exit(1) + + def _parse_table(self, *fields): + fields = list(fields) + inputs = {self._input_fields[x]: fields[x] for x in self._effective_fids} + for x in self._label_fids: + inputs[self._input_fields[x]] = fields[x] + return inputs + + def _odps_read(self): + logging.info('start epoch[%d]' % self._num_epoch) + self._num_epoch += 1 + if type(self._input_path) != list: + self._input_path = [x for x in self._input_path.split(',')] + + # check data_config are consistent with odps tables + odps_util.check_input_field_and_types(self._data_config) + + record_defaults = [ + self.get_type_defaults(x, v) + for x, v in zip(self._input_field_types, self._input_field_defaults) + ] + + selected_cols = ','.join(self._input_fields) + for table_path in self._input_path: + reader = common_io.table.TableReader( + table_path, + selected_cols=selected_cols, + slice_id=self._task_index, + slice_count=self._task_num) + total_records_num = reader.get_row_count() + batch_num = int(total_records_num / self._data_config.batch_size) + res_num = total_records_num - batch_num * self._data_config.batch_size + batch_defaults = [ + np.array([x] * self._data_config.batch_size) for x in record_defaults + ] + for batch_id in range(batch_num): + batch_data_np = [x.copy() for x in batch_defaults] + for row_id, one_data in enumerate( + reader.read(self._data_config.batch_size)): + for col_id in range(len(record_defaults)): + if one_data[col_id] not in ['', 'NULL', None]: + batch_data_np[col_id][row_id] = one_data[col_id] + yield tuple(batch_data_np) + if res_num > 0: + batch_data_np = [x[:res_num] for x in batch_defaults] + for row_id, one_data in enumerate(reader.read(res_num)): + for col_id in range(len(record_defaults)): + if one_data[col_id] not in ['', 'NULL', None]: + batch_data_np[col_id][row_id] = one_data[col_id] + yield tuple(batch_data_np) + reader.close() + logging.info('finish epoch[%d]' % self._num_epoch) + + def _build(self, mode, params): + # get input type + list_type = [self.get_tf_type(x) for x in self._input_field_types] + list_type = tuple(list_type) + list_shapes = [tf.TensorShape([None]) for x in range(0, len(list_type))] + list_shapes = tuple(list_shapes) + + # read odps tables + dataset = tf.data.Dataset.from_generator( + self._odps_read, output_types=list_type, output_shapes=list_shapes) + + if mode == tf.estimator.ModeKeys.TRAIN: + dataset = dataset.shuffle( + self._data_config.shuffle_buffer_size, + seed=2020, + reshuffle_each_iteration=True) + dataset = dataset.repeat(self.num_epochs) + else: + dataset = dataset.repeat(1) + + dataset = dataset.map( + self._parse_table, + num_parallel_calls=self._data_config.num_parallel_calls) + + # preprocess is necessary to transform data + # so that they could be feed into FeatureColumns + dataset = dataset.map( + 
map_func=self._preprocess, + num_parallel_calls=self._data_config.num_parallel_calls) + + dataset = dataset.prefetch(buffer_size=self._prefetch_size) + + if mode != tf.estimator.ModeKeys.PREDICT: + dataset = dataset.map(lambda x: + (self._get_features(x), self._get_labels(x))) + else: + dataset = dataset.map(lambda x: (self._get_features(x))) + return dataset diff --git a/easy_rec/python/input/odps_rtp_input.py b/easy_rec/python/input/odps_rtp_input.py index 66b5be3d6..df5efc9f0 100644 --- a/easy_rec/python/input/odps_rtp_input.py +++ b/easy_rec/python/input/odps_rtp_input.py @@ -2,10 +2,16 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import logging +import numpy as np import tensorflow as tf from easy_rec.python.input.input import Input +try: + import pai +except Exception: + pass + class OdpsRTPInput(Input): """RTPInput for parsing rtp fg new input format on odps. @@ -45,11 +51,29 @@ def _parse_table(self, *fields): ] # assume that the last field is the generated feature column print('field_delim = %s' % self._data_config.separator) - fields = tf.decode_csv( - fields[-1], - record_defaults=record_defaults, - field_delim=self._data_config.separator, - name='decode_csv') + fields = tf.string_split( + fields[-1], self._data_config.separator, skip_empty=False) + tmp_fields = tf.reshape(fields.values, [-1, len(record_defaults)]) + fields = [] + for i in range(len(record_defaults)): + if type(record_defaults[i]) == int: + fields.append( + tf.string_to_number( + tmp_fields[:, i], tf.int64, name='field_as_int_%d' % i)) + elif type(record_defaults[i]) in [float, np.float32, np.float64]: + fields.append( + tf.string_to_number( + tmp_fields[:, i], tf.float32, name='field_as_flt_%d' % i)) + elif type(record_defaults[i]) in [str, type(u''), bytes]: + fields.append(tmp_fields[:, i]) + elif type(record_defaults[i]) == bool: + fields.append( + tf.logical_or( + tf.equal(tmp_fields[:, i], 'True'), + tf.equal(tmp_fields[:, i], 'true'))) + else: + assert 'invalid types: %s' % str(type(record_defaults[i])) + field_keys = [x for x in self._input_fields if x not in self._label_fields] effective_fids = [field_keys.index(x) for x in self._effective_fields] inputs = {field_keys[x]: fields[x] for x in effective_fids} @@ -60,7 +84,7 @@ def _parse_table(self, *fields): def _build(self, mode, params): if type(self._input_path) != list: - self._input_path = [self._input_path] + self._input_path = [x for x in self._input_path.split(',')] record_defaults = [ self.get_type_defaults(t, v) @@ -78,12 +102,28 @@ def _build(self, mode, params): ])) selected_cols = self._data_config.selected_cols \ if self._data_config.selected_cols else None - dataset = tf.data.TableRecordDataset( - self._input_path, - record_defaults=record_defaults, - selected_cols=selected_cols, - slice_id=self._task_index, - slice_count=self._task_num) + + if self._data_config.pai_worker_queue and \ + mode == tf.estimator.ModeKeys.TRAIN: + logging.info('pai_worker_slice_num = %d' % + self._data_config.pai_worker_slice_num) + work_queue = pai.data.WorkQueue( + self._input_path, + num_epochs=self.num_epochs, + shuffle=self._data_config.shuffle, + num_slices=self._data_config.pai_worker_slice_num * self._task_num) + que_paths = work_queue.input_dataset() + dataset = tf.data.TableRecordDataset( + que_paths, + record_defaults=record_defaults, + selected_cols=selected_cols) + else: + dataset = tf.data.TableRecordDataset( + self._input_path, + record_defaults=record_defaults, + selected_cols=selected_cols, + slice_id=self._task_index, + 
slice_count=self._task_num) if mode == tf.estimator.ModeKeys.TRAIN: if self._data_config.shuffle: diff --git a/easy_rec/python/input/rtp_input.py b/easy_rec/python/input/rtp_input.py index 63b2a8b67..e09a817df 100644 --- a/easy_rec/python/input/rtp_input.py +++ b/easy_rec/python/input/rtp_input.py @@ -2,6 +2,7 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import logging +import numpy as np import tensorflow as tf from easy_rec.python.input.input import Input @@ -63,9 +64,9 @@ def _parse_csv(self, line): if x not in self._label_fields ]) - fields = tf.decode_csv( - line, field_delim=self._rtp_separator, record_defaults=record_defaults) - labels = [fields[x] for x in self._selected_cols[:-1]] + fields = tf.string_split(line, self._rtp_separator, skip_empty=False) + fields = tf.reshape(fields.values, [-1, len(record_defaults)]) + labels = [fields[:, x] for x in self._selected_cols[:-1]] # only for features, labels excluded record_defaults = [ @@ -76,11 +77,31 @@ def _parse_csv(self, line): ] # assume that the last field is the generated feature column print('field_delim = %s' % self._data_config.separator) - fields = tf.decode_csv( - fields[self._feature_col_id], - record_defaults=record_defaults, - field_delim=self._data_config.separator, - name='decode_csv') + fields = tf.string_split( + fields[:, self._feature_col_id], + self._data_config.separator, + skip_empty=False) + tmp_fields = tf.reshape(fields.values, [-1, len(record_defaults)]) + fields = [] + for i in range(len(record_defaults)): + if type(record_defaults[i]) == int: + fields.append( + tf.string_to_number( + tmp_fields[:, i], tf.int64, name='field_as_int_%d' % i)) + elif type(record_defaults[i]) in [float, np.float32, np.float64]: + fields.append( + tf.string_to_number( + tmp_fields[:, i], tf.float32, name='field_as_flt_%d' % i)) + elif type(record_defaults[i]) in [str, type(u''), bytes]: + fields.append(tmp_fields[:, i]) + elif type(record_defaults[i]) == bool: + fields.append( + tf.logical_or( + tf.equal(tmp_fields[:, i], 'True'), + tf.equal(tmp_fields[:, i], 'true'))) + else: + assert 'invalid types: %s' % str(type(record_defaults[i])) + field_keys = [x for x in self._input_fields if x not in self._label_fields] effective_fids = [field_keys.index(x) for x in self._effective_fields] inputs = {field_keys[x]: fields[x] for x in effective_fids} @@ -137,7 +158,11 @@ def _build(self, mode, params): tf.data.TextLineDataset, cycle_length=parallel_num, num_parallel_calls=parallel_num) - dataset = dataset.shard(self._task_num, self._task_index) + if self._data_config.chief_redundant: + dataset = dataset.shard( + max(self._task_num - 1, 1), max(self._task_index - 1, 0)) + else: + dataset = dataset.shard(self._task_num, self._task_index) if self._data_config.shuffle: dataset = dataset.shuffle( self._data_config.shuffle_buffer_size, diff --git a/easy_rec/python/input/rtp_input_v2.py b/easy_rec/python/input/rtp_input_v2.py index fa51015ec..1635c623a 100644 --- a/easy_rec/python/input/rtp_input_v2.py +++ b/easy_rec/python/input/rtp_input_v2.py @@ -99,7 +99,11 @@ def _build(self, mode, params): tf.data.TextLineDataset, cycle_length=parallel_num, num_parallel_calls=parallel_num) - dataset = dataset.shard(self._task_num, self._task_index) + if self._data_config.chief_redundant: + dataset = dataset.shard( + max(self._task_num - 1, 1), max(self._task_index - 1, 0)) + else: + dataset = dataset.shard(self._task_num, self._task_index) if self._data_config.shuffle: dataset = dataset.shuffle( self._data_config.shuffle_buffer_size, diff --git 
a/easy_rec/python/input/tfrecord_input.py b/easy_rec/python/input/tfrecord_input.py new file mode 100644 index 000000000..c3d9e228e --- /dev/null +++ b/easy_rec/python/input/tfrecord_input.py @@ -0,0 +1,88 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +import tensorflow as tf + +from easy_rec.python.input.input import Input + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class TFRecordInput(Input): + + def __init__(self, + data_config, + feature_config, + input_path, + task_index=0, + task_num=1): + super(TFRecordInput, self).__init__(data_config, feature_config, input_path, + task_index, task_num) + + self.feature_desc = {} + for x, t, d in zip(self._input_fields, self._input_field_types, + self._input_field_defaults): + d = self.get_type_defaults(t, d) + t = self.get_tf_type(t) + self.feature_desc[x] = tf.FixedLenFeature( + dtype=t, shape=1, default_value=d) + + def _parse_tfrecord(self, example): + try: + inputs = tf.parse_single_example(example, features=self.feature_desc) + except AttributeError: + inputs = tf.io.parse_single_example(example, features=self.feature_desc) + return inputs + + def _build(self, mode, params): + file_paths = tf.gfile.Glob(self._input_path) + assert len(file_paths) > 0, 'match no files with %s' % self._input_path + + num_parallel_calls = self._data_config.num_parallel_calls + data_compression_type = self._data_config.data_compression_type + if mode == tf.estimator.ModeKeys.TRAIN: + logging.info('train files[%d]: %s' % + (len(file_paths), ','.join(file_paths))) + dataset = tf.data.Dataset.from_tensor_slices(file_paths) + if self._data_config.shuffle: + # shuffle input files + dataset = dataset.shuffle(len(file_paths)) + # too many readers read the same file will cause performance issues + # as the same data will be read multiple times + parallel_num = min(num_parallel_calls, len(file_paths)) + dataset = dataset.interleave( + lambda x: tf.data.TFRecordDataset( + x, compression_type=data_compression_type), + cycle_length=parallel_num, + num_parallel_calls=parallel_num) + dataset = dataset.shard(self._task_num, self._task_index) + if self._data_config.shuffle: + dataset = dataset.shuffle( + self._data_config.shuffle_buffer_size, + seed=2020, + reshuffle_each_iteration=True) + dataset = dataset.repeat(self.num_epochs) + else: + logging.info('eval files[%d]: %s' % + (len(file_paths), ','.join(file_paths))) + dataset = tf.data.TFRecordDataset( + file_paths, compression_type=data_compression_type) + dataset = dataset.repeat(1) + + dataset = dataset.map( + self._parse_tfrecord, num_parallel_calls=num_parallel_calls) + dataset = dataset.batch(self._data_config.batch_size) + dataset = dataset.prefetch(buffer_size=self._prefetch_size) + dataset = dataset.map( + map_func=self._preprocess, num_parallel_calls=num_parallel_calls) + + dataset = dataset.prefetch(buffer_size=self._prefetch_size) + + if mode != tf.estimator.ModeKeys.PREDICT: + dataset = dataset.map(lambda x: + (self._get_features(x), self._get_labels(x))) + else: + dataset = dataset.map(lambda x: (self._get_features(x))) + return dataset diff --git a/easy_rec/python/layers/capsule_layer.py b/easy_rec/python/layers/capsule_layer.py new file mode 100644 index 000000000..4b6928402 --- /dev/null +++ b/easy_rec/python/layers/capsule_layer.py @@ -0,0 +1,120 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
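The new TFRecordInput in the diff above builds a tf.FixedLenFeature description from the configured input fields and parses each record with parse_single_example before batching and prefetching. Below is a minimal standalone sketch of that same parsing pattern; the field names, types, defaults and file path are hypothetical and are not taken from any EasyRec config.

```
# Sketch only: schema and paths below are made up for illustration.
import tensorflow as tf

if tf.__version__ >= '2.0':
  tf = tf.compat.v1

# hypothetical schema: one float label and one string feature
feature_desc = {
    'label': tf.FixedLenFeature(shape=1, dtype=tf.float32, default_value=0.0),
    'item_id': tf.FixedLenFeature(shape=1, dtype=tf.string, default_value='')
}


def _parse(example):
  # same call used by TFRecordInput._parse_tfrecord
  return tf.parse_single_example(example, features=feature_desc)


dataset = tf.data.TFRecordDataset(['part-0.tfrecord'])  # hypothetical file
dataset = dataset.map(_parse, num_parallel_calls=4)
dataset = dataset.batch(32).prefetch(4)
```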
+import logging + +import numpy as np +import tensorflow as tf + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class CapsuleLayer: + + def __init__(self, capsule_config, is_training): + # max_seq_len: max behaviour sequence length(history length) + self._max_seq_len = capsule_config.max_seq_len + # max_k: max high capsule number + self._max_k = capsule_config.max_k + # high_dim: high capsule vector dimension + self._high_dim = capsule_config.high_dim + # number of Expectation-Maximization iterations + self._num_iters = capsule_config.num_iters + # routing_logits_scale + self._routing_logits_scale = capsule_config.routing_logits_scale + # routing_logits_stddev + self._routing_logits_stddev = capsule_config.routing_logits_stddev + self._is_training = is_training + + def squash(self, inputs): + """Squash inputs over the last dimension.""" + input_norm = tf.reduce_sum(tf.square(inputs), keep_dims=True, axis=-1) + scalar_factor = input_norm / (1 + input_norm) / tf.sqrt(input_norm + 1e-8) + return scalar_factor * inputs + + def __call__(self, seq_feas, seq_lens): + """Capsule layer. + + Args: + seq_feas: tensor of shape batch_size x self._max_seq_len x low_fea_dim(bsd) + seq_lens: tensor of shape batch_size + + Return: + high_capsules: tensor of shape batch_size x max_k x high_dim + """ + # pad or clip to max_seq_len + seq_feas = tf.cond( + tf.greater(tf.shape(seq_feas)[1], self._max_seq_len), + lambda: seq_feas[:, :self._max_seq_len, :], lambda: tf.cond( + tf.less(tf.shape(seq_feas)[1], self._max_seq_len), lambda: tf.pad( + seq_feas, [[0, 0], [ + 0, self._max_seq_len - tf.shape(seq_feas)[1] + ], [0, 0]]), lambda: seq_feas)) + seq_lens = tf.minimum(seq_lens, self._max_seq_len) + + batch_size = tf.shape(seq_lens)[0] + # max_seq_len x max_num_high_capsule(sh) + if self._is_training: + routing_logits = tf.truncated_normal( + [batch_size, self._max_seq_len, self._max_k], + stddev=self._routing_logits_stddev) + else: + np.random.seed(28) + routing_logits = tf.constant( + np.random.uniform( + high=self._routing_logits_stddev, + size=[self._max_seq_len, self._max_k]), + dtype=tf.float32) + routing_logits = tf.tile(routing_logits[None, :, :], [batch_size, 1, 1]) + routing_logits = tf.stop_gradient(routing_logits) + # batch_size x max_seq_len x max_k(bsh) + low_fea_dim = seq_feas.get_shape()[-1] + # map low capsule features to high capsule features: + # low_fea_dim x high_dim(de) + bilinear_matrix = tf.get_variable( + dtype=tf.float32, shape=[low_fea_dim, self._high_dim], name='capsule/S') + # map sequence feature to high dimensional space + seq_feas_high = tf.tensordot(seq_feas, bilinear_matrix, axes=1) + seq_feas_high_stop = tf.stop_gradient(seq_feas_high) + seq_feas_high_norm = tf.nn.l2_normalize(seq_feas_high_stop, -1) + num_high_capsules = tf.maximum( + 1, tf.minimum(self._max_k, tf.to_int32(tf.log(tf.to_float(seq_lens))))) + # batch_size x max_seq_len(bs) + mask = tf.sequence_mask(seq_lens, self._max_seq_len) + mask = tf.cast(mask, tf.float32) + # batch_size x max_k(bh) + mask_cap = tf.sequence_mask(num_high_capsules, self._max_k) + mask_cap = tf.cast(mask_cap, tf.float32) + # batch_size x max_seq_len x 1(bs1) + # max_seq_thresh = (mask[:, :, None] * 2 - 1) * 1e32 + # batch_size x 1 x h (b1h) + max_cap_thresh = (tf.cast(mask_cap[:, None, :], tf.float32) * 2 - 1) * 1e32 + for iter_id in range(self._num_iters): + # batch_size x max_seq_len x max_k(bsh) + routing_logits = tf.minimum(routing_logits, max_cap_thresh) + routing_logits = tf.nn.softmax(routing_logits, axis=2) + routing_logits = 
routing_logits * mask[:, :, None] + # batch_size x max_k x high_dim(bse,bsh->bhe) + high_capsules = tf.einsum( + 'bse, bsh->bhe', seq_feas_high_stop + if iter_id + 1 < self._num_iters else seq_feas_high, routing_logits) + if iter_id + 1 == self._num_iters: + high_capsules = self.squash(high_capsules) + break + # batch_size x max_k x high_dim(bhe) + high_capsules = tf.nn.l2_normalize(high_capsules, -1) + # batch_size x max_seq_len x max_k(bse, bhe->bsh) + if self._routing_logits_scale > 0: + if iter_id == 0: + logging.info('routing_logits_scale = %.2f' % + self._routing_logits_scale) + routing_logits = tf.einsum('bse, bhe->bsh', seq_feas_high_norm, + high_capsules) * self._routing_logits_scale + else: + routing_logits = tf.einsum('bse, bhe->bsh', seq_feas_high_stop, + high_capsules) + + # zero paddings + # high_capsule_mask = tf.sequence_mask(num_high_capsules, self._max_k) + # high_capsules = high_capsules * tf.to_float(high_capsule_mask[:, :, None]) + return high_capsules, num_high_capsules diff --git a/easy_rec/python/layers/dnn.py b/easy_rec/python/layers/dnn.py index 96a646f48..ff6e9630d 100644 --- a/easy_rec/python/layers/dnn.py +++ b/easy_rec/python/layers/dnn.py @@ -36,7 +36,9 @@ def hidden_units(self): def dropout_ratio(self): return self._config.dropout_ratio - def __call__(self, deep_fea): + def __call__(self, deep_fea, hidden_layer_feature_output=False): + hidden_units_len = len(self.hidden_units) + hidden_feature_dict = {} for i, unit in enumerate(self.hidden_units): deep_fea = tf.layers.dense( inputs=deep_fea, @@ -59,4 +61,11 @@ def __call__(self, deep_fea): deep_fea, keep_prob=1 - self.dropout_ratio[i], name='%s/%d/dropout' % (self._name, i)) - return deep_fea + + if hidden_layer_feature_output: + hidden_feature_dict['hidden_layer' + str(i)] = deep_fea + if (i + 1 == hidden_units_len): + hidden_feature_dict['hidden_layer_end'] = deep_fea + return hidden_feature_dict + else: + return deep_fea diff --git a/easy_rec/python/layers/input_layer.py b/easy_rec/python/layers/input_layer.py index 6bd15721e..86a98f8d7 100644 --- a/easy_rec/python/layers/input_layer.py +++ b/easy_rec/python/layers/input_layer.py @@ -1,24 +1,19 @@ # -*- encoding: utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
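The CapsuleLayer added above is a dynamic-routing interest extractor: it routes a user's behaviour sequence into at most max_k interest capsules, caps the number of active capsules by the log of the sequence length, and applies the squash non-linearity to the routed outputs. A small standalone sketch of those two pieces (independent of the routing loop itself):

```
import tensorflow as tf

if tf.__version__ >= '2.0':
  tf = tf.compat.v1


def squash(inputs):
  # same non-linearity as CapsuleLayer.squash:
  #   s(x) = ||x||^2 / (1 + ||x||^2) * x / ||x||
  input_norm = tf.reduce_sum(tf.square(inputs), axis=-1, keep_dims=True)
  scale = input_norm / (1 + input_norm) / tf.sqrt(input_norm + 1e-8)
  return scale * inputs


def num_capsules(seq_lens, max_k):
  # active interest capsules grow with log(sequence length),
  # clipped to [1, max_k], mirroring num_high_capsules above
  return tf.maximum(
      1, tf.minimum(max_k, tf.to_int32(tf.log(tf.to_float(seq_lens)))))
```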
-import json -import logging -import os - import tensorflow as tf -import easy_rec +from easy_rec.python.compat import regularizers from easy_rec.python.compat.feature_column import feature_column from easy_rec.python.feature_column.feature_column import FeatureColumnParser from easy_rec.python.feature_column.feature_group import FeatureGroup +from easy_rec.python.layers import variational_dropout_layer from easy_rec.python.protos.feature_config_pb2 import WideOrDeep from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA if tf.__version__ >= '2.0': - gfile = tf.compat.v1.gfile -else: - gfile = tf.gfile + tf = tf.compat.v1 class InputLayer(object): @@ -30,8 +25,12 @@ class InputLayer(object): def __init__(self, feature_configs, feature_groups_config, + variational_dropout_config, wide_output_dim=-1, - use_embedding_variable=False): + use_embedding_variable=False, + embedding_regularizer=None, + kernel_regularizer=None, + is_training=False): self._feature_groups = { x.group_name: FeatureGroup(x) for x in feature_groups_config } @@ -42,55 +41,102 @@ def __init__(self, wide_output_dim, use_embedding_variable=use_embedding_variable) + self._embedding_regularizer = embedding_regularizer + self._kernel_regularizer = kernel_regularizer + self._is_training = is_training + self._variational_dropout_config = variational_dropout_config + def has_group(self, group_name): return group_name in self._feature_groups - def __call__(self, features, group_name): - assert group_name in self._feature_groups, 'invalid group_name[%s], list: %s' % \ - (group_name, ','.join([x for x in self._feature_groups])) - feature_group = self._feature_groups[group_name] - group_columns = feature_group.select_columns(self._fc_parser) - cols_to_output_tensors = {} - features = feature_column.input_layer( - features, group_columns, cols_to_output_tensors=cols_to_output_tensors) - - # dump model inputs - self._dump_feature_shape(group_name, group_columns, cols_to_output_tensors) - - group_features = [cols_to_output_tensors[x] for x in group_columns] - return features, group_features - - def _dump_feature_shape(self, group_name, group_columns, - cols_to_output_tensors): - """Dump embedding feature column shape info. - - For large embedding serving on eas, the shapes are dumped so that - embedding placeholders could be create in easy_rec/python/input/input.py + def __call__(self, features, group_name, is_combine=True): + """Get features by group_name. Args: - group_name: feature group name - group_columns: feature columns of the group - cols_to_output_tensors: dict of feature_columns to feature tensors + features: input tensor dict + group_name: feature_group name + is_combine: whether to combine sequence features over the + time dimension. + + Return: + features: all features concatenate together + group_features: list of features + seq_features: list of sequence features, each element is a tuple: + 3 dimension embedding tensor (batch_size, max_seq_len, embedding_dimension), + 1 dimension sequence length tensor. 
""" - if 'dump_embedding_shape_dir' not in easy_rec._global_config: - return - dump_dir = easy_rec._global_config['dump_embedding_shape_dir'] - dump_path = os.path.join(dump_dir, 'input_layer_%s.txt' % group_name) - with gfile.GFile(dump_path, 'w') as fout: - for key in group_columns: - assert 'name' in dir(key), 'column(%s) has no attributes name: %s' % \ - (str(key), str(dir(key))) - if tf.__version__ >= '2.0': - shape_vals = [x for x in cols_to_output_tensors[key].shape] - else: - shape_vals = [x.value for x in cols_to_output_tensors[key].shape] - shape_config = {'name': key.name, 'shape': shape_vals} - if isinstance(key, _SharedEmbeddingColumn): - shape_config['embedding_name'] = key.shared_embedding_collection_name - elif isinstance(key, EmbeddingColumn): - shape_config['embedding_name'] = key.name.replace('_embedding', '') - fout.write('%s\n' % json.dumps(shape_config)) - logging.info('dump input_layer to %s' % dump_path) + assert group_name in self._feature_groups, 'invalid group_name[%s], list: %s' % ( + group_name, ','.join([x for x in self._feature_groups])) + feature_group = self._feature_groups[group_name] + group_columns, group_seq_columns = feature_group.select_columns( + self._fc_parser) + + if is_combine: # return sequence features in combined format + cols_to_output_tensors = {} + output_features = feature_column.input_layer( + features, + group_columns, + cols_to_output_tensors=cols_to_output_tensors) + embedding_reg_lst = [output_features] + builder = feature_column._LazyBuilder(features) + seq_features = [] + for column in sorted(group_seq_columns, key=lambda x: x.name): + with tf.variable_scope(None, default_name=column._var_scope_name): + seq_feature, seq_len = column._get_sequence_dense_tensor(builder) + embedding_reg_lst.append(seq_feature) + + sequence_combiner = column.sequence_combiner + if sequence_combiner is None: + raise ValueError( + 'sequence_combiner is none, please set sequence_combiner or use TagFeature' + ) + if sequence_combiner.WhichOneof('combiner') == 'attention': + attn_logits = tf.layers.dense( + inputs=seq_feature, + units=1, + kernel_regularizer=self._kernel_regularizer, + use_bias=False, + activation=None, + name='attention') + attn_logits = tf.squeeze(attn_logits, axis=-1) + attn_logits_padding = tf.ones_like(attn_logits) * (-2**32 + 1) + seq_mask = tf.sequence_mask(seq_len) + attn_score = tf.nn.softmax( + tf.where(seq_mask, attn_logits, attn_logits_padding)) + seq_feature = tf.reduce_sum( + attn_score[:, :, tf.newaxis] * seq_feature, axis=1) + seq_features.append(seq_feature) + cols_to_output_tensors[column] = seq_feature + else: + raise NotImplementedError + if self._variational_dropout_config is not None: + variational_dropout = variational_dropout_layer.VariationalDropoutLayer( + self._variational_dropout_config, group_columns, self._is_training) + noisy_features = variational_dropout(output_features) + concat_features = tf.concat([noisy_features] + seq_features, axis=-1) + else: + concat_features = tf.concat([output_features] + seq_features, axis=-1) + regularizers.apply_regularization( + self._embedding_regularizer, weights_list=embedding_reg_lst) + + group_features = [cols_to_output_tensors[x] for x in group_columns] + \ + [cols_to_output_tensors[x] for x in group_seq_columns] + return concat_features, group_features + + else: # return sequence feature in raw format instead of combine them + assert len(group_columns) == 0, \ + 'there are none sequence columns: %s' % str(group_columns) + builder = feature_column._LazyBuilder(features) + 
seq_features = [] + embedding_reg_lst = [] + for fc in group_seq_columns: + with tf.variable_scope('input_layer/' + fc.categorical_column.name): + tmp_embedding, tmp_seq_len = fc._get_sequence_dense_tensor(builder) + seq_features.append((tmp_embedding, tmp_seq_len)) + embedding_reg_lst.append(tmp_embedding) + regularizers.apply_regularization( + self._embedding_regularizer, weights_list=embedding_reg_lst) + return seq_features def get_wide_deep_dict(self): """Get wide or deep indicator for feature columns. diff --git a/easy_rec/python/layers/multihead_attention.py b/easy_rec/python/layers/multihead_attention.py new file mode 100644 index 000000000..1da28e5d4 --- /dev/null +++ b/easy_rec/python/layers/multihead_attention.py @@ -0,0 +1,162 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import tensorflow as tf + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class MultiHeadAttention: + + def __init__(self, head_num, head_size, l2_reg, use_res=False, name=''): + """Initializes a `MultiHeadAttention` Layer. + + Args: + head_num: The number of heads + head_size: The dimension of a head + l2_reg: l2 regularizer + use_res: Whether to use residual connections before output. + name: scope of the MultiHeadAttention, so that the parameters could be separated from other MultiHeadAttention + """ + self._head_num = head_num + self._head_size = head_size + self._l2_reg = l2_reg + self._use_res = use_res + self._name = name + + def _split_multihead_qkv(self, q, k, v): + """Split multiple heads. + + Args: + q: Query matrix of shape [bs, feature_num, head_num * head_size]. + k: Key matrix of shape [bs, feature_num, head_num * head_size]. + v: Value matrix of shape [bs, feature_num, head_num * head_size]. + + Returns: + q: Query matrix of shape [bs, head_num, feature_num, head_size]. + k: Key matrix of shape [bs, head_num, feature_num, head_size]. + v: Value matrix of shape [bs, head_num, feature_num, head_size]. + """ + reshaped_q = tf.reshape( + q, shape=[-1, q.shape[1], self._head_num, self._head_size]) + q = tf.transpose(reshaped_q, perm=[0, 2, 1, 3]) + reshaped_k = tf.reshape( + k, shape=[-1, k.shape[1], self._head_num, self._head_size]) + k = tf.transpose(reshaped_k, perm=[0, 2, 1, 3]) + reshaped_v = tf.reshape( + v, shape=[-1, v.shape[1], self._head_num, self._head_size]) + v = tf.transpose(reshaped_v, perm=[0, 2, 1, 3]) + return q, k, v + + def _scaled_dot_product_attention(self, q, k, v): + """Calculate scaled dot product attention by q, k and v. + + Args: + q: Query matrix of shape [bs, head_num, feature_num, head_size]. + k: Key matrix of shape [bs, head_num, feature_num, head_size]. + v: Value matrix of shape [bs, head_num, feature_num, head_size]. + + Returns: + q: Query matrix of shape [bs, head_num, feature_num, head_size]. + k: Key matrix of shape [bs, head_num, feature_num, head_size]. + v: Value matrix of shape [bs, head_num, feature_num, head_size]. + """ + product = tf.linalg.matmul( + a=q, b=k, transpose_b=True) / ( + self._head_size**-0.5) + weights = tf.nn.softmax(product) + out = tf.linalg.matmul(weights, v) + return out + + def _compute_qkv(self, q, k, v): + """Calculate q, k and v matrices. + + Args: + q: Query matrix of shape [bs, feature_num, d_model]. + k: Key matrix of shape [bs, feature_num, d_model]. + v: Value matrix of shape [bs, feature_num, d_model]. + + Returns: + q: Query matrix of shape [bs, feature_num, head_size * n_head]. + k: Key matrix of shape [bs, feature_num, head_size * n_head]. 
+ v: Value matrix of shape [bs, feature_num, head_size * n_head]. + """ + q = tf.layers.dense( + q, + self._head_num * self._head_size, + use_bias=False, + kernel_regularizer=self._l2_reg, + name='%s/%s/dnn' % (self._name, 'query')) + k = tf.layers.dense( + k, + self._head_num * self._head_size, + use_bias=False, + kernel_regularizer=self._l2_reg, + name='%s/%s/dnn' % (self._name, 'key')) + v = tf.layers.dense( + v, + self._head_num * self._head_size, + use_bias=False, + kernel_regularizer=self._l2_reg, + name='%s/%s/dnn' % (self._name, 'value')) + return q, k, v + + def _combine_heads(self, multi_head_tensor): + """Combine the results of multiple heads. + + Args: + multi_head_tensor: Result matrix of shape [bs, head_num, feature_num, head_size]. + + Returns: + out: Result matrix of shape [bs, feature_num, head_num * head_size]. + """ + x = tf.transpose(multi_head_tensor, perm=[0, 2, 1, 3]) + out = tf.reshape(x, shape=[-1, x.shape[1], x.shape[2] * x.shape[3]]) + return out + + def _multi_head_attention(self, attention_input): + """Build multiple heads attention layer. + + Args: + attention_input: The input of interacting layer, has a shape of [bs, feature_num, d_model]. + + Returns: + out: The output of multi head attention layer, has a shape of [bs, feature_num, head_num * head_size]. + """ + if isinstance(attention_input, list): + assert len(attention_input) == 3 or len(attention_input) == 1, \ + 'If the input of multi_head_attention is a list, the length must be 1 or 3.' + + if len(attention_input) == 3: + ori_q = attention_input[0] + ori_k = attention_input[1] + ori_v = attention_input[2] + else: + ori_q = attention_input[0] + ori_k = attention_input[0] + ori_v = attention_input[0] + else: + ori_q = attention_input + ori_k = attention_input + ori_v = attention_input + + q, k, v = self._compute_qkv(ori_q, ori_k, ori_v) + q, k, v = self._split_multihead_qkv(q, k, v) + multi_head_tensor = self._scaled_dot_product_attention(q, k, v) + out = self._combine_heads(multi_head_tensor) + + if self._use_res: + W_0_x = tf.layers.dense( + ori_v, + out.shape[2], + use_bias=False, + kernel_regularizer=self._l2_reg, + name='%s/dnn' % (self._name)) + res_out = tf.nn.relu(out + W_0_x) + return res_out + else: + return out + + def __call__(self, deep_fea): + deep_fea = self._multi_head_attention(deep_fea) + return deep_fea diff --git a/easy_rec/python/layers/variational_dropout_layer.py b/easy_rec/python/layers/variational_dropout_layer.py new file mode 100644 index 000000000..d75f61e92 --- /dev/null +++ b/easy_rec/python/layers/variational_dropout_layer.py @@ -0,0 +1,136 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import numpy as np +import tensorflow as tf + +from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA +from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class VariationalDropoutLayer(object): + """Rank features by variational dropout. 
+ + Use the Dropout concept on the input feature layer and optimize the corresponding feature-wise dropout rate + paper: Dropout Feature Ranking for Deep Learning Models + arXiv: 1712.08645 + """ + + def __init__(self, + variational_dropout_config, + group_columns, + is_training=False): + self._config = variational_dropout_config + self.features_dim_used = [] + self.features_embedding_size = 0 + for item in range(0, len(group_columns)): + if (hasattr(group_columns[item], 'dimension')): + self.features_dim_used.append(group_columns[item].dimension) + self.features_embedding_size += group_columns[item].dimension + else: + self.features_dim_used.append(1) + self.features_embedding_size += 1 + + if self.variational_dropout_wise(): + self._dropout_param_size = self.features_embedding_size + self.drop_param_shape = [self._dropout_param_size] + else: + self._dropout_param_size = len(self.features_dim_used) + self.drop_param_shape = [self._dropout_param_size] + self.evaluate = not is_training + + def get_lambda(self): + return self._config.regularization_lambda + + def variational_dropout_wise(self): + return self._config.embedding_wise_variational_dropout + + def expand_bern_val(self): + # Build index_list--->[[0,0],[0,0],[0,0],[0,0],[0,1]......] + self.expanded_bern_val = [] + for i in range(len(self.features_dim_used)): + index_loop_count = self.features_dim_used[i] + for m in range(index_loop_count): + self.expanded_bern_val.append([i]) + self.expanded_bern_val = tf.tile(self.expanded_bern_val, + [self.batch_size, 1]) + batch_size_range = tf.range(self.batch_size) + expand_range_axis = tf.expand_dims(batch_size_range, 1) + self.fetures_dim_len = 0 + for i in self.features_dim_used: + self.fetures_dim_len += self.features_dim_used[i] + batch_size_range_expand_dim_len = tf.tile(expand_range_axis, + [1, self.fetures_dim_len]) + index_i = tf.reshape(batch_size_range_expand_dim_len, [-1, 1]) + self.expanded_bern_val = tf.concat([index_i, self.expanded_bern_val], 1) + + def build_variational_dropout(self): + self.logit_p = tf.get_variable( + name='logit_p', + shape=self.drop_param_shape, + dtype=tf.float32, + initializer=None) + + def sample_noisy_input(self, input): + self.batch_size = tf.shape(input)[0] + if self.evaluate: + expanded_dims_logit_p = tf.expand_dims(self.logit_p, 0) + expanded_logit_p = tf.tile(expanded_dims_logit_p, [self.batch_size, 1]) + p = tf.sigmoid(expanded_logit_p) + if self.variational_dropout_wise(): + scaled_input = input * (1 - p) + else: + # expand dropout layer + self.expand_bern_val() + expanded_p = tf.gather_nd(p, self.expanded_bern_val) + expanded_p = tf.reshape(expanded_p, [-1, self.fetures_dim_len]) + scaled_input = input * (1 - expanded_p) + + return scaled_input + + bern_val = self.sampled_from_logit_p(self.batch_size) + bern_val = tf.reshape(bern_val, [-1, self.fetures_dim_len]) + noisy_input = input * bern_val + return noisy_input + + def sampled_from_logit_p(self, num_samples): + expand_dims_logit_p = tf.expand_dims(self.logit_p, 0) + expand_logit_p = tf.tile(expand_dims_logit_p, [num_samples, 1]) + dropout_p = tf.sigmoid(expand_logit_p) + bern_val = self.concrete_dropout_neuron(dropout_p) + + if self.variational_dropout_wise(): + return bern_val + else: + # from feature_num to embedding_dim_num + self.expand_bern_val() + bern_val_gather_nd = [] + bern_val_gather_nd = tf.gather_nd(bern_val, self.expanded_bern_val) + return bern_val_gather_nd + + def concrete_dropout_neuron(self, dropout_p, temp=1.0 / 10.0): + EPSILON = np.finfo(float).eps + unif_noise = 
tf.random_uniform( + tf.shape(dropout_p), dtype=tf.float32, seed=None, name='unif_noise') + + approx = ( + tf.log(dropout_p + EPSILON) - tf.log(1. - dropout_p + EPSILON) + + tf.log(unif_noise + EPSILON) - tf.log(1. - unif_noise + EPSILON)) + + approx_output = tf.sigmoid(approx / temp) + return 1 - approx_output + + def __call__(self, output_features): + self.build_variational_dropout() + noisy_input = self.sample_noisy_input(output_features) + dropout_p = tf.sigmoid(self.logit_p) + variational_dropout_penalty = 1. - dropout_p + variational_dropout_penalty_lambda = self.get_lambda() / tf.cast( + self.batch_size, dtype=tf.float32) + variational_dropout_loss_sum = variational_dropout_penalty_lambda * tf.reduce_sum( + variational_dropout_penalty, axis=0) + tf.add_to_collection('variational_dropout_loss', + variational_dropout_loss_sum) + return noisy_input diff --git a/easy_rec/python/main.py b/easy_rec/python/main.py index 354381287..c8777f987 100644 --- a/easy_rec/python/main.py +++ b/easy_rec/python/main.py @@ -1,7 +1,6 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. -# Date: 2018-09-13 -"""Binary to run train and evaluation on recommendation model.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -21,9 +20,11 @@ from easy_rec.python.input.input import Input from easy_rec.python.model.easy_rec_estimator import EasyRecEstimator from easy_rec.python.model.easy_rec_model import EasyRecModel +from easy_rec.python.protos.train_pb2 import DistributionStrategy from easy_rec.python.utils import config_util from easy_rec.python.utils import estimator_utils from easy_rec.python.utils import load_class +from easy_rec.python.utils.export_big_model import export_big_model from easy_rec.python.utils.pai_util import is_on_pai if tf.__version__ >= '2.0': @@ -66,18 +67,7 @@ def _get_input_fn(data_config, Returns: subclass of Input """ - input_class_map = { - data_config.CSVInput: 'CSVInput', - data_config.CSVInputV2: 'CSVInputV2', - data_config.OdpsInput: 'OdpsInput', - data_config.OdpsInputV2: 'OdpsInputV2', - data_config.RTPInput: 'RTPInput', - data_config.RTPInputV2: 'RTPInputV2', - data_config.OdpsRTPInput: 'OdpsRTPInput', - data_config.DummyInput: 'DummyInput', - data_config.KafkaInput: 'KafkaInput' - } - + input_class_map = {y: x for x, y in data_config.InputType.items()} input_cls_name = input_class_map[data_config.input_type] input_class = Input.create_class(input_cls_name) @@ -99,7 +89,9 @@ def _create_estimator(pipeline_config, distribution=None, params={}): session_config = ConfigProto( gpu_options=gpu_options, allow_soft_placement=True, - log_device_placement=False) + log_device_placement=params.get('log_device_placement', False), + inter_op_parallelism_threads=train_config.inter_op_parallelism_threads, + intra_op_parallelism_threads=train_config.intra_op_parallelism_threads) session_config.device_filters.append('/job:ps') model_cls = EasyRecModel.create_class(model_config.model_class) @@ -120,6 +112,7 @@ def _create_estimator(pipeline_config, distribution=None, params={}): save_summary_steps=train_config.save_summary_steps, save_checkpoints_steps=save_checkpoints_steps, save_checkpoints_secs=save_checkpoints_secs, + keep_checkpoint_max=train_config.keep_checkpoint_max, train_distribute=distribution, eval_distribute=distribution, session_config=session_config) @@ -152,7 +145,7 @@ def _create_eval_export_spec(pipeline_config, eval_data): LatestExporter( name='latest', 
serving_input_receiver_fn=export_input_fn, - exports_to_keep=1) + exports_to_keep=export_config.exports_to_keep) ] elif export_config.exporter_type == 'best': logging.info( @@ -173,7 +166,8 @@ def _metric_cmp_fn(best_eval_result, current_eval_result): BestExporter( name='best', serving_input_receiver_fn=export_input_fn, - compare_fn=_metric_cmp_fn) + compare_fn=_metric_cmp_fn, + exports_to_keep=export_config.exports_to_keep) ] elif export_config.exporter_type == 'none': exporters = [] @@ -265,13 +259,24 @@ def _train_and_evaluate_impl(pipeline_config, continue_train=False): data_config = pipeline_config.data_config feature_configs = pipeline_config.feature_configs + if train_config.train_distribute != DistributionStrategy.NoStrategy\ + and train_config.sync_replicas: + logging.warning( + 'will set sync_replicas to False, because train_distribute[%s] != NoStrategy' + % pipeline_config.train_config.train_distribute) + pipeline_config.train_config.sync_replicas = False + if pipeline_config.WhichOneof('train_path') == 'kafka_train_input': train_data = pipeline_config.kafka_train_input + elif pipeline_config.WhichOneof('train_path') == 'datahub_train_input': + train_data = pipeline_config.datahub_train_input else: train_data = pipeline_config.train_input_path if pipeline_config.WhichOneof('eval_path') == 'kafka_eval_input': eval_data = pipeline_config.kafka_eval_input + elif pipeline_config.WhichOneof('eval_path') == 'datahub_eval_input': + eval_data = pipeline_config.datahub_eval_input else: eval_data = pipeline_config.eval_input_path @@ -290,7 +295,7 @@ def _train_and_evaluate_impl(pipeline_config, continue_train=False): master_stat_file = os.path.join(pipeline_config.model_dir, 'master.stat') version_file = os.path.join(pipeline_config.model_dir, 'version') - if run_config.is_chief: + if estimator_utils.is_chief(): _check_model_dir(pipeline_config.model_dir, continue_train) config_util.save_pipeline_config(pipeline_config, pipeline_config.model_dir) with gfile.GFile(version_file, 'w') as f: @@ -342,12 +347,13 @@ def evaluate(pipeline_config, * pipeline_config_path does not exist """ pipeline_config = config_util.get_configs_from_pipeline_file(pipeline_config) + if eval_data_path is not None: logging.info('Evaluating on data: %s' % eval_data_path) if isinstance(eval_data_path, list): - pipeline_config.eval_data.input_path[:] = eval_data_path + pipeline_config.eval_input_path = ','.join(eval_data_path) else: - pipeline_config.eval_data.input_path[:] = [eval_data_path] + pipeline_config.eval_input_path = eval_data_path train_config = pipeline_config.train_config if pipeline_config.WhichOneof('eval_path') == 'kafka_eval_input': @@ -355,14 +361,71 @@ def evaluate(pipeline_config, else: eval_data = pipeline_config.eval_input_path + server_target = None + if 'TF_CONFIG' in os.environ: + tf_config = estimator_utils.chief_to_master() + from tensorflow.python.training import server_lib + if tf_config['task']['type'] == 'ps': + cluster = tf.train.ClusterSpec(tf_config['cluster']) + server = server_lib.Server( + cluster, job_name='ps', task_index=tf_config['task']['index']) + server.join() + elif tf_config['task']['type'] == 'master': + if 'ps' in tf_config['cluster']: + cluster = tf.train.ClusterSpec(tf_config['cluster']) + server = server_lib.Server(cluster, job_name='master', task_index=0) + server_target = server.target + print('server_target = %s' % server_target) + distribution = strategy_builder.build(train_config) - estimator, _ = _create_estimator(pipeline_config, distribution) + estimator, 
run_config = _create_estimator(pipeline_config, distribution) eval_spec = _create_eval_export_spec(pipeline_config, eval_data) ckpt_path = _get_ckpt_path(pipeline_config, eval_checkpoint_path) - eval_result = estimator.evaluate( - eval_spec.input_fn, eval_spec.steps, checkpoint_path=ckpt_path) + if server_target: + # evaluate with parameter server + input_iter = eval_spec.input_fn( + mode=tf.estimator.ModeKeys.EVAL).make_one_shot_iterator() + input_feas, input_lbls = input_iter.get_next() + from tensorflow.python.training.device_setter import replica_device_setter + from tensorflow.python.framework.ops import device + from tensorflow.python.training.monitored_session import MonitoredSession + from tensorflow.python.training.monitored_session import ChiefSessionCreator + with device( + replica_device_setter( + worker_device='/job:master/task:0', cluster=cluster)): + estimator_spec = estimator._eval_model_fn(input_feas, input_lbls, + run_config) + + session_config = ConfigProto( + allow_soft_placement=True, log_device_placement=True) + chief_sess_creator = ChiefSessionCreator( + master=server_target, + checkpoint_filename_with_path=ckpt_path, + config=session_config) + eval_metric_ops = estimator_spec.eval_metric_ops + update_ops = [eval_metric_ops[x][1] for x in eval_metric_ops.keys()] + metric_ops = {x: eval_metric_ops[x][0] for x in eval_metric_ops.keys()} + update_op = tf.group(update_ops) + with MonitoredSession( + session_creator=chief_sess_creator, + hooks=None, + stop_grace_period_secs=120) as sess: + while True: + try: + sess.run(update_op) + except tf.errors.OutOfRangeError: + break + eval_result = sess.run(metric_ops) + else: + # this way does not work, wait to be debugged + # the variables are not placed to parameter server + # with tf.device( + # replica_device_setter( + # worker_device='/job:master/task:0', cluster=cluster)): + eval_result = estimator.evaluate( + eval_spec.input_fn, eval_spec.steps, checkpoint_path=ckpt_path) logging.info('Evaluate finish') # write eval result to file @@ -378,7 +441,7 @@ def evaluate(pipeline_config, # convert numpy float to python float result_to_write[key] = eval_result[key].item() - ofile.write(json.dumps(result_to_write, indent=2)) + ofile.write(json.dumps(result_to_write)) return eval_result @@ -423,15 +486,28 @@ def predict(pipeline_config, checkpoint_path='', data_path=None): return pred_result -def export(export_dir, pipeline_config_path, checkpoint_path=''): +def export(export_dir, + pipeline_config, + checkpoint_path='', + asset_files=None, + verbose=False, + **redis_params): """Export model defined in pipeline_config_path. 
Args: export_dir: base directory where the model should be exported - pipeline_config_path: file specify proto.EasyRecConfig, including - model_config, eval_data, eval_config + pipeline_config: proto.EasyRecConfig instance or file path + specify proto.EasyRecConfig checkpoint_path: if specified, will use this model instead of model in model_dir in pipeline_config_path + asset_files: extra files to add to assets, comma separated + version: if version is defined, then will skip writing embedding to redis, + assume that embedding is already write into redis + verbose: dumps debug information + redis_params: keys related to write embedding to redis + redis_url, redis_passwd, redis_threads, redis_batch_size, + redis_timeout, redis_expire if export embedding to redis; + redis_embedding_version: if specified, will kill export to redis Returns: the directory where model is exported @@ -440,21 +516,29 @@ def export(export_dir, pipeline_config_path, checkpoint_path=''): AssertionError, if: * pipeline_config_path does not exist """ - assert gfile.Exists(pipeline_config_path), 'pipeline_config_path is empty' if not gfile.Exists(export_dir): gfile.MakeDirs(export_dir) - pipeline_config = config_util.get_configs_from_pipeline_file( - pipeline_config_path) + pipeline_config = config_util.get_configs_from_pipeline_file(pipeline_config) feature_configs = pipeline_config.feature_configs - estimator, _ = _create_estimator(pipeline_config) + # create estimator + params = {'log_device_placement': verbose} + if asset_files: + logging.info('will add asset files: %s' % asset_files) + params['asset_files'] = asset_files + estimator, _ = _create_estimator(pipeline_config, params=params) # construct serving input fn export_config = pipeline_config.export_config data_config = pipeline_config.data_config serving_input_fn = _get_input_fn(data_config, feature_configs, None, export_config) + if 'redis_url' in redis_params: + return export_big_model(export_dir, pipeline_config, redis_params, + serving_input_fn, estimator, checkpoint_path, + verbose) + # pack embedding.pb into asset_extras assets_extra = None if export_config.dump_embedding_shape: diff --git a/easy_rec/python/model/autoint.py b/easy_rec/python/model/autoint.py new file mode 100644 index 000000000..76ee900eb --- /dev/null +++ b/easy_rec/python/model/autoint.py @@ -0,0 +1,66 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
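The extended export() above accepts either an EasyRecConfig proto or a config path, optional extra asset files, and redis_* keyword arguments that route large-embedding models through export_big_model. A hypothetical invocation is sketched below; all paths and connection settings are placeholders, only the parameter names come from the docstring above.

```
from easy_rec.python.main import export

# minimal export, pipeline config given as a path (placeholder paths)
export('hdfs:///user/demo/export', 'pipeline.config')

# export with an extra asset file and embeddings written to redis
export(
    'hdfs:///user/demo/export_big',
    'pipeline.config',
    checkpoint_path='',           # empty: use the model in model_dir
    asset_files='fg.json',        # hypothetical extra asset
    redis_url='127.0.0.1:6379',   # placeholder connection info
    redis_passwd='',
    redis_threads=4,
    redis_batch_size=1024)
```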
+import logging + +import tensorflow as tf + +from easy_rec.python.layers import multihead_attention +from easy_rec.python.model.rank_model import RankModel + +from easy_rec.python.protos.autoint_pb2 import AutoInt as AutoIntConfig # NOQA + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class AutoInt(RankModel): + + def __init__(self, + model_config, + feature_configs, + features, + labels=None, + is_training=False): + super(AutoInt, self).__init__(model_config, feature_configs, features, + labels, is_training) + assert self._model_config.WhichOneof('model') == 'autoint', \ + 'invalid model config: %s' % self._model_config.WhichOneof('model') + self._features, _ = self._input_layer(self._feature_dict, 'all') + self._feature_num = len(self._model_config.feature_groups[0].feature_names) + self._model_config = self._model_config.autoint + assert isinstance(self._model_config, AutoIntConfig) + + fea_emb_dim_list = [] + for feature_config in feature_configs: + fea_emb_dim_list.append(feature_config.embedding_dim) + assert len(set(fea_emb_dim_list)) == 1 and len(fea_emb_dim_list) == self._feature_num, \ + 'AutoInt requires that all feature dimensions must be consistent.' + + self._d_model = fea_emb_dim_list[0] + self._head_num = self._model_config.multi_head_num + self._head_size = self._model_config.multi_head_size + + def build_predict_graph(self): + logging.info('feature_num: {0}'.format(self._feature_num)) + + attention_fea = tf.reshape( + self._features, shape=[-1, self._feature_num, self._d_model]) + + for i in range(self._model_config.interacting_layer_num): + attention_layer = multihead_attention.MultiHeadAttention( + head_num=self._head_num, + head_size=self._head_size, + l2_reg=self._l2_reg, + use_res=True, + name='multi_head_self_attention_layer_%d' % i) + attention_fea = attention_layer(attention_fea) + + attention_fea = tf.reshape( + attention_fea, + shape=[-1, attention_fea.shape[1] * attention_fea.shape[2]]) + + final = tf.layers.dense(attention_fea, self._num_class, name='output') + + self._add_to_prediction_dict(final) + + return self._prediction_dict diff --git a/easy_rec/python/model/dbmtl.py b/easy_rec/python/model/dbmtl.py index 6e3954bbd..e0e2db607 100644 --- a/easy_rec/python/model/dbmtl.py +++ b/easy_rec/python/model/dbmtl.py @@ -2,7 +2,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import tensorflow as tf -from easy_rec.python.compat import regularizers from easy_rec.python.layers import dnn from easy_rec.python.layers import mmoe from easy_rec.python.model.multi_task_model import MultiTaskModel @@ -28,12 +27,7 @@ def __init__(self, assert isinstance(self._model_config, DBMTLConfig) self._features, _ = self._input_layer(self._feature_dict, 'all') - regularizers.apply_regularization( - self._emb_reg, weights_list=[self._features]) - self._init_towers(self._model_config.task_towers) - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) def build_predict_graph(self): if self._model_config.HasField('bottom_dnn'): @@ -92,7 +86,7 @@ def build_predict_graph(self): output_logits = tf.layers.dense( relation_fea, - self._num_class, + task_tower_cfg.num_class, kernel_regularizer=self._l2_reg, name=tower_name + '/output') tower_outputs[tower_name] = output_logits diff --git a/easy_rec/python/model/dcn.py b/easy_rec/python/model/dcn.py new file mode 100644 index 000000000..fcfa7e780 --- /dev/null +++ b/easy_rec/python/model/dcn.py @@ -0,0 +1,70 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
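The AutoInt model above reshapes the concatenated embeddings to [batch, feature_num, d_model] (hence the requirement that all features share one embedding_dim) and stacks interacting_layer_num multi-head self-attention layers with residual connections. For reference, the conventional scaled dot-product attention inside each head is softmax(QK^T / sqrt(head_size)) V; a compact sketch under that convention, with hypothetical shapes:

```
import tensorflow as tf

if tf.__version__ >= '2.0':
  tf = tf.compat.v1


def scaled_dot_product_attention(q, k, v, head_size):
  # q, k, v: [batch, head_num, feature_num, head_size]
  # conventional scaling divides the attention logits by sqrt(head_size)
  logits = tf.linalg.matmul(q, k, transpose_b=True) / (head_size ** 0.5)
  weights = tf.nn.softmax(logits)
  # output keeps the per-head shape [batch, head_num, feature_num, head_size]
  return tf.linalg.matmul(weights, v)
```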
+ +import tensorflow as tf + +from easy_rec.python.layers import dnn +from easy_rec.python.model.rank_model import RankModel + +from easy_rec.python.protos.dcn_pb2 import DCN as DCNConfig # NOQA + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class DCN(RankModel): + + def __init__(self, + model_config, + feature_configs, + features, + labels=None, + is_training=False): + super(DCN, self).__init__(model_config, feature_configs, features, labels, + is_training) + assert self._model_config.WhichOneof('model') == 'dcn', \ + 'invalid model config: %s' % self._model_config.WhichOneof('model') + self._model_config = self._model_config.dcn + assert isinstance(self._model_config, DCNConfig) + + self._features, _ = self._input_layer(self._feature_dict, 'all') + + def _cross_net(self, tensor, num_cross_layers): + x = x0 = tensor + input_dim = tensor.shape[-1] + for i in range(num_cross_layers): + name = 'cross_layer_%s' % i + w = tf.get_variable( + name=name + '_w', + dtype=tf.float32, + shape=(input_dim), + ) + b = tf.get_variable(name=name + '_b', dtype=tf.float32, shape=(input_dim)) + xw = tf.reduce_sum(x * w, axis=1, keepdims=True) # (B, 1) + x = tf.math.add(tf.math.add(x0 * xw, b), x) + return x + + def build_predict_graph(self): + tower_fea_arr = [] + # deep tower + deep_tower_config = self._model_config.deep_tower + + dnn_layer = dnn.DNN(deep_tower_config.dnn, self._l2_reg, 'dnn', + self._is_training) + deep_tensor = dnn_layer(self._features) + tower_fea_arr.append(deep_tensor) + # cross tower + cross_tower_config = self._model_config.cross_tower + num_cross_layers = cross_tower_config.cross_num + cross_tensor = self._cross_net(self._features, num_cross_layers) + tower_fea_arr.append(cross_tensor) + # final tower + all_fea = tf.concat(tower_fea_arr, axis=1) + final_dnn_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + 'final_dnn', self._is_training) + all_fea = final_dnn_layer(all_fea) + output = tf.layers.dense(all_fea, self._num_class, name='output') + + self._add_to_prediction_dict(output) + + return self._prediction_dict diff --git a/easy_rec/python/model/deepfm.py b/easy_rec/python/model/deepfm.py index 2c6e74b28..f17bec0be 100644 --- a/easy_rec/python/model/deepfm.py +++ b/easy_rec/python/model/deepfm.py @@ -2,7 +2,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
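The DCN cross tower above implements the cross-network recurrence x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l, with a per-layer feature-wise weight w_l and bias b_l. A small standalone sketch of one cross layer in plain NumPy, with made-up shapes and numbers:

```
import numpy as np


def cross_layer(x0, x, w, b):
  # x0, x: [batch, dim]; w, b: [dim]
  # x_{l+1} = x0 * (x_l . w) + b + x_l, matching DCN._cross_net above
  xw = np.sum(x * w, axis=1, keepdims=True)  # [batch, 1]
  return x0 * xw + b + x


# tiny usage example: the first cross layer takes x = x0
x0 = np.array([[1.0, 2.0], [0.5, -1.0]])
w = np.array([0.1, 0.2])
b = np.zeros(2)
x1 = cross_layer(x0, x0, w, b)
```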
import tensorflow as tf -from easy_rec.python.compat import regularizers from easy_rec.python.layers import dnn from easy_rec.python.layers import fm from easy_rec.python.layers import input_layer @@ -32,26 +31,12 @@ def __init__(self, if self._model_config.HasField('wide_regularization'): tf.logging.warn( 'wide_regularization is deprecated, please use l2_regularization') - if self._model_config.HasField('dense_regularization'): - tf.logging.warn( - 'dense_regularization is deprecated, please use l2_regularization') - if not self._model_config.HasField('l2_regularization'): - self._model_config.l2_regularization = self._model_config.dense_regularization self._wide_features, _ = self._input_layer(self._feature_dict, 'wide') self._deep_features, self._fm_features = self._input_layer( self._feature_dict, 'deep') - regularizers.apply_regularization( - self._emb_reg, weights_list=[self._wide_features]) - regularizers.apply_regularization( - self._emb_reg, weights_list=[self._deep_features]) if 'fm' in self._input_layer._feature_groups: _, self._fm_features = self._input_layer(self._feature_dict, 'fm') - regularizers.apply_regularization( - self._emb_reg, weights_list=self._fm_features) - - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) def build_input_layer(self, model_config, feature_configs): # overwrite create input_layer to support wide_output_dim @@ -62,7 +47,9 @@ def build_input_layer(self, model_config, feature_configs): feature_configs, model_config.feature_groups, model_config.deepfm.wide_output_dim, - use_embedding_variable=model_config.use_embedding_variable) + use_embedding_variable=model_config.use_embedding_variable, + embedding_regularizer=self._emb_reg, + kernel_regularizer=self._l2_reg) def build_predict_graph(self): # Wide diff --git a/easy_rec/python/model/dssm.py b/easy_rec/python/model/dssm.py index 19aacdb07..20f873677 100644 --- a/easy_rec/python/model/dssm.py +++ b/easy_rec/python/model/dssm.py @@ -4,20 +4,18 @@ import tensorflow as tf -from easy_rec.python.compat import regularizers +from easy_rec.python.builders import loss_builder from easy_rec.python.layers import dnn from easy_rec.python.model.easy_rec_model import EasyRecModel from easy_rec.python.protos.dssm_pb2 import DSSM as DSSMConfig from easy_rec.python.protos.loss_pb2 import LossType +from easy_rec.python.protos.simi_pb2 import Similarity from easy_rec.python.utils.proto_util import copy_obj if tf.__version__ >= '2.0': - losses = tf.compat.v1.losses - metrics = tf.compat.v1.metrics tf = tf.compat.v1 -else: - losses = tf.losses - metrics = tf.metrics +losses = tf.losses +metrics = tf.metrics class DSSM(EasyRecModel): @@ -37,19 +35,6 @@ def __init__(self, self._model_config = self._model_config.dssm assert isinstance(self._model_config, DSSMConfig) - if self._labels is not None: - self._labels = list(self._labels.values()) - if self._loss_type == LossType.CLASSIFICATION: - if tf.__version__ >= '2.0': - self._labels[0] = tf.cast(self._labels[0], tf.int64) - else: - self._labels[0] = tf.to_int64(self._labels[0]) - elif self._loss_type == LossType.L2_LOSS: - if tf.__version__ >= '2.0': - self._labels[0] = tf.cast(self._labels[0], tf.float32) - else: - self._labels[0] = tf.to_float(self._labels[0]) - if self._loss_type == LossType.CLASSIFICATION: assert self._num_class == 1 @@ -62,22 +47,62 @@ def __init__(self, self.item_tower_feature, _ = self._input_layer(self._feature_dict, 'item') self.item_id = self.item_tower.id - regularizers.apply_regularization( - self._emb_reg, 
weights_list=[self.user_tower_feature]) - regularizers.apply_regularization( - self._emb_reg, weights_list=[self.item_tower_feature]) + if self._loss_type in [LossType.CLASSIFICATION, LossType.L2_LOSS]: + self._is_point_wise = True + logging.info('Use point wise dssm.') + else: + self._is_point_wise = False + logging.info('Use list wise dssm.') - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) + def _list_wise_sim(self, user_emb, item_emb): + batch_size = tf.shape(user_emb)[0] + hard_neg_indices = self._feature_dict.get('hard_neg_indices', None) - def sim(self, user_emb, item_emb): + if hard_neg_indices is not None: + tf.logging.info('With hard negative examples') + noclk_size = tf.shape(hard_neg_indices)[0] + pos_item_emb, neg_item_emb, hard_neg_item_emb = tf.split( + item_emb, [batch_size, -1, noclk_size], axis=0) + else: + pos_item_emb = item_emb[:batch_size] + neg_item_emb = item_emb[batch_size:] + + pos_user_item_sim = tf.reduce_sum( + tf.multiply(user_emb, pos_item_emb), axis=1, keep_dims=True) + neg_user_item_sim = tf.matmul(user_emb, tf.transpose(neg_item_emb)) + + if hard_neg_indices is not None: + user_emb_expand = tf.gather(user_emb, hard_neg_indices[:, 0]) + hard_neg_user_item_sim = tf.reduce_sum( + tf.multiply(user_emb_expand, hard_neg_item_emb), axis=1) + # scatter hard negatives sim update neg_user_item_sim + neg_sim_shape = tf.shape(neg_user_item_sim, out_type=tf.int64) + hard_neg_mask = tf.scatter_nd( + hard_neg_indices, + tf.ones_like(hard_neg_user_item_sim, dtype=tf.bool), + shape=neg_sim_shape) + hard_neg_user_item_sim = tf.scatter_nd( + hard_neg_indices, hard_neg_user_item_sim, shape=neg_sim_shape) + neg_user_item_sim = tf.where( + hard_neg_mask, x=hard_neg_user_item_sim, y=neg_user_item_sim) + + user_item_sim = tf.concat([pos_user_item_sim, neg_user_item_sim], axis=1) + return user_item_sim + + def _point_wise_sim(self, user_emb, item_emb): user_item_sim = tf.reduce_sum( tf.multiply(user_emb, item_emb), axis=1, keep_dims=True) return user_item_sim + def sim(self, user_emb, item_emb): + if self._is_point_wise: + return self._point_wise_sim(user_emb, item_emb) + else: + return self._list_wise_sim(user_emb, item_emb) + def norm(self, fea): - fea_norm = tf.norm(fea, axis=1, keepdims=True) - return tf.div(fea, tf.maximum(fea_norm, 1e-12)) + fea_norm = tf.nn.l2_normalize(fea, axis=1) + return fea_norm def build_predict_graph(self): num_user_dnn_layer = len(self.user_tower.dnn.hidden_units) @@ -103,25 +128,32 @@ def build_predict_graph(self): name='item_dnn/dnn_%d' % (num_item_dnn_layer - 1)) if self._loss_type == LossType.CLASSIFICATION: - user_tower_emb = self.norm(user_tower_emb) - item_tower_emb = self.norm(item_tower_emb) + if self._model_config.simi_func == Similarity.COSINE: + user_tower_emb = self.norm(user_tower_emb) + item_tower_emb = self.norm(item_tower_emb) user_item_sim = self.sim(user_tower_emb, item_tower_emb) - sim_w = tf.get_variable( - 'sim_w', - dtype=tf.float32, - shape=(1, 1), - initializer=tf.ones_initializer()) - sim_b = tf.get_variable( - 'sim_b', - dtype=tf.float32, - shape=(1), - initializer=tf.zeros_initializer()) - y_pred = tf.matmul(user_item_sim, tf.abs(sim_w)) + sim_b - y_pred = tf.reshape(y_pred, [-1]) + y_pred = user_item_sim + if self._model_config.scale_simi: + sim_w = tf.get_variable( + 'sim_w', + dtype=tf.float32, + shape=(1, 1), + initializer=tf.ones_initializer()) + sim_b = tf.get_variable( + 'sim_b', + dtype=tf.float32, + shape=(1), + initializer=tf.zeros_initializer()) + y_pred = 
tf.matmul(user_item_sim, tf.abs(sim_w)) + sim_b + y_pred = tf.reshape(y_pred, [-1]) if self._loss_type == LossType.CLASSIFICATION: - self._prediction_dict['logits'] = tf.nn.sigmoid(y_pred) + self._prediction_dict['logits'] = y_pred + self._prediction_dict['probs'] = tf.nn.sigmoid(y_pred) + elif self._loss_type == LossType.SOFTMAX_CROSS_ENTROPY: + self._prediction_dict['logits'] = y_pred + self._prediction_dict['probs'] = tf.nn.softmax(y_pred) else: self._prediction_dict['y'] = y_pred @@ -132,44 +164,92 @@ def build_predict_graph(self): return self._prediction_dict def build_loss_graph(self): + if self._is_point_wise: + return self._build_point_wise_loss_graph() + else: + return self._build_list_wise_loss_graph() + + def _build_list_wise_loss_graph(self): + if self._loss_type == LossType.SOFTMAX_CROSS_ENTROPY: + hit_prob = self._prediction_dict['probs'][:, :1] + self._loss_dict['cross_entropy_loss'] = -tf.reduce_mean( + tf.log(hit_prob + 1e-12)) + logging.info('softmax cross entropy loss is used') + else: + raise ValueError('invalid loss type: %s' % str(self._loss_type)) + return self._loss_dict + + def _build_point_wise_loss_graph(self): + label = list(self._labels.values())[0] if self._loss_type == LossType.CLASSIFICATION: - logging.info('log loss is used') - loss = losses.log_loss(self._labels[0], self._prediction_dict['logits']) - self._loss_dict['cross_entropy_loss'] = loss + pred = self._prediction_dict['logits'] + loss_name = 'cross_entropy_loss' elif self._loss_type == LossType.L2_LOSS: - logging.info('l2 loss is used') - loss = tf.reduce_mean( - tf.square(self._labels[0] - self._prediction_dict['y'])) - self._loss_dict['l2_loss'] = loss + pred = self._prediction_dict['y'] + loss_name = 'l2_loss' else: raise ValueError('invalid loss type: %s' % str(self._loss_type)) + + self._loss_dict[loss_name] = loss_builder.build( + self._loss_type, + label=label, + pred=pred, + loss_weight=self._sample_weight) + + # build kd loss + kd_loss_dict = loss_builder.build_kd_loss(self.kd, self._prediction_dict, + self._labels) + self._loss_dict.update(kd_loss_dict) return self._loss_dict def build_metric_graph(self, eval_config): + if self._is_point_wise: + return self._build_point_wise_metric_graph(eval_config) + else: + return self._build_list_wise_metric_graph(eval_config) + + def _build_list_wise_metric_graph(self, eval_config): + metric_dict = {} + for metric in eval_config.metrics_set: + if metric.WhichOneof('metric') == 'recall_at_topk': + logits = self._prediction_dict['logits'] + label = tf.zeros_like(logits[:, :1], dtype=tf.int64) + metric_dict['recall_at_top%d' % + metric.recall_at_topk.topk] = metrics.recall_at_k( + label, logits, metric.recall_at_topk.topk) + else: + ValueError('invalid metric type: %s' % str(metric)) + return metric_dict + + def _build_point_wise_metric_graph(self, eval_config): metric_dict = {} + label = list(self._labels.values())[0] for metric in eval_config.metrics_set: if metric.WhichOneof('metric') == 'auc': assert self._loss_type == LossType.CLASSIFICATION - metric_dict['auc'] = metrics.auc(self._labels[0], - self._prediction_dict['logits']) + metric_dict['auc'] = metrics.auc(label, self._prediction_dict['probs']) elif metric.WhichOneof('metric') == 'recall_at_topk': assert self._loss_type == LossType.CLASSIFICATION - metric_dict['recall_at_topk'] = metrics.recall_at_k( - self._labels[0], self._prediction_dict['logits'], - metric.recall_at_topk.topk) + metric_dict['recall_at_topk%d' % + metric.recall_at_topk.topk] = metrics.recall_at_k( + label, 
self._prediction_dict['probs'], + metric.recall_at_topk.topk) elif metric.WhichOneof('metric') == 'mean_absolute_error': assert self._loss_type == LossType.L2_LOSS metric_dict['mean_absolute_error'] = metrics.mean_absolute_error( - self._labels[0], self._prediction_dict['y']) + label, self._prediction_dict['y']) elif metric.WhichOneof('metric') == 'accuracy': assert self._loss_type == LossType.CLASSIFICATION metric_dict['accuracy'] = metrics.accuracy( - self._labels[0], self._prediction_dict['logits']) + label, self._prediction_dict['probs']) + else: + ValueError('invalid metric type: %s' % str(metric)) return metric_dict def get_outputs(self): - if self._loss_type == LossType.CLASSIFICATION: - return ['logits', 'user_emb', 'item_emb'] + if self._loss_type in (LossType.CLASSIFICATION, + LossType.SOFTMAX_CROSS_ENTROPY): + return ['logits', 'probs', 'user_emb', 'item_emb'] elif self._loss_type == LossType.L2_LOSS: return ['y', 'user_emb', 'item_emb'] else: diff --git a/easy_rec/python/model/easy_rec_estimator.py b/easy_rec/python/model/easy_rec_estimator.py index 2c8fe1d7d..35aac385d 100644 --- a/easy_rec/python/model/easy_rec_estimator.py +++ b/easy_rec/python/model/easy_rec_estimator.py @@ -8,6 +8,7 @@ from collections import OrderedDict import tensorflow as tf +from tensorflow.python.lib.io import file_io from tensorflow.python.saved_model import signature_constants from easy_rec.python.builders import optimizer_builder @@ -75,6 +76,12 @@ def _train_model_fn(self, features, labels, run_config): regularization_losses, name='regularization_loss') loss_dict['regularization_loss'] = regularization_losses + variational_dropout_loss = tf.get_collection('variational_dropout_loss') + if variational_dropout_loss: + variational_dropout_loss = tf.add_n( + variational_dropout_loss, name='variational_dropout_loss') + loss_dict['variational_dropout_loss'] = variational_dropout_loss + loss = tf.add_n(list(loss_dict.values())) loss_dict['total_loss'] = loss for key in loss_dict: @@ -103,6 +110,10 @@ def _train_model_fn(self, features, labels, run_config): total_num_replicas=run_config.num_worker_replicas) hooks.append( optimizer.make_session_run_hook(run_config.is_chief, num_tokens=0)) + + # add barrier for no strategy case + if run_config.num_worker_replicas > 1 and \ + self.train_config.train_distribute == DistributionStrategy.NoStrategy: hooks.append( estimator_utils.ExitBarrierHook(run_config.num_worker_replicas, run_config.is_chief, self.model_dir)) @@ -115,6 +126,17 @@ def _train_model_fn(self, features, labels, run_config): if gradient_clipping_by_norm <= 0: gradient_clipping_by_norm = None + gradient_multipliers = None + if self.train_config.optimizer_config.HasField( + 'embedding_learning_rate_multiplier'): + gradient_multipliers = { + var: + self.train_config.optimizer_config.embedding_learning_rate_multiplier + for var in tf.trainable_variables() + if 'embedding_weights:' in var.name or + '/embedding_weights/part_' in var.name + } + # optimize loss # colocate_gradients_with_ops=True means to compute gradients # on the same device on which op is processes in forward process @@ -124,11 +146,28 @@ def _train_model_fn(self, features, labels, run_config): learning_rate=None, clip_gradients=gradient_clipping_by_norm, optimizer=optimizer, + gradient_multipliers=gradient_multipliers, variables=tf.trainable_variables(), summaries=summaries, colocate_gradients_with_ops=True, + not_apply_grad_after_first_step=run_config.is_chief and + self._pipeline_config.data_config.chief_redundant, name='') # 
Preventing scope prefix on all variables. + # online evaluation + metric_update_op_dict = None + if self.eval_config.eval_online: + metric_update_op_dict = {} + metric_dict = model.build_metric_graph(self.eval_config) + for k, v in metric_dict.items(): + metric_update_op_dict['%s/batch' % k] = v[1] + tf.summary.scalar('%s/batch' % k, v[1]) + train_op = tf.group([train_op] + list(metric_update_op_dict.values())) + if estimator_utils.is_chief(): + hooks.append( + estimator_utils.OnlineEvaluationHook( + metric_dict=metric_dict, output_dir=self.model_dir)) + if self.train_config.HasField('fine_tune_checkpoint'): fine_tune_ckpt = self.train_config.fine_tune_checkpoint logging.warning('will restore from %s' % fine_tune_ckpt) @@ -141,11 +180,14 @@ def _train_model_fn(self, features, labels, run_config): force_restore_shape_compatible=force_restore) if restore_hook is not None: hooks.append(restore_hook) + # logging logging_dict = OrderedDict() logging_dict['lr'] = learning_rate[0] logging_dict['step'] = tf.train.get_global_step() logging_dict.update(loss_dict) + if metric_update_op_dict is not None: + logging_dict.update(metric_update_op_dict) tensor_order = logging_dict.keys() def format_fn(tensor_dict): @@ -170,7 +212,9 @@ def format_fn(tensor_dict): scaffold = tf.train.Scaffold() chief_hooks = [] else: - scaffold = tf.train.Scaffold(saver=tf.train.Saver(sharded=False)) + scaffold = tf.train.Scaffold( + saver=tf.train.Saver( + sharded=True, max_to_keep=self.train_config.keep_checkpoint_max)) # saver hook saver_hook = estimator_utils.CheckpointSaverHook( checkpoint_dir=self.model_dir, @@ -178,10 +222,12 @@ def format_fn(tensor_dict): save_steps=self._config.save_checkpoints_steps, scaffold=scaffold, write_graph=True) - chief_hooks = [saver_hook] + chief_hooks = [] + if estimator_utils.is_chief(): + hooks.append(saver_hook) # profiling hook - if self.train_config.is_profiling and run_config.is_chief: + if self.train_config.is_profiling and estimator_utils.is_chief(): profile_hook = tf.train.ProfilerHook( save_steps=log_step_count_steps, output_dir=self.model_dir) hooks.append(profile_hook) @@ -222,67 +268,15 @@ def _eval_model_fn(self, features, labels, run_config): predictions=predict_dict, eval_metric_ops=metric_dict) - def _export_model_fn(self, features, labels, run_config): - if self.export_config.dump_embedding_shape: - logging.info('use embedded features as input') - model = self._model_cls( - self.model_config, None, features, labels=None, is_training=False) - else: - model = self._model_cls( - self.model_config, - self.feature_configs, - features, - labels=None, - is_training=False) + def _export_model_fn(self, features, labels, run_config, params): + model = self._model_cls( + self.model_config, + self.feature_configs, + features, + labels=None, + is_training=False) predict_dict = model.build_predict_graph() - # for embedding export to redis - # norm_name is the embedding name will be used in redis - # redis_key_name = norm_name + ":" + hash_id(int) - def _get_norm_name(name): - name_toks = name.split('/') - for i in range(0, len(name_toks) - 1): - if name_toks[i + 1].startswith('embedding_weights:'): - var_id = name_toks[i + 1].replace('embedding_weights:', '') - if name_toks[i].endswith('_embedding'): - tmp_name = name_toks[i][:-len('_embedding')] - else: - tmp_name = name_toks[i] - if var_id != '0': - tmp_name = tmp_name + '_' + var_id - return tmp_name, 0 - if i > 1 and name_toks[i + 1].startswith('part_') and \ - name_toks[i] == 'embedding_weights': - part_id = name_toks[i + 
1].replace('part_', '') - part_toks = part_id.split(':') - if name_toks[i - 1].endswith('_embedding'): - tmp_name = name_toks[i - 1][:-len('_embedding')] - else: - tmp_name = name_toks[i - 1] - if part_toks[1] != '0': - tmp_name = tmp_name + '_' + part_toks[1] - return tmp_name, int(part_toks[0]) - return None, None - - embed_vars = {} - for x in tf.global_variables(): - if 'embedding_weights' not in x.name: - continue - if '/embedding_weights:' in x.name or\ - '/embedding_weights/part_' in x.name: - norm_name, part_id = _get_norm_name(x.name) - if '/part_' in x.name: - toks = x.name.split('/') - toks = toks[:-1] - var_name = '/'.join(toks) - else: - var_name = x.name - var_name = var_name.split(':')[0] - embed_vars[norm_name] = var_name - for norm_name in embed_vars.keys(): - tf.add_to_collections('easy_rec_embedding_vars', embed_vars[norm_name]) - tf.add_to_collections('easy_rec_embedding_names', norm_name) - # add output info to estimator spec outputs = {} output_list = model.get_outputs() @@ -297,20 +291,23 @@ def _get_norm_name(name): signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(outputs) } - tf.add_to_collection( - tf.GraphKeys.ASSET_FILEPATHS, - tf.constant( - self._model_dir + '/pipeline.config', - dtype=tf.string, - name='pipeline.config')) - if self.export_config.dump_embedding_shape: - embed_input_desc_files = tf.gfile.Glob( - os.path.join(self.model_dir, 'embedding_shapes', 'input_layer_*.txt')) - for one_file in embed_input_desc_files: - _, one_file_name = os.path.split(one_file) + + # save train pipeline.config for debug purpose + pipeline_path = os.path.join(self._model_dir, 'pipeline.config') + if tf.gfile.Exists(pipeline_path): + tf.add_to_collection( + tf.GraphKeys.ASSET_FILEPATHS, + tf.constant(pipeline_path, dtype=tf.string, name='pipeline.config')) + else: + print('train pipeline_path(%s) does not exist' % pipeline_path) + + # add more asset files + if 'asset_files' in params: + for asset_file in params['asset_files'].split(','): + _, asset_name = os.path.split(asset_file) tf.add_to_collection( tf.GraphKeys.ASSET_FILEPATHS, - tf.constant(one_file, dtype=tf.string, name=one_file_name)) + tf.constant(asset_file, dtype=tf.string, name=asset_name)) return tf.estimator.EstimatorSpec( mode=tf.estimator.ModeKeys.PREDICT, @@ -319,9 +316,11 @@ def _get_norm_name(name): export_outputs=export_outputs) def _model_fn(self, features, labels, mode, config, params): + os.environ['tf.estimator.mode'] = mode + os.environ['tf.estimator.ModeKeys.TRAIN'] = tf.estimator.ModeKeys.TRAIN if mode == tf.estimator.ModeKeys.TRAIN: return self._train_model_fn(features, labels, config) elif mode == tf.estimator.ModeKeys.EVAL: return self._eval_model_fn(features, labels, config) elif mode == tf.estimator.ModeKeys.PREDICT: - return self._export_model_fn(features, labels, config) + return self._export_model_fn(features, labels, config, params) diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index fb04520c6..e4712b88a 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -9,8 +9,8 @@ import tensorflow as tf from easy_rec.python.compat import regularizers -from easy_rec.python.layers import embed_input_layer from easy_rec.python.layers import input_layer +from easy_rec.python.utils import constant from easy_rec.python.utils import estimator_utils from easy_rec.python.utils import restore_filter from easy_rec.python.utils.load_class import get_register_class_meta @@ 
-36,29 +36,54 @@ def __init__(self, self._is_training = is_training self._feature_dict = features - self._feature_configs = feature_configs + self._emb_reg = regularizers.l2_regularizer(self.embedding_regularization) + self._l2_reg = regularizers.l2_regularizer(self.l2_regularization) + self._feature_configs = feature_configs self.build_input_layer(model_config, feature_configs) - self._emb_reg = regularizers.l2_regularizer(self.embedding_regularization) - self._labels = labels self._prediction_dict = {} self._loss_dict = {} + # add sample weight from inputs + self._sample_weight = 1.0 + if constant.SAMPLE_WEIGHT in features: + self._sample_weight = features[constant.SAMPLE_WEIGHT] + @property def embedding_regularization(self): return self._base_model_config.embedding_regularization + @property + def kd(self): + return self._base_model_config.kd + + @property + def l2_regularization(self): + model_config = getattr(self._base_model_config, + self._base_model_config.WhichOneof('model')) + l2_regularization = 0.0 + if hasattr(model_config, 'dense_regularization') and \ + model_config.HasField('dense_regularization'): + # backward compatibility + tf.logging.warn( + 'dense_regularization is deprecated, please use l2_regularization') + l2_regularization = model_config.dense_regularization + elif hasattr(model_config, 'l2_regularization'): + l2_regularization = model_config.l2_regularization + return l2_regularization + def build_input_layer(self, model_config, feature_configs): - if feature_configs is not None: - self._input_layer = input_layer.InputLayer( - feature_configs, - model_config.feature_groups, - use_embedding_variable=model_config.use_embedding_variable) - else: - self._input_layer = embed_input_layer.EmbedInputLayer( - model_config.feature_groups) + self._input_layer = input_layer.InputLayer( + feature_configs, + model_config.feature_groups, + use_embedding_variable=model_config.use_embedding_variable, + embedding_regularizer=self._emb_reg, + kernel_regularizer=self._l2_reg, + variational_dropout_config=model_config.variational_dropout + if model_config.HasField('variational_dropout') else None, + is_training=False) @abstractmethod def build_predict_graph(self): @@ -103,8 +128,8 @@ def restore(self, name2var_map = self._get_restore_vars(ckpt_var_map_path) logging.info('start to restore from %s' % ckpt_path) - if tf.gfile.IsDirectory(ckpt_path): - ckpt_path = tf.train.latest_checkpoint(ckpt_path) + if ckpt_path.endswith('/') or tf.gfile.IsDirectory(ckpt_path + '/'): + ckpt_path = estimator_utils.latest_checkpoint(ckpt_path) print('ckpt_path is model_dir, will use the latest checkpoint: %s' % ckpt_path) @@ -120,8 +145,11 @@ def restore(self, if variable_name in ckpt_var2shape_map: print('restore %s' % variable_name) ckpt_var_shape = ckpt_var2shape_map[variable_name] - var_shape = variable.shape.as_list() - if ckpt_var_shape == var_shape: + if type(variable) == list: + var_shape = None + else: + var_shape = variable.shape.as_list() + if ckpt_var_shape == var_shape or var_shape is None: vars_in_ckpt[variable_name] = variable elif len(ckpt_var_shape) == len(var_shape): if force_restore_shape_compatible: @@ -169,13 +197,32 @@ def _get_restore_vars(self, ckpt_var_map_path): # here must use global_variables, because variables such as moving_mean # and moving_variance is usually not trainable in detection models all_vars = tf.global_variables() + PARTITION_PATTERN = '/part_[0-9]+' + VAR_SUFIX_PATTERN = ':[0-9]$' + + name2var = {} + for one_var in all_vars: + var_name = 
re.sub(VAR_SUFIX_PATTERN, '', one_var.name) + if re.search(PARTITION_PATTERN, + var_name) and (not var_name.endswith('/AdamAsync_2') and + not var_name.endswith('/AdamAsync_3')): + var_name = re.sub(PARTITION_PATTERN, '', var_name) + is_part = True + else: + is_part = False + if var_name in name2var: + assert is_part, 'multiple vars: %s' % var_name + name2var[var_name].append(one_var) + else: + name2var[var_name] = [one_var] if is_part else one_var + if ckpt_var_map_path != '': if not tf.gfile.Exists(ckpt_var_map_path): logging.warning('%s not exist' % ckpt_var_map_path) - return {re.sub(':[0-9]$', '', var.name): var for var in all_vars} + return name2var # load var map - var_name_map = {} + name_map = {} with open(ckpt_var_map_path, 'r') as fin: for one_line in fin: one_line = one_line.strip() @@ -183,25 +230,30 @@ def _get_restore_vars(self, ckpt_var_map_path): if len(line_tok) != 2: logging.warning('Failed to process: %s' % one_line) continue - var_name_map[line_tok[0]] = line_tok[1] + name_map[line_tok[0]] = line_tok[1] var_map = {} - for one_var in all_vars: - var_name = re.sub(':[0-9]$', '', one_var.name) - if var_name in var_name_map: - var_map[var_name_map[var_name]] = one_var - elif 'Momentum' not in var_name: - logging.warning('Failed to find in var_map_lst(%s): %s' % + for var_name in name2var: + if var_name in name_map: + in_ckpt_name = name_map[var_name] + var_map[in_ckpt_name] = name2var[var_name] + else: + logging.warning('Failed to find in var_map_file(%s): %s' % (ckpt_var_map_path, var_name)) - return var_map + return name2var else: var_filter, scope_update = self.get_restore_filter() if var_filter is not None: - all_vars = [var for var in all_vars if var_filter.keep(var.name)] - # drop scope prefix if necessary, in this case, return a dict + name2var = { + var_name: name2var[var_name] + for var in name2var + if var_filter.keep(var.name) + } + # drop scope prefix if necessary if scope_update is not None: - all_vars = {scope_update(var.name): var for var in all_vars} - - return {re.sub(':[0-9]$', '', var.name): var for var in all_vars} + name2var = { + scope_update(var_name): name2var[var_name] for var_name in name2var + } + return name2var def get_restore_filter(self): """Get restore variable filter. @@ -210,7 +262,24 @@ def get_restore_filter(self): filter: type of Filter in restore_filter.py scope_drop: type of ScopeDrop in restore_filter.py """ - adam_filter = restore_filter.KeywordFilter('/Adam', True) - momentum_filter = restore_filter.KeywordFilter('/Momentum', True) - return restore_filter.CombineFilter([adam_filter, momentum_filter], + if len(self._base_model_config.restore_filters) == 0: + return None, None + + for x in self._base_model_config.restore_filters: + logging.info('restore will filter out pattern %s' % x) + + all_filters = [ + restore_filter.KeywordFilter(x, True) + for x in self._base_model_config.restore_filters + ] + + return restore_filter.CombineFilter(all_filters, restore_filter.Logical.AND), None + + def get_grouped_vars(self): + """Get grouped variables, each group will be optimized by a separate optimizer. 
+ + Return: + grouped_vars: list of list of variables + """ + raise NotImplementedError() diff --git a/easy_rec/python/model/esmm.py b/easy_rec/python/model/esmm.py index 64e1a4f6c..319a54469 100644 --- a/easy_rec/python/model/esmm.py +++ b/easy_rec/python/model/esmm.py @@ -4,7 +4,6 @@ import tensorflow as tf -from easy_rec.python.compat import regularizers from easy_rec.python.layers import dnn from easy_rec.python.model.multi_task_model import MultiTaskModel from easy_rec.python.protos.esmm_pb2 import ESMM as ESMMConfig @@ -42,8 +41,6 @@ def __init__(self, else: group_feature, _ = self._input_layer(self._feature_dict, 'all') self._group_features.append(group_feature) - regularizers.apply_regularization( - self._emb_reg, weights_list=self._group_features) # This model only supports two tasks (cvr+ctr or playtime+ctr). # In order to be consistent with the paper, @@ -57,9 +54,6 @@ def __init__(self, for task_tower_cfg in self._task_towers: assert task_tower_cfg.num_class == 1, 'Does not support multiclass classification problem' - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) - def build_loss_graph(self): """Build loss graph. @@ -72,8 +66,10 @@ def build_loss_graph(self): ctr_label_name = self._label_name_dict[ctr_tower_name] if self._cvr_tower_cfg.loss_type == LossType.CLASSIFICATION: ctcvr_label = self._labels[cvr_label_name] * self._labels[ctr_label_name] - cvr_loss = losses.log_loss(ctcvr_label, - self._prediction_dict['probs_ctcvr']) + cvr_loss = losses.log_loss( + ctcvr_label, + self._prediction_dict['probs_ctcvr'], + weights=self._sample_weight) # The weight defaults to 1. self._loss_dict['weighted_cross_entropy_loss_%s' % cvr_tower_name] = self._cvr_tower_cfg.weight * cvr_loss @@ -84,13 +80,16 @@ def build_loss_graph(self): ctcvr_label = self._labels[cvr_label_name] * tf.cast( self._labels[ctr_label_name], cvr_dtype) cvr_loss = tf.losses.mean_squared_error( - labels=ctcvr_label, predictions=self._prediction_dict['y_ctcvr']) + labels=ctcvr_label, + predictions=self._prediction_dict['y_ctcvr'], + weights=self._sample_weight) self._loss_dict['weighted_l2_loss_%s' % cvr_tower_name] = self._cvr_tower_cfg.weight * cvr_loss ctr_loss = losses.sigmoid_cross_entropy( self._labels[ctr_label_name], - self._prediction_dict['logits_%s' % ctr_tower_name]) + self._prediction_dict['logits_%s' % ctr_tower_name], + weights=self._sample_weight) self._loss_dict['weighted_cross_entropy_loss_%s' % ctr_tower_name] = self._ctr_tower_cfg.weight * ctr_loss return self._loss_dict diff --git a/easy_rec/python/model/fm.py b/easy_rec/python/model/fm.py index 5714ee927..be51c261b 100644 --- a/easy_rec/python/model/fm.py +++ b/easy_rec/python/model/fm.py @@ -4,7 +4,6 @@ import tensorflow as tf -from easy_rec.python.compat import regularizers from easy_rec.python.layers import fm from easy_rec.python.layers import input_layer from easy_rec.python.model.rank_model import RankModel @@ -32,18 +31,16 @@ def __init__(self, self._wide_features, _ = self._input_layer(self._feature_dict, 'wide') self._deep_features, self._fm_features = self._input_layer( self._feature_dict, 'deep') - regularizers.apply_regularization( - self._emb_reg, weights_list=[self._wide_features]) - regularizers.apply_regularization( - self._emb_reg, weights_list=[self._deep_features]) def build_input_layer(self, model_config, feature_configs): # overwrite create input_layer to support wide_output_dim self._input_layer = input_layer.InputLayer( feature_configs, model_config.feature_groups, - wide_output_dim=1, - 
use_embedding_variable=model_config.use_embedding_variable) + wide_output_dim=model_config.num_class, + use_embedding_variable=model_config.use_embedding_variable, + embedding_regularizer=self._emb_reg, + kernel_regularizer=self._l2_reg) def build_predict_graph(self): wide_fea = tf.reduce_sum( @@ -55,8 +52,7 @@ def build_predict_graph(self): fm_fea = tf.layers.dense( fm_fea, self._num_class, - kernel_regularizer=regularizers.l2_regularizer( - self._model_config.l2_regularization), + kernel_regularizer=self._l2_reg, name='fm_logits') else: fm_fea = tf.reduce_sum(fm_fea, 1, keepdims=True) diff --git a/easy_rec/python/model/mind.py b/easy_rec/python/model/mind.py new file mode 100644 index 000000000..a749e6dfe --- /dev/null +++ b/easy_rec/python/model/mind.py @@ -0,0 +1,243 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +import tensorflow as tf + +from easy_rec.python.compat import regularizers +from easy_rec.python.layers import dnn +from easy_rec.python.layers.capsule_layer import CapsuleLayer +from easy_rec.python.model.easy_rec_model import EasyRecModel +from easy_rec.python.protos.loss_pb2 import LossType +from easy_rec.python.protos.mind_pb2 import MIND as MINDConfig +from easy_rec.python.protos.simi_pb2 import Similarity +from easy_rec.python.utils.proto_util import copy_obj + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 +losses = tf.losses +metrics = tf.metrics + + +class MIND(EasyRecModel): + + def __init__(self, + model_config, + feature_configs, + features, + labels=None, + is_training=False): + super(MIND, self).__init__(model_config, feature_configs, features, labels, + is_training) + self._loss_type = self._model_config.loss_type + self._num_class = self._model_config.num_class + assert self._model_config.WhichOneof('model') == 'mind', \ + 'invalid model config: %s' % self._model_config.WhichOneof('model') + self._model_config = self._model_config.mind + + self._hist_seq_features = self._input_layer( + self._feature_dict, 'hist', is_combine=False) + self._user_features, _ = self._input_layer(self._feature_dict, 'user') + self._item_features, _ = self._input_layer(self._feature_dict, 'item') + + # copy_obj so that any modification will not affect original config + self.user_dnn = copy_obj(self._model_config.user_dnn) + # copy_obj so that any modification will not affect original config + self.item_dnn = copy_obj(self._model_config.item_dnn) + + self._l2_reg = regularizers.l2_regularizer( + self._model_config.l2_regularization) + + if self._labels is not None: + self._labels = list(self._labels.values()) + if self._loss_type == LossType.CLASSIFICATION: + self._labels[0] = tf.cast(self._labels[0], tf.int64) + elif self._loss_type == LossType.L2_LOSS: + self._labels[0] = tf.cast(self._labels[0], tf.float32) + + if self._loss_type == LossType.CLASSIFICATION: + assert self._num_class == 1 + + def sim(self, user_emb, item_emb): + user_item_sim = tf.reduce_sum( + tf.multiply(user_emb, item_emb), axis=1, keep_dims=True) + return user_item_sim + + def norm(self, fea): + fea_norm = tf.norm(fea, axis=-1, keepdims=True) + return tf.div(fea, tf.maximum(fea_norm, 1e-12)) + + def build_predict_graph(self): + capsule_layer = CapsuleLayer(self._model_config.capsule_config, + self._is_training) + + time_id_fea = [ + x[0] for x in self._hist_seq_features if 'time_id/' in x[0].name + ] + time_id_fea = time_id_fea[0] if len(time_id_fea) > 0 else None + + hist_seq_feas = [ + x[0] for x in self._hist_seq_features if 'time_id/' not in x[0].name + ] 
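# note: 'time_id/' embeddings, when present, are split out from the other
# history-sequence embeddings; further below they are masked to the valid
# positions, softmax-normalized, and used as per-position weights on
# hist_seq_feas instead of being concatenated as ordinary sequence features.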
+ # it is assumed that all hist have the same length + hist_seq_len = self._hist_seq_features[0][1] + + if self._model_config.user_seq_combine == MINDConfig.SUM: + # sum pooling over the features + hist_embed_dims = [x.get_shape()[-1] for x in hist_seq_feas] + for i in range(1, len(hist_embed_dims)): + assert hist_embed_dims[i] == hist_embed_dims[0], \ + 'all hist seq must have the same embedding shape, but: %s' \ + % str(hist_embed_dims) + hist_seq_feas = tf.add_n(hist_seq_feas) / len(hist_seq_feas) + else: + hist_seq_feas = tf.concat(hist_seq_feas, axis=2) + + if self._model_config.HasField('pre_capsule_dnn') and \ + len(self._model_config.pre_capsule_dnn.hidden_units) > 0: + pre_dnn_layer = dnn.DNN(self._model_config.pre_capsule_dnn, self._l2_reg, + 'pre_capsule_dnn', self._is_training) + hist_seq_feas = pre_dnn_layer(hist_seq_feas) + + if time_id_fea is not None: + assert time_id_fea.get_shape( + )[-1] == 1, 'time_id must have only embedding_size of 1' + time_id_mask = tf.sequence_mask(hist_seq_len, tf.shape(time_id_fea)[1]) + time_id_mask = (tf.cast(time_id_mask, tf.float32) * 2 - 1) * 1e32 + time_id_fea = tf.minimum(time_id_fea, time_id_mask[:, :, None]) + hist_seq_feas = hist_seq_feas * tf.nn.softmax(time_id_fea, axis=1) + + # batch_size x max_k x high_capsule_dim + high_capsules, num_high_capsules = capsule_layer(hist_seq_feas, + hist_seq_len) + # concatenate with user features + user_features = tf.tile( + tf.expand_dims(self._user_features, axis=1), + [1, tf.shape(high_capsules)[1], 1]) + user_features = tf.concat([high_capsules, user_features], axis=2) + num_user_dnn_layer = len(self.user_dnn.hidden_units) + last_user_hidden = self.user_dnn.hidden_units.pop() + user_dnn = dnn.DNN(self.user_dnn, self._l2_reg, 'user_dnn', + self._is_training) + user_features = user_dnn(user_features) + user_features = tf.layers.dense( + inputs=user_features, + units=last_user_hidden, + kernel_regularizer=self._l2_reg, + name='user_dnn/dnn_%d' % (num_user_dnn_layer - 1)) + + num_item_dnn_layer = len(self.item_dnn.hidden_units) + last_item_hidden = self.item_dnn.hidden_units.pop() + item_dnn = dnn.DNN(self.item_dnn, self._l2_reg, 'item_dnn', + self._is_training) + item_feature = item_dnn(self._item_features) + item_feature = tf.layers.dense( + inputs=item_feature, + units=last_item_hidden, + kernel_regularizer=self._l2_reg, + name='item_dnn/dnn_%d' % (num_item_dnn_layer - 1)) + + assert self._model_config.simi_func in [ + Similarity.COSINE, Similarity.INNER_PRODUCT + ] + + if self._model_config.simi_func == Similarity.COSINE: + item_feature = self.norm(item_feature) + user_features = self.norm(user_features) + + # label guided attention + # attention item features on high capsules vector + simi = tf.einsum('bhe,be->bh', user_features, item_feature) + simi = tf.pow(simi, self._model_config.simi_pow) + simi_mask = tf.sequence_mask(num_high_capsules, + self._model_config.capsule_config.max_k) + + user_features = user_features * tf.to_float(simi_mask[:, :, None]) + self._prediction_dict['user_features'] = user_features + + max_thresh = (tf.cast(simi_mask, tf.float32) * 2 - 1) * 1e32 + simi = tf.minimum(simi, max_thresh) + simi = tf.nn.softmax(simi, axis=1) + simi = tf.stop_gradient(simi) + user_tower_emb = tf.einsum('bhe,bh->be', user_features, simi) + + # calculate similarity between user_tower_emb and item_tower_emb + item_tower_emb = item_feature + user_item_sim = self.sim(user_tower_emb, item_tower_emb) + sim_w = tf.get_variable( + 'sim_w', + dtype=tf.float32, + shape=(1, 1), + 
initializer=tf.ones_initializer()) + sim_b = tf.get_variable( + 'sim_b', + dtype=tf.float32, + shape=(1), + initializer=tf.zeros_initializer()) + y_pred = tf.matmul(user_item_sim, tf.abs(sim_w)) + sim_b + y_pred = tf.reshape(y_pred, [-1]) + + if self._loss_type == LossType.CLASSIFICATION: + self._prediction_dict['logits'] = tf.nn.sigmoid(y_pred) + else: + self._prediction_dict['y'] = y_pred + + self._prediction_dict['user_emb'] = tf.reduce_join( + tf.reduce_join(tf.as_string(user_features), axis=-1, separator=','), + axis=-1, + separator='|') + self._prediction_dict['user_emb_num'] = num_high_capsules + self._prediction_dict['item_emb'] = tf.reduce_join( + tf.as_string(item_tower_emb), axis=-1, separator=',') + return self._prediction_dict + + def build_loss_graph(self): + if self._loss_type == LossType.CLASSIFICATION: + logging.info('log loss is used') + loss = losses.log_loss(self._labels[0], self._prediction_dict['logits']) + self._loss_dict['cross_entropy_loss'] = loss + elif self._loss_type == LossType.L2_LOSS: + logging.info('l2 loss is used') + loss = tf.reduce_mean( + tf.square(self._labels[0] - self._prediction_dict['y'])) + self._loss_dict['l2_loss'] = loss + else: + raise ValueError('invalid loss type: %s' % str(self._loss_type)) + return self._loss_dict + + def _build_interest_metric(self): + user_features = self._prediction_dict['user_features'] + user_features = self.norm(user_features) + user_feature_num = self._prediction_dict['user_emb_num'] + + user_feature_sum_sqr = tf.square(tf.reduce_sum(user_features, axis=1)) + user_feature_sqr_sum = tf.reduce_sum(tf.square(user_features), axis=1) + simi = user_feature_sum_sqr - user_feature_sqr_sum + + simi = tf.reduce_sum( + simi, axis=1) / tf.maximum( + tf.to_float(user_feature_num * (user_feature_num - 1)), 1.0) + user_feature_num = tf.reduce_sum(tf.to_float(user_feature_num > 1)) + return metrics.mean(tf.reduce_sum(simi) / tf.maximum(user_feature_num, 1.0)) + + def build_metric_graph(self, eval_config): + metric_dict = {} + for metric in eval_config.metrics_set: + if metric.WhichOneof('metric') == 'auc': + assert self._loss_type == LossType.CLASSIFICATION + metric_dict['auc'] = metrics.auc(self._labels[0], + self._prediction_dict['logits']) + elif metric.WhichOneof('metric') == 'mean_absolute_error': + assert self._loss_type == LossType.L2_LOSS + metric_dict['mean_absolute_error'] = metrics.mean_absolute_error( + self._labels[0], self._prediction_dict['y']) + metric_dict['interest_similarity'] = self._build_interest_metric() + return metric_dict + + def get_outputs(self): + if self._loss_type == LossType.CLASSIFICATION: + return ['logits', 'user_emb', 'item_emb'] + elif self._loss_type == LossType.L2_LOSS: + return ['y', 'user_emb', 'item_emb'] + else: + raise ValueError('invalid loss type: %s' % str(self._loss_type)) diff --git a/easy_rec/python/model/mmoe.py b/easy_rec/python/model/mmoe.py index 89a822c20..acf1d6d59 100644 --- a/easy_rec/python/model/mmoe.py +++ b/easy_rec/python/model/mmoe.py @@ -2,7 +2,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
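# MMoE combines shared expert DNNs through per-task gates; with this change
# each task tower emits task_tower_cfg.num_class logits rather than the
# model-level num_class.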
import tensorflow as tf -from easy_rec.python.compat import regularizers from easy_rec.python.layers import dnn from easy_rec.python.layers import mmoe from easy_rec.python.model.multi_task_model import MultiTaskModel @@ -28,12 +27,7 @@ def __init__(self, assert isinstance(self._model_config, MMoEConfig) self._features, _ = self._input_layer(self._feature_dict, 'all') - regularizers.apply_regularization( - self._emb_reg, weights_list=[self._features]) - self._init_towers(self._model_config.task_towers) - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) def build_predict_graph(self): if self._model_config.HasField('expert_dnn'): @@ -64,7 +58,7 @@ def build_predict_graph(self): tower_output = task_input_list[i] tower_output = tf.layers.dense( inputs=tower_output, - units=self._num_class, + units=task_tower_cfg.num_class, kernel_regularizer=self._l2_reg, name='dnn_output_%d' % i) diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index c20137805..0cbf3340c 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -4,6 +4,7 @@ import tensorflow as tf +from easy_rec.python.builders import loss_builder from easy_rec.python.model.rank_model import RankModel from easy_rec.python.protos import tower_pb2 @@ -76,7 +77,7 @@ def build_loss_graph(self): """Build loss graph for multi task model.""" for task_tower_cfg in self._task_towers: tower_name = task_tower_cfg.tower_name - loss_weight = task_tower_cfg.weight + loss_weight = task_tower_cfg.weight * self._sample_weight if hasattr(task_tower_cfg, 'task_space_indicator_label') and \ task_tower_cfg.HasField('task_space_indicator_label'): @@ -93,6 +94,11 @@ def build_loss_graph(self): loss_weight=loss_weight, num_class=task_tower_cfg.num_class, suffix='_%s' % tower_name)) + + kd_loss_dict = loss_builder.build_kd_loss(self.kd, self._prediction_dict, + self._labels) + self._loss_dict.update(kd_loss_dict) + return self._loss_dict def get_outputs(self): diff --git a/easy_rec/python/model/multi_tower.py b/easy_rec/python/model/multi_tower.py index 44017aa57..9904c9eee 100644 --- a/easy_rec/python/model/multi_tower.py +++ b/easy_rec/python/model/multi_tower.py @@ -3,7 +3,6 @@ import tensorflow as tf -from easy_rec.python.compat import regularizers from easy_rec.python.layers import dnn from easy_rec.python.model.rank_model import RankModel @@ -34,11 +33,6 @@ def __init__(self, tower = self._model_config.towers[tower_id] tower_feature, _ = self._input_layer(self._feature_dict, tower.input) self._tower_features.append(tower_feature) - regularizers.apply_regularization( - self._emb_reg, weights_list=[tower_feature]) - - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) def build_predict_graph(self): tower_fea_arr = [] diff --git a/easy_rec/python/model/multi_tower_bst.py b/easy_rec/python/model/multi_tower_bst.py index 4b9d07288..6d93ebeda 100644 --- a/easy_rec/python/model/multi_tower_bst.py +++ b/easy_rec/python/model/multi_tower_bst.py @@ -39,8 +39,6 @@ def __init__(self, for tower_id in range(self._tower_num): tower = self._model_config.towers[tower_id] tower_feature, _ = self._input_layer(self._feature_dict, tower.input) - regularizers.apply_regularization( - self._emb_reg, weights_list=[tower_feature]) self._tower_features.append(tower_feature) self._bst_tower_features = [] @@ -59,9 +57,6 @@ def __init__(self, self._emb_reg, weights_list=[tower_feature['hist_seq_emb']]) 
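# note: embedding regularization is still applied explicitly to the BST
# sequence embeddings (hist_seq_emb); the plain tower features no longer need
# it here because the input layer now receives embedding_regularizer directly
# (see the easy_rec_model.py change above).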
self._bst_tower_features.append(tower_feature) - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) - def dnn_net(self, net, dnn_units, name): with tf.variable_scope(name_or_scope=name, reuse=tf.AUTO_REUSE): for idx, units in enumerate(dnn_units): diff --git a/easy_rec/python/model/multi_tower_din.py b/easy_rec/python/model/multi_tower_din.py index cbdfed1b3..afd473f3d 100644 --- a/easy_rec/python/model/multi_tower_din.py +++ b/easy_rec/python/model/multi_tower_din.py @@ -37,8 +37,6 @@ def __init__(self, for tower_id in range(self._tower_num): tower = self._model_config.towers[tower_id] tower_feature, _ = self._input_layer(self._feature_dict, tower.input) - regularizers.apply_regularization( - self._emb_reg, weights_list=[tower_feature]) self._tower_features.append(tower_feature) self._din_tower_features = [] @@ -57,9 +55,6 @@ def __init__(self, self._emb_reg, weights_list=[tower_feature['hist_seq_emb']]) self._din_tower_features.append(tower_feature) - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) - def din(self, dnn_config, deep_fea, name): cur_id, hist_id_col, seq_len = deep_fea['key'], deep_fea[ 'hist_seq_emb'], deep_fea['hist_seq_len'] diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index ab2789321..66dfa18a2 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -1,9 +1,9 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. -import logging - import tensorflow as tf +from easy_rec.python.builders import loss_builder +from easy_rec.python.core import metrics as metrics_lib from easy_rec.python.model.easy_rec_model import EasyRecModel from easy_rec.python.protos.loss_pb2 import LossType @@ -44,9 +44,12 @@ def _output_to_prediction_impl(self, prediction_dict['logits' + suffix] = output prediction_dict['probs' + suffix] = probs prediction_dict['y' + suffix] = tf.argmax(output, axis=1) - else: + elif loss_type == LossType.L2_LOSS: output = tf.squeeze(output, axis=1) prediction_dict['y' + suffix] = output + elif loss_type == LossType.SIGMOID_L2_LOSS: + output = tf.squeeze(output, axis=1) + prediction_dict['y' + suffix] = tf.sigmoid(output) return prediction_dict def _add_to_prediction_dict(self, output): @@ -62,27 +65,17 @@ def _build_loss_impl(self, suffix=''): loss_dict = {} if loss_type == LossType.CLASSIFICATION: - if num_class == 1: - loss = tf.losses.sigmoid_cross_entropy( - self._labels[label_name], - logits=self._prediction_dict['logits' + suffix], - weights=loss_weight) - loss_dict['cross_entropy_loss' + suffix] = loss - else: - loss = tf.losses.sparse_softmax_cross_entropy( - labels=self._labels[label_name], - logits=self._prediction_dict['logits' + suffix], - weights=loss_weight) - loss_dict['cross_entropy_loss' + suffix] = loss - elif loss_type == LossType.L2_LOSS: - logging.info('l2 loss is used') - loss = tf.losses.mean_squared_error( - labels=self._labels[label_name], - predictions=self._prediction_dict['y' + suffix], - weights=loss_weight) - loss_dict['l2_loss' + suffix] = loss + loss_name = 'cross_entropy_loss' + suffix + pred = self._prediction_dict['logits' + suffix] + elif loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: + loss_name = 'l2_loss' + suffix + pred = self._prediction_dict['y' + suffix] else: - raise ValueError('invalid loss type: %s' % str(loss_type)) + raise ValueError('invalid loss type: %s' % LossType.Name(loss_type)) + + loss_dict[loss_name] = loss_builder.build(loss_type, + 
self._labels[label_name], pred, + loss_weight, num_class) return loss_dict def build_loss_graph(self): @@ -90,7 +83,14 @@ def build_loss_graph(self): self._build_loss_impl( self._loss_type, label_name=self._label_name, + loss_weight=self._sample_weight, num_class=self._num_class)) + + # build kd loss + kd_loss_dict = loss_builder.build_kd_loss(self.kd, self._prediction_dict, + self._labels) + self._loss_dict.update(kd_loss_dict) + return self._loss_dict def _build_metric_impl(self, @@ -102,10 +102,64 @@ def _build_metric_impl(self, metric_dict = {} if metric.WhichOneof('metric') == 'auc': assert loss_type == LossType.CLASSIFICATION - assert num_class == 1 - label = tf.to_int64(self._labels[label_name]) - metric_dict['auc' + suffix] = tf.metrics.auc( - label, self._prediction_dict['probs' + suffix]) + if num_class == 1: + label = tf.to_int64(self._labels[label_name]) + metric_dict['auc' + suffix] = tf.metrics.auc( + label, self._prediction_dict['probs' + suffix]) + elif num_class == 2: + label = tf.to_int64(self._labels[label_name]) + metric_dict['auc' + suffix] = tf.metrics.auc( + label, self._prediction_dict['probs' + suffix][:, 1]) + else: + raise ValueError('Wrong class number') + elif metric.WhichOneof('metric') == 'gauc': + assert loss_type == LossType.CLASSIFICATION + if num_class == 1: + label = tf.to_int64(self._labels[label_name]) + metric_dict['gauc' + suffix] = metrics_lib.gauc( + label, + self._prediction_dict['probs' + suffix], + uids=self._feature_dict[metric.gauc.uid_field], + reduction=metric.gauc.reduction) + elif num_class == 2: + label = tf.to_int64(self._labels[label_name]) + metric_dict['gauc' + suffix] = metrics_lib.gauc( + label, + self._prediction_dict['probs' + suffix][:, 1], + uids=self._feature_dict[metric.gauc.uid_field], + reduction=metric.gauc.reduction) + else: + raise ValueError('Wrong class number') + elif metric.WhichOneof('metric') == 'session_auc': + assert loss_type == LossType.CLASSIFICATION + if num_class == 1: + label = tf.to_int64(self._labels[label_name]) + metric_dict['gauc' + suffix] = metrics_lib.session_auc( + label, + self._prediction_dict['probs' + suffix], + session_ids=self._feature_dict[metric.session_auc.session_id_field], + reduction=metric.session_auc.reduction) + elif num_class == 2: + label = tf.to_int64(self._labels[label_name]) + metric_dict['gauc' + suffix] = metrics_lib.session_auc( + label, + self._prediction_dict['probs' + suffix][:, 1], + session_ids=self._feature_dict[metric.session_auc.session_id_field], + reduction=metric.session_auc.reduction) + else: + raise ValueError('Wrong class number') + elif metric.WhichOneof('metric') == 'max_f1': + assert loss_type == LossType.CLASSIFICATION + if num_class == 1: + label = tf.to_int64(self._labels[label_name]) + metric_dict['f1' + suffix] = metrics_lib.max_f1( + label, self._prediction_dict['logits' + suffix]) + elif num_class == 2: + label = tf.to_int64(self._labels[label_name]) + metric_dict['f1' + suffix] = metrics_lib.max_f1( + label, self._prediction_dict['logits' + suffix][:, 1]) + else: + raise ValueError('Wrong class number') elif metric.WhichOneof('metric') == 'recall_at_topk': assert loss_type == LossType.CLASSIFICATION assert num_class > 1 @@ -115,7 +169,7 @@ def _build_metric_impl(self, metric.recall_at_topk.topk) elif metric.WhichOneof('metric') == 'mean_absolute_error': label = tf.to_float(self._labels[label_name]) - if loss_type == LossType.L2_LOSS: + if loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: metric_dict['mean_absolute_error' + suffix] = 
tf.metrics.mean_absolute_error( label, self._prediction_dict['y' + suffix]) @@ -127,7 +181,7 @@ def _build_metric_impl(self, assert False, 'mean_absolute_error is not supported for this model' elif metric.WhichOneof('metric') == 'mean_squared_error': label = tf.to_float(self._labels[label_name]) - if loss_type == LossType.L2_LOSS: + if loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: metric_dict['mean_squared_error' + suffix] = tf.metrics.mean_squared_error( label, self._prediction_dict['y' + suffix]) @@ -137,6 +191,18 @@ def _build_metric_impl(self, label, self._prediction_dict['probs' + suffix]) else: assert False, 'mean_squared_error is not supported for this model' + elif metric.WhichOneof('metric') == 'root_mean_squared_error': + label = tf.to_float(self._labels[label_name]) + if loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: + metric_dict['root_mean_squared_error' + + suffix] = tf.metrics.root_mean_squared_error( + label, self._prediction_dict['y' + suffix]) + elif loss_type == LossType.CLASSIFICATION and num_class == 1: + metric_dict['root_mean_squared_error' + + suffix] = tf.metrics.root_mean_squared_error( + label, self._prediction_dict['probs' + suffix]) + else: + assert False, 'root_mean_squared_error is not supported for this model' elif metric.WhichOneof('metric') == 'accuracy': assert loss_type == LossType.CLASSIFICATION assert num_class > 1 @@ -162,10 +228,10 @@ def _get_outputs_impl(self, loss_type, num_class=1, suffix=''): return ['probs' + suffix, 'logits' + suffix] else: return ['y' + suffix, 'probs' + suffix, 'logits' + suffix] - elif loss_type == LossType.L2_LOSS: + elif loss_type in [LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS]: return ['y' + suffix] else: - raise ValueError('invalid loss type: %s' % str(loss_type)) + raise ValueError('invalid loss type: %s' % LossType.Name(loss_type)) def get_outputs(self): return self._get_outputs_impl(self._loss_type, self._num_class) diff --git a/easy_rec/python/model/rocket_launching.py b/easy_rec/python/model/rocket_launching.py new file mode 100755 index 000000000..2b45ba5e3 --- /dev/null +++ b/easy_rec/python/model/rocket_launching.py @@ -0,0 +1,197 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
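# Rocket Launching: a heavy "booster" DNN and a light DNN are trained jointly
# on the same (optionally shared) input; besides its own classification loss,
# the light net fits the booster's stop-gradient logits via an L2 hint loss
# and, when feature_based_distillation is enabled, additionally aims to match
# intermediate hidden layers of the booster.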
+import tensorflow as tf + +from easy_rec.python.builders import loss_builder +from easy_rec.python.layers import dnn +from easy_rec.python.model.rank_model import RankModel +from easy_rec.python.protos.loss_pb2 import LossType + +from easy_rec.python.protos.rocket_launching_pb2 import RocketLaunching as RocketLaunchingConfig # NOQA + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 +metrics = tf.metrics + + +class RocketLaunching(RankModel): + + def __init__(self, + model_config, + feature_configs, + features, + labels=None, + is_training=False): + super(RocketLaunching, self).__init__(model_config, feature_configs, + features, labels, is_training) + assert self._model_config.WhichOneof('model') == 'rocket_launching', \ + 'invalid model config: %s' % self._model_config.WhichOneof('model') + self._model_config = self._model_config.rocket_launching + assert isinstance(self._model_config, RocketLaunchingConfig) + if self._labels is not None: + self._label_name = list(self._labels.keys())[0] + + self._features, _ = self._input_layer(self._feature_dict, 'all') + + def sim(self, feature_emb1, feature_emb2): + emb1_emb2_sim = tf.reduce_sum( + tf.multiply(feature_emb1, feature_emb2), axis=1, keepdims=True) + return emb1_emb2_sim + + def norm(self, fea): + fea_norm = tf.nn.l2_normalize(fea, axis=1) + return fea_norm + + def feature_based_sim(self, feature_based_distillation, i, j): + booster_feature_no_gradient = tf.stop_gradient( + self.booster_feature['hidden_layer' + str(j)]) + if feature_based_distillation == 'COSION': + booster_feature_no_gradient_norm = self.norm(booster_feature_no_gradient) + light_feature_norm = self.norm(self.light_feature['hidden_layer' + + str(i)]) + sim_middle_layer = tf.reduce_mean( + self.sim(booster_feature_no_gradient_norm, light_feature_norm)) + return sim_middle_layer + else: + return tf.sqrt( + tf.reduce_sum( + tf.square(booster_feature_no_gradient - + self.light_feature['hidden_layer' + str(i)]))) + + def build_predict_graph(self): + self.hidden_layer_feature_output = self._model_config.feature_based_distillation + if self._model_config.HasField('share_dnn'): + share_dnn_layer = dnn.DNN(self._model_config.share_dnn, self._l2_reg, + 'share_dnn', self._is_training) + share_feature = share_dnn_layer(self._features) + booster_dnn_layer = dnn.DNN(self._model_config.booster_dnn, self._l2_reg, + 'booster_dnn', self._is_training) + light_dnn_layer = dnn.DNN(self._model_config.light_dnn, self._l2_reg, + 'light_dnn', self._is_training) + if self._model_config.HasField('share_dnn'): + self.booster_feature = booster_dnn_layer(share_feature, + self.hidden_layer_feature_output) + input_embedding_stop_gradient = tf.stop_gradient(share_feature) + self.light_feature = light_dnn_layer(input_embedding_stop_gradient, + self.hidden_layer_feature_output) + else: + self.booster_feature = booster_dnn_layer(self._features, + self.hidden_layer_feature_output) + input_embedding_stop_gradient = tf.stop_gradient(self._features) + self.light_feature = light_dnn_layer(input_embedding_stop_gradient, + self.hidden_layer_feature_output) + + if self._model_config.feature_based_distillation: + booster_out = tf.layers.dense( + self.booster_feature['hidden_layer_end'], + self._num_class, + kernel_regularizer=self._l2_reg, + name='booster_output') + + light_out = tf.layers.dense( + self.light_feature['hidden_layer_end'], + self._num_class, + kernel_regularizer=self._l2_reg, + name='light_output') + else: + booster_out = tf.layers.dense( + self.booster_feature, + self._num_class, + 
kernel_regularizer=self._l2_reg, + name='booster_output') + + light_out = tf.layers.dense( + self.light_feature, + self._num_class, + kernel_regularizer=self._l2_reg, + name='light_output') + + if self._loss_type == LossType.CLASSIFICATION: + probs_booster = tf.nn.softmax(booster_out, axis=1) + self._prediction_dict['logits_booster'] = booster_out + self._prediction_dict['probs_booster'] = probs_booster + self._prediction_dict['y_booster'] = tf.argmax(probs_booster, axis=1) + + probs_light = tf.nn.softmax(light_out, axis=1) + self._prediction_dict['logits_light'] = light_out + self._prediction_dict['probs_light'] = probs_light + self._prediction_dict['y_light'] = tf.argmax(probs_light, axis=1) + + return self._prediction_dict + + def build_loss_graph(self): + logits_booster = self._prediction_dict['logits_booster'] + logits_light = self._prediction_dict['logits_light'] + self.feature_distillation_function = self._model_config.feature_distillation_function + + # feature_based_distillation loss + if self._model_config.feature_based_distillation: + booster_hidden_units = self._model_config.booster_dnn.hidden_units + light_hidden_units = self._model_config.light_dnn.hidden_units + count = 0 + + for i, unit_i in enumerate(light_hidden_units): + for j, unit_j in enumerate(booster_hidden_units): + if light_hidden_units[i] == booster_hidden_units[j]: + self._prediction_dict[ + 'similarity_' + str(count)] = self.feature_based_sim( + self._model_config.feature_based_distillation, i, j) + count += 1 + break + + self._loss_dict.update( + self._build_loss_impl( + LossType.CLASSIFICATION, + label_name=self._label_name, + loss_weight=self._sample_weight, + num_class=self._num_class, + suffix='_booster')) + + self._loss_dict.update( + self._build_loss_impl( + LossType.CLASSIFICATION, + label_name=self._label_name, + loss_weight=self._sample_weight, + num_class=self._num_class, + suffix='_light')) + + booster_logits_no_grad = tf.stop_gradient(logits_booster) + + self._loss_dict['hint_loss'] = loss_builder.build( + LossType.L2_LOSS, + label=booster_logits_no_grad, + pred=logits_light, + loss_weight=self._sample_weight) + + if self._model_config.feature_based_distillation: + for key, value in self._prediction_dict.items(): + if key.startswith('similarity_'): + self._loss_dict[key] = -0.1 * value + return self._loss_dict + else: + return self._loss_dict + + def build_metric_graph(self, eval_config): + metric_dict = {} + for metric in eval_config.metrics_set: + metric_dict.update( + self._build_metric_impl( + metric, + loss_type=LossType.CLASSIFICATION, + label_name=self._label_name, + num_class=self._num_class, + suffix='_light')) + metric_dict.update( + self._build_metric_impl( + metric, + loss_type=LossType.CLASSIFICATION, + label_name=self._label_name, + num_class=self._num_class, + suffix='_booster')) + return metric_dict + + def get_outputs(self): + return [ + 'logits_booster', 'probs_booster', 'y_booster', 'logits_light', + 'probs_light', 'y_light' + ] diff --git a/easy_rec/python/model/simple_multi_task.py b/easy_rec/python/model/simple_multi_task.py index a20344c95..b4c0613bc 100644 --- a/easy_rec/python/model/simple_multi_task.py +++ b/easy_rec/python/model/simple_multi_task.py @@ -2,7 +2,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import tensorflow as tf -from easy_rec.python.compat import regularizers from easy_rec.python.layers import dnn from easy_rec.python.model.multi_task_model import MultiTaskModel @@ -29,12 +28,7 @@ def __init__(self, assert isinstance(self._model_config, SimpleMultiTaskConfig) self._features, _ = self._input_layer(self._feature_dict, 'all') - regularizers.apply_regularization( - self._emb_reg, weights_list=[self._features]) - self._init_towers(self._model_config.task_towers) - self._l2_reg = regularizers.l2_regularizer( - self._model_config.l2_regularization) def build_predict_graph(self): tower_outputs = {} @@ -48,7 +42,7 @@ def build_predict_graph(self): task_fea = task_dnn(self._features) task_output = tf.layers.dense( inputs=task_fea, - units=self._num_class, + units=task_tower_cfg.num_class, kernel_regularizer=self._l2_reg, name='dnn_output_%d' % i) tower_outputs[tower_name] = task_output diff --git a/easy_rec/python/model/wide_and_deep.py b/easy_rec/python/model/wide_and_deep.py new file mode 100755 index 000000000..78ec781a8 --- /dev/null +++ b/easy_rec/python/model/wide_and_deep.py @@ -0,0 +1,87 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +import logging + +import tensorflow as tf + +from easy_rec.python.layers import dnn +from easy_rec.python.layers import input_layer +from easy_rec.python.model.rank_model import RankModel + +from easy_rec.python.protos.wide_and_deep_pb2 import WideAndDeep as WideAndDeepConfig # NOQA + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class WideAndDeep(RankModel): + + def __init__(self, + model_config, + feature_configs, + features, + labels=None, + is_training=False): + super(WideAndDeep, self).__init__(model_config, feature_configs, features, + labels, is_training) + assert model_config.WhichOneof('model') == 'wide_and_deep', \ + 'invalid model config: %s' % model_config.WhichOneof('model') + self._model_config = model_config.wide_and_deep + assert isinstance(self._model_config, WideAndDeepConfig) + assert self._input_layer.has_group('wide') + _, self._wide_features = self._input_layer(self._feature_dict, 'wide') + assert self._input_layer.has_group('deep') + _, self._deep_features = self._input_layer(self._feature_dict, 'deep') + + def build_input_layer(self, model_config, feature_configs): + # overwrite create input_layer to support wide_output_dim + has_final = len(model_config.wide_and_deep.final_dnn.hidden_units) > 0 + wide_output_dim = model_config.wide_and_deep.wide_output_dim + if not has_final: + model_config.wide_and_deep.wide_output_dim = model_config.num_class + wide_output_dim = model_config.num_class + self._input_layer = input_layer.InputLayer( + feature_configs, + model_config.feature_groups, + wide_output_dim=wide_output_dim, + use_embedding_variable=model_config.use_embedding_variable, + embedding_regularizer=self._emb_reg, + kernel_regularizer=self._l2_reg) + + def build_predict_graph(self): + wide_fea = tf.add_n(self._wide_features) + logging.info('wide features dimension: %d' % wide_fea.get_shape()[-1]) + + self._deep_features = tf.concat(self._deep_features, axis=1) + logging.info('input deep features dimension: %d' % + self._deep_features.get_shape()[-1]) + + deep_layer = dnn.DNN(self._model_config.dnn, self._l2_reg, 'deep_feature', + self._is_training) + deep_fea = deep_layer(self._deep_features) + logging.info('output deep features dimension: %d' % + deep_fea.get_shape()[-1]) + + has_final = len(self._model_config.final_dnn.hidden_units) > 0 + print('wide_deep has_final_dnn layers = 
%d' % has_final) + if has_final: + all_fea = tf.concat([wide_fea, deep_fea], axis=1) + final_layer = dnn.DNN(self._model_config.final_dnn, self._l2_reg, + 'final_dnn', self._is_training) + all_fea = final_layer(all_fea) + output = tf.layers.dense( + all_fea, + self._num_class, + kernel_regularizer=self._l2_reg, + name='output') + else: + deep_out = tf.layers.dense( + deep_fea, + self._num_class, + kernel_regularizer=self._l2_reg, + name='deep_out') + output = deep_out + wide_fea + + self._add_to_prediction_dict(output) + + return self._prediction_dict diff --git a/easy_rec/python/ops/1.12/__init__.py b/easy_rec/python/ops/1.12/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/easy_rec/python/ops/1.12/libkv_lookup.so b/easy_rec/python/ops/1.12/libkv_lookup.so new file mode 100755 index 000000000..9f8eec212 Binary files /dev/null and b/easy_rec/python/ops/1.12/libkv_lookup.so differ diff --git a/easy_rec/python/ops/1.12/libwrite_kv.so b/easy_rec/python/ops/1.12/libwrite_kv.so new file mode 100755 index 000000000..91a5be3b7 Binary files /dev/null and b/easy_rec/python/ops/1.12/libwrite_kv.so differ diff --git a/easy_rec/python/ops/1.12/libwrite_sparse_kv.so b/easy_rec/python/ops/1.12/libwrite_sparse_kv.so new file mode 100755 index 000000000..9ecaa5319 Binary files /dev/null and b/easy_rec/python/ops/1.12/libwrite_sparse_kv.so differ diff --git a/easy_rec/python/ops/1.15/__init__.py b/easy_rec/python/ops/1.15/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/easy_rec/python/ops/1.15/libhiredis.so.1.0.0 b/easy_rec/python/ops/1.15/libhiredis.so.1.0.0 new file mode 100755 index 000000000..ba1399723 Binary files /dev/null and b/easy_rec/python/ops/1.15/libhiredis.so.1.0.0 differ diff --git a/easy_rec/python/ops/1.15/libkv_lookup.so b/easy_rec/python/ops/1.15/libkv_lookup.so new file mode 100755 index 000000000..96652ea24 Binary files /dev/null and b/easy_rec/python/ops/1.15/libkv_lookup.so differ diff --git a/easy_rec/python/ops/1.15/libredis++.so b/easy_rec/python/ops/1.15/libredis++.so new file mode 100755 index 000000000..afdd45b4b Binary files /dev/null and b/easy_rec/python/ops/1.15/libredis++.so differ diff --git a/easy_rec/python/ops/1.15/libwrite_kv.so b/easy_rec/python/ops/1.15/libwrite_kv.so new file mode 100755 index 000000000..683362520 Binary files /dev/null and b/easy_rec/python/ops/1.15/libwrite_kv.so differ diff --git a/easy_rec/python/ops/__init__.py b/easy_rec/python/ops/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/easy_rec/python/predict.py b/easy_rec/python/predict.py index 48333c156..4c8369dae 100644 --- a/easy_rec/python/predict.py +++ b/easy_rec/python/predict.py @@ -2,8 +2,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import logging +import os import tensorflow as tf +from tensorflow.python.lib.io import file_io from easy_rec.python.main import predict @@ -25,18 +27,28 @@ 'input_path', None, 'predict data path, if specified will ' 'override pipeline_config.eval_input_path') tf.app.flags.DEFINE_string('output_path', None, 'path to save predict result') -tf.app.flags.mark_flag_as_required('pipeline_config_path') +tf.app.flags.DEFINE_string('model_dir', None, help='will update the model_dir') FLAGS = tf.app.flags.FLAGS def main(argv): - pred_result = predict(FLAGS.pipeline_config_path, FLAGS.checkpoint_path, + assert FLAGS.model_dir or FLAGS.pipeline_config_path, 'At least one of model_dir and pipeline_config_path exists.' 
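# prefer the pipeline.config that training saved into model_dir; fall back to
# --pipeline_config_path when model_dir is not given or the file is missing.
# hypothetical invocation (paths are placeholders, not from this repo):
#   python -m easy_rec.python.predict --model_dir=experiments/dssm \
#       --input_path=data/test/test.csv --output_path=pred.out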
+ if FLAGS.model_dir: + pipeline_config_path = os.path.join(FLAGS.model_dir, 'pipeline.config') + if file_io.file_exists(pipeline_config_path): + logging.info('update pipeline_config_path to %s' % pipeline_config_path) + else: + pipeline_config_path = FLAGS.pipeline_config_path + else: + pipeline_config_path = FLAGS.pipeline_config_path + + pred_result = predict(pipeline_config_path, FLAGS.checkpoint_path, FLAGS.input_path) if FLAGS.output_path is not None: logging.info('will save predict result to %s' % FLAGS.output_path) with tf.gfile.GFile(FLAGS.output_path, 'wb') as fout: for k in pred_result: - fout.write(str(k).replace("'", '"') + '\n') + fout.write(str(k).replace("u'", '"').replace("'", '"') + '\n') if __name__ == '__main__': diff --git a/easy_rec/python/protos/autoint.proto b/easy_rec/python/protos/autoint.proto new file mode 100644 index 000000000..2f9aa5eb5 --- /dev/null +++ b/easy_rec/python/protos/autoint.proto @@ -0,0 +1,13 @@ +syntax = "proto2"; +package protos; + + +message AutoInt { + // The number of heads + required uint32 multi_head_num = 1 [default = 1]; + // The dimension of heads + required uint32 multi_head_size = 2; + // The number of interacting layers + required uint32 interacting_layer_num = 3 [default = 1]; + required float l2_regularization = 4 [default = 1e-4]; +} diff --git a/easy_rec/python/protos/data_source.proto b/easy_rec/python/protos/data_source.proto index 1defc27ca..a05134d12 100644 --- a/easy_rec/python/protos/data_source.proto +++ b/easy_rec/python/protos/data_source.proto @@ -6,5 +6,15 @@ message KafkaServer { required string topic = 2; required string group = 3; required uint32 partitions = 4; - required uint32 offset = 5; + repeated uint32 offset = 5; +} + +message DatahubServer{ + required string akId = 1; + required string akSecret = 2; + required string region = 3; + required string project = 4; + required string topic = 5; + required uint32 shard_num = 6; + required uint32 life_cycle = 7; } diff --git a/easy_rec/python/protos/dataset.proto b/easy_rec/python/protos/dataset.proto index 18fcf4146..1efea8466 100644 --- a/easy_rec/python/protos/dataset.proto +++ b/easy_rec/python/protos/dataset.proto @@ -1,6 +1,105 @@ syntax = "proto2"; package protos; +// Weighted Random Sampling ItemID not in Batch +message NegativeSampler { + // sample data path + // itemid weight attrs + required string input_path = 1; + // number of negative sample + required uint32 num_sample = 2; + // field names of attrs in train data or eval data + repeated string attr_fields = 3; + // field name of item_id in train data or eval data + required string item_id_field = 4; + + optional string attr_delimiter = 5 [default=":"]; + + optional uint32 num_eval_sample = 6 [default=0]; +} + +// Weighted Random Sampling ItemID not with Edge +message NegativeSamplerV2 { + // user data path + // userid weight + required string user_input_path = 1; + // item data path + // itemid weight attrs + required string item_input_path = 2; + // positive edge path + // userid itemid weight + required string pos_edge_input_path = 3; + // number of negative sample + required uint32 num_sample = 4; + // field names of attrs in train data or eval data + repeated string attr_fields = 5; + // field name of item_id in train data or eval data + required string item_id_field = 6; + // field name of user_id in train data or eval data + required string user_id_field = 7; + + optional string attr_delimiter = 8 [default=":"]; + + optional uint32 num_eval_sample = 9 [default=0]; +} + +// Weighted Random 
Sampling ItemID not in Batch and Sampling Hard Edge +message HardNegativeSampler { + // user data path + // userid weight + required string user_input_path = 1; + // item data path + // itemid weight attrs + required string item_input_path = 2; + // hard negative edge path + // userid itemid weight + required string hard_neg_edge_input_path = 3; + // number of negative sample + required uint32 num_sample = 4; + // max number of hard negative sample + required uint32 num_hard_sample = 5; + // field names of attrs in train data or eval data + repeated string attr_fields = 6; + // field name of item_id in train data or eval data + required string item_id_field = 7; + // field name of user_id in train data or eval data + required string user_id_field = 8; + + optional string attr_delimiter = 9 [default=":"]; + + optional uint32 num_eval_sample = 10 [default=0]; +} + +// Weighted Random Sampling ItemID not with Edge and Sampling Hard Edge +message HardNegativeSamplerV2 { + // user data path + // userid weight + required string user_input_path = 1; + // item data path + // itemid weight attrs + required string item_input_path = 2; + // positive edge path + // userid itemid weight + required string pos_edge_input_path = 3; + // hard negative edge path + // userid itemid weight + required string hard_neg_edge_input_path = 4; + // number of negative sample + required uint32 num_sample = 5; + // max number of hard negative sample + required uint32 num_hard_sample = 6; + // field names of attrs in train data or eval data + repeated string attr_fields = 7; + // field name of item_id in train data or eval data + required string item_id_field = 8; + // field name of user_id in train data or eval data + required string user_id_field = 9; + + optional string attr_delimiter = 10 [default=":"]; + + optional uint32 num_eval_sample = 11 [default=0]; +} + message DatasetConfig { // mini batch size to use for training and evaluation. optional uint32 batch_size = 1 [default = 32]; @@ -18,6 +117,8 @@ message DatasetConfig { required string input_name = 1; required FieldType input_type = 2 [default = STRING]; optional string default_val = 3; + optional uint32 input_dim = 4 [default=1]; + optional uint32 input_shape = 5 [default = 1]; } // set auto_expand_input_fields to true to @@ -29,6 +130,13 @@ message DatasetConfig { // multiple label_fields will be set. repeated string label_fields = 4; + // label separator + repeated string label_sep = 41; + + // label dimensions which need to be set when there + // are labels have dimension > 1 + repeated uint32 label_dim = 42; + // whether to shuffle data optional bool shuffle = 5 [default = true]; @@ -41,28 +149,34 @@ message DatasetConfig { // will be reused indefinitely. optional uint32 num_epochs = 6 [default = 0]; - // Number of decoded records to prefetch before batching. - optional uint32 prefetch_size = 7 [default = 512]; + // Number of decoded batches to prefetch. 
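Note: the sampler messages above are attached to DatasetConfig through the `sampler` oneof added later in this hunk. A minimal sketch of one `NegativeSampler` block, parsed the same way the tests in this change parse config strings (the path and attribute names used as values are hypothetical, and `dataset_pb2` is assumed to expose the new message alongside `DatasetConfig`):

```python
from google.protobuf import text_format

from easy_rec.python.protos.dataset_pb2 import NegativeSampler

sampler_str = """
  input_path: 'data/test/item_attrs.txt'  # hypothetical: one line per item: itemid weight attrs
  num_sample: 1024
  attr_fields: 'adgroup_id'               # hypothetical attribute columns
  attr_fields: 'cate_id'
  item_id_field: 'adgroup_id'
  attr_delimiter: ':'
  num_eval_sample: 2048
"""
sampler = NegativeSampler()
text_format.Merge(sampler_str, sampler)
print(sampler)
```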
+ optional uint32 prefetch_size = 7 [default = 32]; // shard dataset to 1/num_workers in distribute mode optional bool shard = 8 [default = false]; enum InputType { // csv format input, could be used in local or hdfs - CSVInput = 0; + CSVInput = 10; // @Depreciated - CSVInputV2 = 1; + CSVInputV2 = 11; + // extended csv format, allow quote in fields + CSVInputEx = 12; // @Depreciated, has memory leak problem OdpsInput = 2; // odps input, used on pai OdpsInputV2 = 3; + DataHubInput = 15; + OdpsInputV3 = 9; RTPInput = 4; RTPInputV2 = 5; OdpsRTPInput = 6; + TFRecordInput = 7; + BatchTFRecordInput = 14; // for the purpose to debug performance bottleneck of // input pipelines - DummyInput = 7; - KafkaInput = 8; + DummyInput = 8; + KafkaInput = 13; } required InputType input_type = 10; @@ -97,4 +211,32 @@ message DatasetConfig { // for RTPInput only optional string rtp_separator = 17 [default = ';']; + + // ignore some data errors + // it is not suggested to set this parameter + optional bool ignore_error = 18 [default=false]; + + // whether to use pai global shuffle queue, only for OdpsInput, + // OdpsInputV2, OdpsRTPInputV2 + optional bool pai_worker_queue = 19 [default = false]; + optional int32 pai_worker_slice_num = 20 [default = 100]; + + // if true, one worker will duplicate the data of the chief node + // and undertake the gradient computation of the chief node + optional bool chief_redundant = 21 [default = false]; + + // input field for sample weight + optional string sample_weight = 22; + // the compression type of tfrecord + optional string data_compression_type = 23 [default = '']; + + // n data for one feature in tfrecord + optional uint32 n_data_batch_tfrecord = 24; + + oneof sampler { + NegativeSampler negative_sampler = 101; + NegativeSamplerV2 negative_sampler_v2 = 102; + HardNegativeSampler hard_negative_sampler = 103; + HardNegativeSamplerV2 hard_negative_sampler_v2 = 104; + } } diff --git a/easy_rec/python/protos/dcn.proto b/easy_rec/python/protos/dcn.proto new file mode 100644 index 000000000..7d061ffbe --- /dev/null +++ b/easy_rec/python/protos/dcn.proto @@ -0,0 +1,18 @@ +syntax = "proto2"; +package protos; + +import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/tower.proto"; + +message CrossTower { + required string input = 1; + // The number of cross layers + required uint32 cross_num = 2 [default = 3]; +}; + +message DCN { + required Tower deep_tower = 1; + required CrossTower cross_tower = 2; + required DNN final_dnn = 3; + required float l2_regularization = 5 [default = 1e-4]; +} diff --git a/easy_rec/python/protos/dssm.proto b/easy_rec/python/protos/dssm.proto index 4ee86fe93..ab83e66b1 100644 --- a/easy_rec/python/protos/dssm.proto +++ b/easy_rec/python/protos/dssm.proto @@ -2,11 +2,12 @@ syntax = "proto2"; package protos; import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/simi.proto"; message DSSMTower { - required string id = 1; - required DNN dnn = 2; + required string id = 1; + required DNN dnn = 2; }; @@ -14,4 +15,7 @@ message DSSM { required DSSMTower user_tower = 1; required DSSMTower item_tower = 2; required float l2_regularization = 3 [default = 1e-4]; + optional Similarity simi_func = 4 [default=COSINE]; + // add a layer for scaling the similarity + optional bool scale_simi = 5 [default = true]; } diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index d42d537b2..61fcb847e 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ 
b/easy_rec/python/protos/easy_rec_model.proto @@ -11,13 +11,35 @@ import "easy_rec/python/protos/mmoe.proto"; import "easy_rec/python/protos/esmm.proto"; import "easy_rec/python/protos/dbmtl.proto"; import "easy_rec/python/protos/simple_multi_task.proto"; +import "easy_rec/python/protos/dcn.proto"; +import "easy_rec/python/protos/autoint.proto"; +import "easy_rec/python/protos/mind.proto"; import "easy_rec/python/protos/loss.proto"; - +import "easy_rec/python/protos/rocket_launching.proto"; +import "easy_rec/python/protos/variational_dropout.proto"; // for input performance test message DummyModel { } +// for knowledge distillation +message KD { + optional string loss_name = 10; + required string pred_name = 11; + // default to be logits + optional bool pred_is_logits = 12 [default=true]; + // for CROSS_ENTROPY_LOSS, soft_label must be logits instead of probs + required string soft_label_name = 21; + // default to be logits + optional bool label_is_logits = 22 [default=true]; + // currently only support CROSS_ENTROPY_LOSS and L2_LOSS + required LossType loss_type = 3; + optional float loss_weight = 4 [default=1.0]; + // only for loss_type == CROSS_ENTROPY_LOSS + optional float temperature = 5 [default=1.0]; + +} + message EasyRecModel { required string model_class = 1; @@ -31,11 +53,18 @@ message EasyRecModel { DeepFM deepfm = 103; MultiTower multi_tower = 104; FM fm = 105; + DCN dcn = 106; + AutoInt autoint = 107; + DSSM dssm = 201; + MIND mind = 202; + MMoE mmoe = 301; ESMM esmm = 302; DBMTL dbmtl = 303; SimpleMultiTask simple_multi_task = 304; + RocketLaunching rocket_launching = 305; + } repeated SeqAttGroupConfig seq_att_groups = 7; // implemented in easy_rec/python/model/easy_rec_estimator @@ -48,4 +77,12 @@ message EasyRecModel { optional uint32 num_class = 10 [default = 1]; optional bool use_embedding_variable = 11 [default=false]; + + repeated KD kd = 12; + + // filter variables matching any pattern in restore_filters + // common filters are Adam, Momentum, etc. 
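Note: the KD message above describes one teacher/student distillation term, and EasyRecModel carries a repeated `kd` field (just below) so several terms can be combined. A minimal sketch of one block in text format (the prediction and soft-label tensor names and the weights are hypothetical; `easy_rec_model_pb2` is assumed to follow the same generated-module naming as `dataset_pb2`):

```python
from google.protobuf import text_format

from easy_rec.python.protos.easy_rec_model_pb2 import KD

kd_str = """
  pred_name: 'logits'                 # hypothetical: student output tensor
  soft_label_name: 'teacher_logits'   # hypothetical: teacher output provided as a label field
  loss_type: CROSS_ENTROPY_LOSS
  loss_weight: 0.5
  temperature: 2.0                    # only used when loss_type is CROSS_ENTROPY_LOSS
"""
kd = KD()
text_format.Merge(kd_str, kd)
print(kd)
```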
+ repeated string restore_filters = 13; + + optional VariationalDropoutLayer variational_dropout = 14; } diff --git a/easy_rec/python/protos/eval.proto b/easy_rec/python/protos/eval.proto index 52e571847..7bbfa1c56 100644 --- a/easy_rec/python/protos/eval.proto +++ b/easy_rec/python/protos/eval.proto @@ -17,6 +17,32 @@ message MeanSquaredError { message Accuracy { } +message Max_F1 { +} + +message RootMeanSquaredError { +} + +message GAUC { + // uid field name + required string uid_field = 1; + // reduction method for auc of different users + // * "mean": simple mean of different users + // * "mean_by_sample_num": weighted mean with sample num of different users + // * "mean_by_positive_num": weighted mean with positive sample num of different users + optional string reduction = 2 [default='mean']; +} + +message SessionAUC { + // session id field name + required string session_id_field = 1; + // reduction: reduction method for auc of different sessions + // * "mean": simple mean of different sessions + // * "mean_by_sample_num": weighted mean with sample num of different sessions + // * "mean_by_positive_num": weighted mean with positive sample num of different sessions + optional string reduction = 2 [default='mean']; +} + message EvalMetrics { oneof metric { AUC auc = 1; @@ -24,6 +50,10 @@ message EvalMetrics { MeanAbsoluteError mean_absolute_error = 3; MeanSquaredError mean_squared_error = 4; Accuracy accuracy = 5; + Max_F1 max_f1 = 6; + RootMeanSquaredError root_mean_squared_error = 7; + GAUC gauc = 8; + SessionAUC session_auc = 9; } } @@ -44,4 +74,7 @@ message EvalConfig { // Type of metrics to use for evaluation. // possible values: repeated EvalMetrics metrics_set = 5; + + // Evaluation online with batch forward data of training + optional bool eval_online = 6 [default = false]; } diff --git a/easy_rec/python/protos/export.proto b/easy_rec/python/protos/export.proto index fc40ac951..e54a89a80 100644 --- a/easy_rec/python/protos/export.proto +++ b/easy_rec/python/protos/export.proto @@ -1,6 +1,10 @@ syntax = "proto2"; package protos; +message MultiValueFields { + repeated string input_name = 1; +} + // Message for configuring exporting models. 
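Note: the new GAUC and SessionAUC metrics above group samples by a user or session id column before averaging per-group AUC, with `reduction` selecting how the per-group values are combined. A minimal sketch of an eval_config fragment (the id column names are hypothetical, and `eval_pb2` is assumed to be the generated module for eval.proto):

```python
from google.protobuf import text_format

from easy_rec.python.protos.eval_pb2 import EvalConfig

eval_str = """
  metrics_set { auc {} }
  metrics_set {
    gauc {
      uid_field: 'user_id'             # hypothetical id column
      reduction: 'mean_by_positive_num'
    }
  }
  metrics_set {
    session_auc {
      session_id_field: 'session_id'   # hypothetical id column
      reduction: 'mean_by_sample_num'
    }
  }
  eval_online: true
"""
eval_config = EvalConfig()
text_format.Merge(eval_str, eval_config)
print(eval_config)
```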
message ExportConfig { // batch size used for exported model, -1 indicates batch_size is None @@ -30,4 +34,12 @@ message ExportConfig { // each feature has a placeholder optional bool multi_placeholder = 6 [default = true]; + + // export to keep, only for exporter_type in [best, latest] + optional int32 exports_to_keep = 7 [default = 1]; + + // multi value field list + optional MultiValueFields multi_value_fields = 8; + // is placeholder named by input + optional bool placeholder_named_by_input = 9 [default = false]; } diff --git a/easy_rec/python/protos/feature_config.proto b/easy_rec/python/protos/feature_config.proto index 7874a0bc1..1a68dad13 100644 --- a/easy_rec/python/protos/feature_config.proto +++ b/easy_rec/python/protos/feature_config.proto @@ -9,6 +9,19 @@ enum WideOrDeep { WIDE_AND_DEEP = 2; } +message AttentionCombiner { +} + +message MultiHeadAttentionCombiner { +} + +message SequenceCombiner { + oneof combiner { + AttentionCombiner attention = 1; + MultiHeadAttentionCombiner multi_head_attention = 2; + } +} + message FeatureConfig { enum FeatureType { IdFeature = 0; @@ -19,25 +32,29 @@ message FeatureConfig { SequenceFeature = 5; } + optional string feature_name = 1; + // input field names: must be included in DatasetConfig.input_fields - repeated string input_names = 1; - required FeatureType feature_type = 2 [default = IdFeature]; - optional string embedding_name = 3 [default = '']; - optional uint32 embedding_dim = 4 [default = 0]; + repeated string input_names = 2; + required FeatureType feature_type = 3 [default = IdFeature]; + optional string embedding_name = 4 [default = '']; + optional uint32 embedding_dim = 5 [default = 0]; - optional uint32 hash_bucket_size = 5 [default = 0]; + optional uint32 hash_bucket_size = 6 [default = 0]; // for categorical_column_with_identity - optional uint32 num_buckets = 15 [default = 0]; + optional uint32 num_buckets = 7 [default = 0]; // only for raw features - repeated float boundaries = 6; + repeated float boundaries = 8; - optional string separator = 7 [default = '|']; + // separator with in features + optional string separator = 9 [default = '|']; // delimeter to separator key from value - optional string kv_separator = 8 [default = ':']; + optional string kv_separator = 10; - optional string feature_name = 9; + // delimeter to separate sequence multi-values + optional string seq_multi_sep = 101; optional string vocab_file = 11; repeated string vocab_list = 12; @@ -65,6 +82,12 @@ message FeatureConfig { // normalize raw feature to [0-1] optional float min_val = 22 [default=0.0]; optional float max_val = 23 [default=0.0]; + + // raw feature of multiple dimensions + optional uint32 raw_input_dim = 24 [default=1]; + + // sequence feature combiner + optional SequenceCombiner sequence_combiner = 25; } message FeatureGroupConfig { diff --git a/easy_rec/python/protos/hyperparams.proto b/easy_rec/python/protos/hyperparams.proto index 31e24e55c..b2164cbe6 100644 --- a/easy_rec/python/protos/hyperparams.proto +++ b/easy_rec/python/protos/hyperparams.proto @@ -33,6 +33,7 @@ message Initializer { TruncatedNormalInitializer truncated_normal_initializer = 1; RandomNormalInitializer random_normal_initializer = 2; GlorotNormalInitializer glorot_normal_initializer = 3; + ConstantInitializer constant_initializer = 4; } } @@ -52,3 +53,7 @@ message RandomNormalInitializer { message GlorotNormalInitializer { } + +message ConstantInitializer { + repeated float consts = 1; +} diff --git a/easy_rec/python/protos/loss.proto 
b/easy_rec/python/protos/loss.proto index fed477e87..82a14cb5e 100644 --- a/easy_rec/python/protos/loss.proto +++ b/easy_rec/python/protos/loss.proto @@ -5,4 +5,7 @@ enum LossType { CLASSIFICATION = 0; L2_LOSS = 1; SIGMOID_L2_LOSS = 2; + // crossentropy loss/log loss + CROSS_ENTROPY_LOSS = 3; + SOFTMAX_CROSS_ENTROPY = 4; } diff --git a/easy_rec/python/protos/mind.proto b/easy_rec/python/protos/mind.proto new file mode 100644 index 000000000..7868666f5 --- /dev/null +++ b/easy_rec/python/protos/mind.proto @@ -0,0 +1,50 @@ +syntax = "proto2"; +package protos; + +import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/simi.proto"; + +message Capsule { + // max number of high capsules + optional uint32 max_k = 1 [default = 5]; + // max behaviour sequence length + required uint32 max_seq_len = 2; + // high capsule embedding vector dimension + required uint32 high_dim = 3; + // number EM iterations + optional uint32 num_iters = 4 [default=3]; + // routing logits scale + optional float routing_logits_scale = 5 [default=20]; + // routing logits initial stddev + optional float routing_logits_stddev = 6 [default=1.0]; +} + +message MIND { + enum UserSeqCombineMethod { + CONCAT = 0; + SUM = 1; + } + // preprocessing dnn before entering capsule layer + optional DNN pre_capsule_dnn = 101; + + // dnn layers applied on concated results of + // capsule output and user_context(none sequence features) + required DNN user_dnn = 102; + + // method to combine several user sequences + // such as item_ids, category_ids + optional UserSeqCombineMethod user_seq_combine = 103 [default=SUM]; + + // dnn layers applied on item features + required DNN item_dnn = 2; + + required Capsule capsule_config = 3; + + // similarity power, the paper says that the big + // the better + optional float simi_pow = 4 [default=10]; + + optional Similarity simi_func = 6 [default=COSINE]; + + required float l2_regularization = 7 [default = 1e-4]; +} diff --git a/easy_rec/python/protos/optimizer.proto b/easy_rec/python/protos/optimizer.proto index 3bd421e9b..bce46ad20 100644 --- a/easy_rec/python/protos/optimizer.proto +++ b/easy_rec/python/protos/optimizer.proto @@ -9,11 +9,15 @@ message Optimizer { RMSPropOptimizer rms_prop_optimizer = 101; MomentumOptimizer momentum_optimizer = 102; AdamOptimizer adam_optimizer = 103; - AdamAsyncOptimizer adam_async_optimizer = 104; - AdagradOptimizer adagrad_optimizer = 105; + MomentumWOptimizer momentumw_optimizer = 104; + AdamWOptimizer adamw_optimizer = 105; + AdamAsyncOptimizer adam_async_optimizer = 106; + AdagradOptimizer adagrad_optimizer = 107; + AdamAsyncWOptimizer adam_asyncw_optimizer = 108; } optional bool use_moving_average = 5 [default = false]; optional float moving_average_decay = 6 [default = 0.9999]; + optional float embedding_learning_rate_multiplier = 7; } // Configuration message for the RMSPropOptimizer @@ -40,13 +44,33 @@ message AdamOptimizer { optional float beta2 = 4 [default = 0.999]; } +message MomentumWOptimizer { + optional LearningRate learning_rate = 1; + optional float weight_decay = 2 [default = 1e-6]; + optional float momentum_optimizer_value = 3 [default = 0.9]; +} + +message AdamWOptimizer { + optional LearningRate learning_rate = 1; + optional float weight_decay = 2 [default = 1e-6]; + optional float beta1 = 3 [default = 0.9]; + optional float beta2 = 4 [default = 0.999]; +} + +message AdamAsyncWOptimizer { + optional LearningRate learning_rate = 1; + optional float weight_decay = 2 [default = 1e-6]; + optional float beta1 = 3 [default = 0.9]; 
+ optional float beta2 = 4 [default = 0.999]; +} + // Configuration message for the AdagradOptimizer // See: https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer message AdagradOptimizer { optional LearningRate learning_rate = 1; } -// Developed by pai +// Only available on pai-tf, which has better performance than AdamOptimizer message AdamAsyncOptimizer { optional LearningRate learning_rate = 1; optional float beta1 = 3 [default = 0.9]; diff --git a/easy_rec/python/protos/pipeline.proto b/easy_rec/python/protos/pipeline.proto index 09ed60292..694b7c874 100644 --- a/easy_rec/python/protos/pipeline.proto +++ b/easy_rec/python/protos/pipeline.proto @@ -15,10 +15,12 @@ message EasyRecConfig { oneof train_path { string train_input_path = 1; KafkaServer kafka_train_input = 2; + DatahubServer datahub_train_input = 12; } oneof eval_path { string eval_input_path = 3; KafkaServer kafka_eval_input = 4; + DatahubServer datahub_eval_input = 13; } required string model_dir = 5; diff --git a/easy_rec/python/protos/rocket_launching.proto b/easy_rec/python/protos/rocket_launching.proto new file mode 100755 index 000000000..039a5ad89 --- /dev/null +++ b/easy_rec/python/protos/rocket_launching.proto @@ -0,0 +1,15 @@ +syntax = "proto2"; +package protos; + +import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/simi.proto"; + +message RocketLaunching { + required DNN share_dnn = 1; + required DNN booster_dnn =2; + required DNN light_dnn = 3; + optional float l2_regularization = 4 [default = 1e-4]; + optional bool feature_based_distillation = 5 [default = false]; + // COSINE = 0; EUCLID = 1; + optional Similarity feature_distillation_function = 6 [default=COSINE]; +} diff --git a/easy_rec/python/protos/simi.proto b/easy_rec/python/protos/simi.proto new file mode 100644 index 000000000..9014873b0 --- /dev/null +++ b/easy_rec/python/protos/simi.proto @@ -0,0 +1,8 @@ +syntax = "proto2"; +package protos; + +enum Similarity { + COSINE = 0; + INNER_PRODUCT = 1; + EUCLID = 2; +} diff --git a/easy_rec/python/protos/train.proto b/easy_rec/python/protos/train.proto index 76813f622..d6769a8ff 100644 --- a/easy_rec/python/protos/train.proto +++ b/easy_rec/python/protos/train.proto @@ -44,17 +44,20 @@ message TrainConfig { /* The following fields are for distributed training */ // Whether to synchronize replicas during training. // In case so, build a SyncReplicateOptimizer - optional bool sync_replicas = 9 [default = false]; + optional bool sync_replicas = 9 [default = true]; // Number of training steps between replica startup. // This flag must be set to 0 if sync_replicas is set to true. optional float startup_delay_steps = 10 [default = 15]; // Step interval for saving checkpoint - optional uint32 save_checkpoints_steps = 14 [default = 5000]; + optional uint32 save_checkpoints_steps = 141 [default = 1000]; // Seconds interval for saving checkpoint - optional uint32 save_checkpoints_secs = 15; + optional uint32 save_checkpoints_secs = 142; + + // Max checkpoints to keep + optional uint32 keep_checkpoint_max = 143 [default = 10]; // Save summaries every this many steps. 
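Note: the optimizer additions above register decoupled weight-decay variants (MomentumW, AdamW, AdamAsyncW) plus an optional per-embedding learning-rate multiplier, and train.proto now exposes `keep_checkpoint_max` alongside the step/second-based checkpoint intervals. A minimal sketch of both pieces in text format (numeric values are hypothetical; `optimizer_pb2` and `train_pb2` are assumed to be the generated modules, and the `learning_rate` sub-message is omitted since its schema is not part of this change):

```python
from google.protobuf import text_format

from easy_rec.python.protos.optimizer_pb2 import Optimizer
from easy_rec.python.protos.train_pb2 import TrainConfig

opt = Optimizer()
text_format.Merge("""
  adamw_optimizer {
    weight_decay: 1e-6
    beta1: 0.9
    beta2: 0.999
  }
  use_moving_average: false
  embedding_learning_rate_multiplier: 2.0   # hypothetical: scale updates of embedding variables
""", opt)

train = TrainConfig()
text_format.Merge("""
  sync_replicas: true
  save_checkpoints_steps: 1000
  keep_checkpoint_max: 10
""", train)
print(opt)
print(train)
```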
optional uint32 save_summary_steps = 16 [default = 1000]; diff --git a/easy_rec/python/protos/variational_dropout.proto b/easy_rec/python/protos/variational_dropout.proto new file mode 100644 index 000000000..e72ca54c6 --- /dev/null +++ b/easy_rec/python/protos/variational_dropout.proto @@ -0,0 +1,10 @@ +syntax = "proto2"; +package protos; + + +message VariationalDropoutLayer{ + // regularization coefficient lambda + optional float regularization_lambda = 1 [default = 0.01]; + // variational_dropout dimension + optional bool embedding_wise_variational_dropout = 2 [default = false]; +} diff --git a/easy_rec/python/protos/wide_and_deep.proto b/easy_rec/python/protos/wide_and_deep.proto index f939d6107..bf402497f 100644 --- a/easy_rec/python/protos/wide_and_deep.proto +++ b/easy_rec/python/protos/wide_and_deep.proto @@ -1,8 +1,16 @@ -syntax = "proto2"; +syntax="proto2"; package protos; import "easy_rec/python/protos/dnn.proto"; message WideAndDeep { - required DNN dnn = 1; + required uint32 wide_output_dim = 1 [default=1]; + + required DNN dnn = 2; + + // if set, the output of dnn and wide part are concatenated and + // passed to the final_dnn; otherwise, they are summarized + optional DNN final_dnn = 3; + + optional float l2_regularization = 4 [default=1e-4]; } diff --git a/easy_rec/python/test/csv_input_test.py b/easy_rec/python/test/csv_input_test.py index 19d0a5247..576b42297 100644 --- a/easy_rec/python/test/csv_input_test.py +++ b/easy_rec/python/test/csv_input_test.py @@ -6,6 +6,7 @@ from google.protobuf import text_format from easy_rec.python.input.csv_input import CSVInput +from easy_rec.python.input.csv_input_ex import CSVInputEx from easy_rec.python.protos.dataset_pb2 import DatasetConfig from easy_rec.python.protos.feature_config_pb2 import FeatureConfig from easy_rec.python.utils import config_util @@ -23,6 +24,7 @@ class CSVInputTest(tf.test.TestCase): def __init__(self, methodName='CSVInputTest'): super(CSVInputTest, self).__init__(methodName=methodName) self._input_path = 'data/test/test.csv' + self._input_path_with_quote = 'data/test/test_with_quote.csv' @RunAsSubprocess def test_csv_data(self): @@ -204,6 +206,123 @@ def test_csv_data_flt_to_str(self): sess.run(init_op) feature_dict, label_dict = sess.run([features, labels]) + @RunAsSubprocess + def test_csv_input_ex(self): + data_config_str = """ + input_fields { + input_name: 'label' + input_type: FLOAT + } + input_fields { + input_name: 'field[1-3]' + input_type: STRING + } + label_fields: 'label' + batch_size: 1024 + num_epochs: 10000 + prefetch_size: 32 + auto_expand_input_fields: true + """ + feature_config_str = """ + input_names: 'field1' + shared_names: 'field[2-3]' + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 2000 + """ + dataset_config = DatasetConfig() + text_format.Merge(data_config_str, dataset_config) + feature_config = FeatureConfig() + text_format.Merge(feature_config_str, feature_config) + feature_configs = [feature_config] + empty_config = FeatureConfig() + empty_config.CopyFrom(feature_config) + while len(empty_config.input_names) > 0: + empty_config.input_names.pop() + while len(empty_config.shared_names) > 0: + empty_config.shared_names.pop() + for input_name in feature_config.shared_names: + input_names = config_util.auto_expand_names(input_name) + for tmp_name in input_names: + tmp_config = FeatureConfig() + tmp_config.CopyFrom(empty_config) + tmp_config.input_names.append(tmp_name) + feature_configs.append(tmp_config) + train_input_fn = CSVInputEx(dataset_config, feature_configs, 
+ self._input_path_with_quote).create_input() + dataset = train_input_fn(mode=tf.estimator.ModeKeys.TRAIN) + iterator = dataset.make_initializable_iterator() + tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer) + features, labels = iterator.get_next() + init_op = tf.get_collection(tf.GraphKeys.TABLE_INITIALIZERS) + gpu_options = tf.GPUOptions(allow_growth=True) + session_config = tf.ConfigProto( + gpu_options=gpu_options, + allow_soft_placement=True, + log_device_placement=False) + with self.test_session(config=session_config) as sess: + sess.run(init_op) + feature_dict, label_dict = sess.run([features, labels]) + + @RunAsSubprocess + def test_csv_data_ignore_error(self): + data_config_str = """ + input_fields { + input_name: 'label' + input_type: FLOAT + } + input_fields { + input_name: 'field[1-3]' + input_type: STRING + } + label_fields: 'label' + batch_size: 32 + num_epochs: 10000 + prefetch_size: 32 + auto_expand_input_fields: true + ignore_error: true + """ + feature_config_str = """ + input_names: 'field1' + shared_names: 'field[2-3]' + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 2000 + """ + dataset_config = DatasetConfig() + text_format.Merge(data_config_str, dataset_config) + feature_config = FeatureConfig() + text_format.Merge(feature_config_str, feature_config) + feature_configs = [feature_config] + empty_config = FeatureConfig() + empty_config.CopyFrom(feature_config) + while len(empty_config.input_names) > 0: + empty_config.input_names.pop() + while len(empty_config.shared_names) > 0: + empty_config.shared_names.pop() + for input_name in feature_config.shared_names: + input_names = config_util.auto_expand_names(input_name) + for tmp_name in input_names: + tmp_config = FeatureConfig() + tmp_config.CopyFrom(empty_config) + tmp_config.input_names.append(tmp_name) + feature_configs.append(tmp_config) + train_input_fn = CSVInput(dataset_config, feature_configs, + self._input_path_with_quote).create_input() + dataset = train_input_fn(mode=tf.estimator.ModeKeys.TRAIN) + iterator = dataset.make_initializable_iterator() + tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer) + features, labels = iterator.get_next() + init_op = tf.get_collection(tf.GraphKeys.TABLE_INITIALIZERS) + gpu_options = tf.GPUOptions(allow_growth=True) + session_config = tf.ConfigProto( + gpu_options=gpu_options, + allow_soft_placement=True, + log_device_placement=False) + with self.test_session(config=session_config) as sess: + sess.run(init_op) + feature_dict, label_dict = sess.run([features, labels]) + if __name__ == '__main__': tf.test.main() diff --git a/easy_rec/python/test/dh_local_run.py b/easy_rec/python/test/dh_local_run.py new file mode 100644 index 000000000..1cb2a481e --- /dev/null +++ b/easy_rec/python/test/dh_local_run.py @@ -0,0 +1,109 @@ +import argparse +import logging +import os +import shutil +import sys + +import tensorflow as tf + +from easy_rec.python.test.dh_test_util import datahub_test_util +from easy_rec.python.test.odps_command import OdpsCommand +from easy_rec.python.test.odps_test_prepare import prepare +from easy_rec.python.test.odps_test_util import OdpsOSSConfig +from easy_rec.python.test.odps_test_util import delete_oss_path +from easy_rec.python.test.odps_test_util import get_oss_bucket +from easy_rec.python.utils import test_utils + +logging.basicConfig( + level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s') + +odps_oss_config = OdpsOSSConfig(script_path='./samples/dh_script') + + +class 
TestPipelineOnEmr(tf.test.TestCase): + """Train eval test on emr.""" + + def setUp(self): + logging.info('Testing %s.%s' % (type(self).__name__, self._testMethodName)) + self._success = True + self._test_dir = test_utils.get_tmp_dir() + logging.info('test datahub local dir: %s' % self._test_dir) + + def tearDown(self): + if self._success: + shutil.rmtree(self._test_dir) + + def test_datahub_train_eval(self): + end = ['deep_fm/drop_table.sql'] + odps_cmd = OdpsCommand(odps_oss_config) + + self._success = test_utils.test_datahub_train_eval( + '%s/configs/deepfm.config' % odps_oss_config.temp_dir, self._test_dir) + odps_cmd.run_list(end) + self.assertTrue(self._success) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--odps_config', type=str, default=None, help='odps config path') + parser.add_argument( + '--oss_config', type=str, default=None, help='ossutilconfig path') + parser.add_argument( + '--datahub_config', type=str, default=None, help='datahub_config') + parser.add_argument( + '--bucket_name', type=str, default=None, help='test oss bucket name') + parser.add_argument('--arn', type=str, default=None, help='oss rolearn') + parser.add_argument( + '--odpscmd', type=str, default='odpscmd', help='odpscmd path') + parser.add_argument( + '--algo_project', type=str, default=None, help='algo project name') + parser.add_argument( + '--algo_res_project', + type=str, + default=None, + help='algo resource project name') + parser.add_argument( + '--algo_version', type=str, default=None, help='algo version') + args, unknown_args = parser.parse_known_args() + + sys.argv = [sys.argv[0]] + for unk_arg in unknown_args: + sys.argv.append(unk_arg) + + if args.odps_config: + odps_oss_config.load_odps_config(args.odps_config) + os.environ['ODPS_CONFIG_FILE_PATH'] = args.odps_config + if args.datahub_config: + odps_oss_config.load_dh_config(args.datahub_config) + if args.oss_config: + odps_oss_config.load_oss_config(args.oss_config) + if args.odpscmd: + odps_oss_config.odpscmd_path = args.odpscmd + if args.algo_project: + odps_oss_config.algo_project = args.algo_project + if args.algo_res_project: + odps_oss_config.algo_res_project = args.algo_res_project + if args.algo_version: + odps_oss_config.algo_version = args.algo_version + if args.arn: + odps_oss_config.arn = args.arn + if args.bucket_name: + odps_oss_config.bucket_name = args.bucket_name + print(args) + prepare(odps_oss_config) + start = [ + 'deep_fm/create_external_deepfm_table.sql', + 'deep_fm/create_inner_deepfm_table.sql' + ] + end = ['deep_fm/drop_table.sql'] + odps_cmd = OdpsCommand(odps_oss_config) + odps_cmd.run_list(start) + odps_oss_config._subscription() + tf.test.main() + # delete oss path + bucket = get_oss_bucket(odps_oss_config.oss_key, odps_oss_config.oss_secret, + odps_oss_config.endpoint, odps_oss_config.bucket_name) + delete_oss_path(bucket, odps_oss_config.exp_dir, odps_oss_config.bucket_name) + # delete tmp + shutil.rmtree(odps_oss_config.temp_dir) diff --git a/easy_rec/python/test/dh_test_util.py b/easy_rec/python/test/dh_test_util.py new file mode 100644 index 000000000..e073ec1e5 --- /dev/null +++ b/easy_rec/python/test/dh_test_util.py @@ -0,0 +1,212 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
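Note: the runner above drives the new DataHub input path end to end; in pipeline.config it is selected through the `datahub_train_input` / `datahub_eval_input` entries this change adds to the train_path/eval_path oneofs. A minimal sketch of the DatahubServer block itself (all values are placeholders, with shard_num/life_cycle echoing the 7/3 used by the test utility below; `data_source_pb2` is assumed to be the generated module for data_source.proto):

```python
from google.protobuf import text_format

from easy_rec.python.protos.data_source_pb2 import DatahubServer

dh_str = """
  akId: 'your-access-id'        # placeholder credentials
  akSecret: 'your-access-key'
  region: 'cn-beijing'          # placeholder region
  project: 'easy_rec_test'      # placeholder project/topic
  topic: 'deepfm_train'
  shard_num: 7
  life_cycle: 3
"""
dh = DatahubServer()
text_format.Merge(dh_str, dh)
print(dh)
```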
+ +import configparser +import logging +import os +import time +import traceback + +import oss2 +from datahub import DataHub +from datahub.exceptions import InvalidOperationException +from datahub.exceptions import ResourceExistException +from datahub.models import FieldType +from datahub.models import RecordSchema +from datahub.models import RecordType +from datahub.models import TupleRecord +from odps import ODPS +from odps.df import DataFrame + + +class OdpsOSSConfig: + + def __init__(self, script_path='./samples/odps_script'): + self.time_stamp = int(time.time()) + temp_dir = os.environ.get('TEST_DIR', '/tmp') + self.exp_dir = 'easy_rec_odps_test_%d' % self.time_stamp + self.temp_dir = os.path.join(temp_dir, self.exp_dir) + self.log_dir = os.path.join(self.temp_dir, 'logs/') + + # public buckets with readyonly access + self.ali_bucket_endpoint = 'http://oss-cn-beijing.aliyuncs.com' + self.ali_bucket_name = 'easyrec' + self.script_path = script_path + # read only access + self.ali_oss_key = 'oss_key' + self.ali_oss_secret = 'oss_secret' + + self.oss_key = '' + self.oss_secret = '' + self.endpoint = '' + self.arn = 'acs:ram::xxx:role/aliyunodpspaidefaultrole' + self.bucket_name = '' + + self.odpscmd_path = os.environ.get('ODPS_CMD_PATH', 'odpscmd') + self.odps_config_path = '' + # input table project name replace {ODPS_PROJ_NAME} in + # samples/odps_script: + # grep ODPS_PROJ_NAME -r samples/odps_script/ + self.project_name = '' + + self.dhid = '' + self.dhkey = '' + self.dhendpoint = '' + self.dhtopic = '' + self.dhproject = '' + self.dh = '' + self.odps = '' + self.odpsEnd = '' + + # default to algo_public + self.algo_project = None + self.algo_res_project = None + self.algo_version = None + + def load_dh_config(self, config_path): + configer = configparser.ConfigParser() + configer.read(config_path, encoding='utf-8') + self.dhid = configer.get('datahub', 'access_id') + self.dhkey = configer.get('datahub', 'access_key') + self.dhendpoint = configer.get('datahub', 'endpoint') + self.dhtopic = configer.get('datahub', 'topic_name') + self.dhproject = configer.get('datahub', 'project') + + def load_oss_config(self, config_path): + with open(config_path, 'r') as fin: + for line_str in fin: + line_str = line_str.strip() + line_str = line_str.replace(' ', '') + if line_str.startswith('accessKeyID='): + self.oss_key = line_str[len('accessKeyID='):].strip() + elif line_str.startswith('accessKeySecret='): + self.oss_secret = line_str[len('accessKeySecret='):].strip() + elif line_str.startswith('endpoint='): + self.endpoint = line_str[len('endpoint='):].strip() + + def load_odps_config(self, config_path): + self.odps_config_path = config_path + with open(config_path, 'r') as fin: + for line_str in fin: + line_str = line_str.strip() + line_str = line_str.replace(' ', '') + if line_str.startswith('project_name='): + self.project_name = line_str[len('project_name='):] + if line_str.startswith('end_point='): + self.odpsEnd = line_str[len('end_point='):] + + def clean_topic(self, dhproject): + if not dhproject: + logging.error('project is empty .') + topic_names = self.dh.list_topic(dhproject).topic_names + for topic_name in topic_names: + self.clean_subscription(topic_name) + self.dh.delete_topic(dhproject, topic_name) + + def clean_project(self): + project_names = self.dh.list_project().project_names + for dhproject in project_names: + if dhproject == self.dhproject: + self.clean_topic(dhproject) + try: + self.dh.delete_project(dhproject) + except InvalidOperationException: + pass + + def 
clean_subscription(self, topic_name): + subscriptions = self.dh.list_subscription(self.dhproject, topic_name, '', 1, + 100).subscriptions + for subscription in subscriptions: + self.dh.delete_subscription(self.dhproject, topic_name, subscription) + + def get_input_type(self, input_type): + DhDict = { + 'INT64': FieldType.BIGINT, + 'INT32': FieldType.BIGINT, + 'STRING': FieldType.STRING, + 'BOOLEAN': FieldType.BOOLEAN, + 'FLOAT32': FieldType.DOUBLE, + 'FLOAT64': FieldType.DOUBLE + } + + return DhDict.get(input_type) + + def _subscription(self): + self.dh = DataHub(self.dhid, self.dhkey, self.dhendpoint) + self.odps = ODPS(self.dhid, self.dhkey, self.project_name, self.odpsEnd) + self.odpsTable = 'deepfm_train_%s' % self.time_stamp + self.clean_project() + read_odps = DataFrame(self.odps.get_table(self.odpsTable)) + col = read_odps.schema.names + col_type = [self.get_input_type(str(i)) for i in read_odps.schema.types] + try: + self.dh.create_project(self.dhproject, 'EasyRecTest') + logging.info('create project success!') + except ResourceExistException: + logging.info('project %s already exist!' % self.dhproject) + except Exception as e: + logging.info(traceback.format_exc(e)) + record_schema = RecordSchema.from_lists(col, col_type) + try: + self.dh.create_tuple_topic(self.dhproject, self.dhtopic, 7, 3, + record_schema, 'easyrec_datahub') + logging.info('create tuple topic success!') + except ResourceExistException: + logging.info('topic %s already exist!' % self.dhtopic) + except Exception: + logging.error(traceback.format_exc()) + try: + self.dh.wait_shards_ready(self.dhproject, self.dhtopic) + logging.info('shards all ready') + topic_result = self.dh.get_topic(self.dhproject, self.dhtopic) + if topic_result.record_type != RecordType.TUPLE: + logging.error('topic type illegal! ') + record_schema = topic_result.record_schema + t = self.odps.get_table(self.odpsTable) + with t.open_reader() as reader: + size = 0 + record_list = [] + for data in reader[0:1000]: + record = TupleRecord(values=data.values, schema=record_schema) + record_list.append(record) + if size % 1000: + self.dh.put_records(self.dhproject, self.dhtopic, record_list) + record_list = [] + size += 1 + + except Exception as e: + logging.error(e) + + +def get_oss_bucket(oss_key, oss_secret, endpoint, bucket_name): + """Build oss2.Bucket instance. + + Args: + oss_key: oss access_key + oss_secret: oss access_secret + endpoint: oss endpoint + bucket_name: oss bucket name + Return: + oss2.Bucket instance + """ + if oss_key is None or oss_secret is None: + logging.info('oss_key or oss_secret is None') + return None + auth = oss2.Auth(oss_key, oss_secret) + bucket = oss2.Bucket(auth, endpoint, bucket_name) + return bucket + + +def delete_oss_path(bucket, in_prefix, bucket_name): + """Delete oss path. + + Args: + bucket: oss2.Bucket instance + in_prefix: oss path prefix to be removed + bucket_name: bucket_name + """ + prefix = in_prefix.replace('oss://' + bucket_name + '/', '') + for obj in oss2.ObjectIterator(bucket, prefix=prefix): + bucket.delete_object(obj.key) + bucket.delete_object(prefix) + logging.info('delete oss path: %s, completed.' % in_prefix) diff --git a/easy_rec/python/test/embed_test.py b/easy_rec/python/test/embed_test.py new file mode 100644 index 000000000..30ddb2191 --- /dev/null +++ b/easy_rec/python/test/embed_test.py @@ -0,0 +1,155 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
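Note: `OdpsOSSConfig.load_dh_config` above reads the DataHub credentials from an ini file via configparser; the section and key names below match what that loader looks up, while the values are placeholders. A minimal self-contained sketch (the loader itself reads the same keys from a file path rather than a string):

```python
import configparser
import io

dh_ini = u"""
[datahub]
access_id = your-access-id
access_key = your-access-key
endpoint = https://dh-cn-beijing.aliyuncs.com
topic_name = deepfm_train
project = easy_rec_test
"""

parser = configparser.ConfigParser()
parser.read_file(io.StringIO(dh_ini))
print(parser.get('datahub', 'access_id'), parser.get('datahub', 'topic_name'))
```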
+ +import logging + +import numpy as np +import tensorflow as tf +from google.protobuf import text_format + +from easy_rec.python.compat.feature_column import feature_column +from easy_rec.python.feature_column.feature_column import FeatureColumnParser +from easy_rec.python.input.dummy_input import DummyInput +from easy_rec.python.protos.dataset_pb2 import DatasetConfig +from easy_rec.python.protos.feature_config_pb2 import FeatureConfig +from easy_rec.python.protos.feature_config_pb2 import WideOrDeep + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class EmbedTest(tf.test.TestCase): + + def test_raw_embed(self): + # embedding variable is: + # [[1, 2 ], + # [3, 4 ], + # [5, 6 ], + # [7, 8 ], + # [9, 10] + # ] + feature_config_str = ''' + input_names: 'field1' + feature_type: RawFeature + initializer { + constant_initializer { + consts: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + } + } + separator: ',', + raw_input_dim: 5 + embedding_dim: 2 + combiner: 'sum' + ''' + feature_config = FeatureConfig() + text_format.Merge(feature_config_str, feature_config) + + data_config_str = ''' + input_fields { + input_name: 'clk' + input_type: INT32 + default_val: '0' + } + input_fields { + input_name: 'field1' + input_type: STRING + default_val: '0' + } + label_fields: 'clk' + batch_size: 1 + ''' + data_config = DatasetConfig() + text_format.Merge(data_config_str, data_config) + + feature_configs = [feature_config] + features = {'field1': tf.constant(['0.1,0.2,0.3,0.4,0.5'])} + dummy_input = DummyInput( + data_config, feature_configs, '', input_vals=features) + field_dict, _ = dummy_input._build(tf.estimator.ModeKeys.TRAIN, {}) + + wide_and_deep_dict = {'field1': WideOrDeep.WIDE_AND_DEEP} + fc_parser = FeatureColumnParser(feature_configs, wide_and_deep_dict, 2) + wide_cols = list(fc_parser._wide_columns.values()) + wide_features = feature_column.input_layer(field_dict, wide_cols) + deep_cols = list(fc_parser._deep_columns.values()) + deep_features = feature_column.input_layer(field_dict, deep_cols) + init = tf.initialize_all_variables() + with tf.Session() as sess: + sess.run(init) + fea_val = sess.run(wide_features) + logging.info('wide fea_val = %s' % str(fea_val[0])) + assert np.abs(fea_val[0][0] - 9.5) < 1e-6 + assert np.abs(fea_val[0][1] - 11.0) < 1e-6 + fea_val = sess.run(deep_features) + logging.info('deep fea_val = %s' % str(fea_val[0])) + assert np.abs(fea_val[0][0] - 9.5) < 1e-6 + assert np.abs(fea_val[0][1] - 11.0) < 1e-6 + + def test_seq_multi_embed(self): + # embedding variable is: + # [[1, 2 ], + # [3, 4 ], + # [5, 6 ], + # [7, 8 ], + # [9, 10] + # ] + feature_config_str = ''' + input_names: 'field1' + feature_type: SequenceFeature + initializer { + constant_initializer { + consts: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + } + } + separator: '', + seq_multi_sep: '', + embedding_dim: 2 + num_buckets: 5 + combiner: 'mean' + ''' + feature_config = FeatureConfig() + text_format.Merge(feature_config_str, feature_config) + + data_config_str = ''' + input_fields { + input_name: 'clk' + input_type: INT32 + default_val: '0' + } + input_fields { + input_name: 'field1' + input_type: STRING + default_val: '0' + } + label_fields: 'clk' + batch_size: 1 + ''' + data_config = DatasetConfig() + text_format.Merge(data_config_str, data_config) + + feature_configs = [feature_config] + features = {'field1': tf.constant(['0112', '132430'])} + dummy_input = DummyInput( + data_config, feature_configs, '', input_vals=features) + field_dict, _ = dummy_input._build(tf.estimator.ModeKeys.TRAIN, {}) + + wide_and_deep_dict = 
{'field1': WideOrDeep.DEEP} + fc_parser = FeatureColumnParser(feature_configs, wide_and_deep_dict) + builder = feature_column._LazyBuilder(field_dict) + hist_embedding, hist_seq_len = \ + fc_parser.deep_columns['field1']._get_sequence_dense_tensor(builder) + + init = tf.initialize_all_variables() + with tf.Session() as sess: + sess.run(init) + fea_val, len_val = sess.run([hist_embedding, hist_seq_len]) + logging.info('length_val = %s' % str(len_val)) + logging.info('deep fea_val = %s' % str(fea_val)) + assert np.abs(fea_val[0][0][0] - 2) < 1e-6 + assert np.abs(fea_val[0][0][1] - 3) < 1e-6 + assert np.abs(fea_val[0][1][0] - 4) < 1e-6 + assert np.abs(fea_val[0][1][1] - 5) < 1e-6 + + +if __name__ == '__main__': + tf.test.main() diff --git a/easy_rec/python/test/emr_run.py b/easy_rec/python/test/emr_run.py new file mode 100644 index 000000000..305190544 --- /dev/null +++ b/easy_rec/python/test/emr_run.py @@ -0,0 +1,119 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. + +import argparse +import logging +import os +import shutil +import sys + +import tensorflow as tf + +from easy_rec.python.test.odps_command import OdpsCommand +from easy_rec.python.test.odps_test_prepare import prepare +from easy_rec.python.test.odps_test_util import OdpsOSSConfig +from easy_rec.python.test.odps_test_util import delete_oss_path +from easy_rec.python.test.odps_test_util import get_oss_bucket +from easy_rec.python.utils import test_utils + +logging.basicConfig( + level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s') + +odps_oss_config = OdpsOSSConfig(script_path='./samples/emr_script') + + +class TestPipelineOnEmr(tf.test.TestCase): + """Train eval test on emr.""" + + def setUp(self): + logging.info('Testing %s.%s' % (type(self).__name__, self._testMethodName)) + self._test_hdfs_dir = test_utils.get_hdfs_tmp_dir( + 'hdfs://emr-header-1:9000/user/easy_rec/emr_test') + self._success = True + logging.info('test hdfs dir: %s' % self._test_hdfs_dir) + + def tearDown(self): + if self._success: + pass + test_utils.clean_up_hdfs(self._test_hdfs_dir) + + def test_deepfm_train_eval_export(self): + start = [ + 'deep_fm/create_external_deepfm_table.sql', + 'deep_fm/create_inner_deepfm_table.sql' + ] + end = ['deep_fm/drop_table.sql'] + odps_cmd = OdpsCommand(odps_oss_config) + odps_cmd.run_list(start) + self._success = test_utils.test_hdfs_train_eval( + '%s/configs/deepfm.config' % odps_oss_config.temp_dir, + '%s/yaml_config/train.paitf.yaml' % odps_oss_config.temp_dir, + self._test_hdfs_dir) + self.assertTrue(self._success) + + self._success = test_utils.test_hdfs_eval( + '%s/configs/deepfm_eval_pipeline.config' % odps_oss_config.temp_dir, + '%s/yaml_config/eval.tf.yaml' % odps_oss_config.temp_dir, + self._test_hdfs_dir) + self.assertTrue(self._success) + + self._success = test_utils.test_hdfs_export( + '%s/configs/deepfm_eval_pipeline.config' % odps_oss_config.temp_dir, + '%s/yaml_config/export.tf.yaml' % odps_oss_config.temp_dir, + self._test_hdfs_dir) + self.assertTrue(self._success) + + odps_cmd.run_list(end) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--odps_config', type=str, default=None, help='odps config path') + parser.add_argument( + '--oss_config', type=str, default=None, help='ossutilconfig path') + parser.add_argument( + '--bucket_name', type=str, default=None, help='test oss bucket name') + parser.add_argument('--arn', type=str, default=None, help='oss rolearn') + parser.add_argument( + '--odpscmd', type=str, 
default='odpscmd', help='odpscmd path') + parser.add_argument( + '--algo_project', type=str, default=None, help='algo project name') + parser.add_argument( + '--algo_res_project', + type=str, + default=None, + help='algo resource project name') + parser.add_argument( + '--algo_version', type=str, default=None, help='algo version') + args, unknown_args = parser.parse_known_args() + sys.argv = [sys.argv[0]] + for unk_arg in unknown_args: + sys.argv.append(unk_arg) + + if args.odps_config: + odps_oss_config.load_odps_config(args.odps_config) + os.environ['ODPS_CONFIG_FILE_PATH'] = args.odps_config + if args.oss_config: + odps_oss_config.load_oss_config(args.oss_config) + if args.odpscmd: + odps_oss_config.odpscmd_path = args.odpscmd + if args.algo_project: + odps_oss_config.algo_project = args.algo_project + if args.algo_res_project: + odps_oss_config.algo_res_project = args.algo_res_project + if args.algo_version: + odps_oss_config.algo_version = args.algo_version + if args.arn: + odps_oss_config.arn = args.arn + if args.bucket_name: + odps_oss_config.bucket_name = args.bucket_name + print(args) + prepare(odps_oss_config) + tf.test.main() + # delete oss path + bucket = get_oss_bucket(odps_oss_config.oss_key, odps_oss_config.oss_secret, + odps_oss_config.endpoint, odps_oss_config.bucket_name) + delete_oss_path(bucket, odps_oss_config.exp_dir, odps_oss_config.bucket_name) + # delete tmp + shutil.rmtree(odps_oss_config.temp_dir) diff --git a/easy_rec/python/test/eval_metric_test.py b/easy_rec/python/test/eval_metric_test.py new file mode 100644 index 000000000..57d111be1 --- /dev/null +++ b/easy_rec/python/test/eval_metric_test.py @@ -0,0 +1,106 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +from __future__ import division + +import logging + +import tensorflow as tf +from absl.testing import parameterized + +from easy_rec.python.core.metrics import gauc +from easy_rec.python.core.metrics import max_f1 +from easy_rec.python.core.metrics import session_auc +from easy_rec.python.utils.test_utils import RunAsSubprocess + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +class MetricsTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + logging.info('Testing %s.%s' % (type(self).__name__, self._testMethodName)) + + @RunAsSubprocess + def test_max_f1(self): + labels = tf.constant([1, 0, 0, 1], dtype=tf.int32) + probs = tf.constant([0.9, 0.8, 0.7, 0.6], dtype=tf.float32) + f1, f1_update_op = max_f1(labels, probs) + + with tf.Session() as sess: + sess.run(tf.local_variables_initializer()) + sess.run(f1_update_op) + f1_score = sess.run(f1) + self.assertAlmostEqual(f1_score, 2.0 / 3) + + @RunAsSubprocess + def test_gauc_all_negative_label(self): + labels = tf.constant([0, 0, 0, 0], dtype=tf.int32) + probs = tf.constant([0.9, 0.8, 0.7, 0.6], dtype=tf.float32) + uids = tf.constant([1, 1, 1, 1], dtype=tf.int32) + value_op, update_op = gauc(labels, probs, uids) + with tf.Session() as sess: + sess.run(update_op) + score = sess.run(value_op) + self.assertAlmostEqual(score, 0.0) + + @parameterized.named_parameters( + [['_reduction_mean', 'mean', 0.5833333], + ['_reduction_mean_by_sample_num', 'mean_by_sample_num', 0.5925926], + ['_reduction_mean_by_positive_num', 'mean_by_positive_num', 0.6]]) + @RunAsSubprocess + def test_gauc(self, reduction, expected): + labels = tf.placeholder(dtype=tf.int32, shape=(None,)) + probs = tf.placeholder(dtype=tf.float32, shape=(None,)) + uids = tf.placeholder(dtype=tf.int32, shape=(None,)) + value_op, update_op = 
gauc(labels, probs, uids, reduction=reduction) + with tf.Session() as sess: + sess.run( + update_op, + feed_dict={ + labels: [1, 0, 1, 1, 0], + probs: [0.9, 0.8, 0.7, 0.6, 0.5], + uids: [1, 1, 1, 1, 1] + }) + sess.run( + update_op, + feed_dict={ + labels: [1, 0, 0, 1], + probs: [0.9, 0.8, 0.7, 0.6], + uids: [2, 2, 2, 2] + }) + score = sess.run(value_op) + self.assertAlmostEqual(score, expected) + + @parameterized.named_parameters( + [['_reduction_mean', 'mean', 0.5833333], + ['_reduction_mean_by_sample_num', 'mean_by_sample_num', 0.5925926], + ['_reduction_mean_by_positive_num', 'mean_by_positive_num', 0.6]]) + @RunAsSubprocess + def test_session_auc(self, reduction, expected): + labels = tf.placeholder(dtype=tf.int32, shape=(None,)) + probs = tf.placeholder(dtype=tf.float32, shape=(None,)) + session_ids = tf.placeholder(dtype=tf.int32, shape=(None,)) + value_op, update_op = session_auc( + labels, probs, session_ids, reduction=reduction) + with tf.Session() as sess: + sess.run( + update_op, + feed_dict={ + labels: [1, 0, 1, 1, 0], + probs: [0.9, 0.8, 0.7, 0.6, 0.5], + session_ids: [1, 1, 1, 1, 1] + }) + sess.run( + update_op, + feed_dict={ + labels: [1, 0, 0, 1], + probs: [0.9, 0.8, 0.7, 0.6], + session_ids: [2, 2, 2, 2] + }) + score = sess.run(value_op) + self.assertAlmostEqual(score, expected) + + +if __name__ == '__main__': + tf.test.main() diff --git a/easy_rec/python/test/export_test.py b/easy_rec/python/test/export_test.py index e01182e1b..3f7bd02b5 100644 --- a/easy_rec/python/test/export_test.py +++ b/easy_rec/python/test/export_test.py @@ -2,17 +2,26 @@ # Copyright (c) Alibaba, Inc. and its affiliates. # Date: 2020-10-06 # Filename:export_test.py +import functools import json import logging import os +import unittest +import numpy as np import tensorflow as tf +import easy_rec from easy_rec.python.inference.predictor import Predictor from easy_rec.python.utils import config_util from easy_rec.python.utils import test_utils from easy_rec.python.utils.test_utils import RunAsSubprocess +if tf.__version__ >= '2.0': + gfile = tf.compat.v1.gfile +else: + gfile = tf.gfile + class ExportTest(tf.test.TestCase): @@ -23,30 +32,134 @@ def tearDown(self): test_utils.set_gpu_id(None) @RunAsSubprocess - def _predict_and_check(self, data_path, saved_model_dir, cmp_result): + def _predict_and_check(self, + data_path, + saved_model_dir, + cmp_result, + keys=['probs'], + separator=',', + tol=1e-4): predictor = Predictor(saved_model_dir) with open(data_path, 'r') as fin: inputs = [] for line_str in fin: line_str = line_str.strip() - line_tok = line_str.split(',') - inputs.append(','.join(line_tok[1:])) + if len(predictor.input_names) > 1: + inputs.append(line_str.split(separator)) + else: + inputs.append(line_str) output_res = predictor.predict(inputs, batch_size=32) for i in range(len(output_res)): - prob0 = output_res[i]['probs'] - prob1 = cmp_result[i]['probs'] - self.assertAllClose(prob0, prob1, atol=1e-4) + for key in keys: + val0 = output_res[i][key] + val1 = cmp_result[i][key] + diff = np.abs(val0 - val1) + assert diff < tol, \ + 'too much difference: %.6f for %s, tol=%.6f' \ + % (diff, key, tol) + + def _extract_data(self, input_path, output_path, offset=1, separator=','): + with open(input_path, 'r') as fin: + with open(output_path, 'w') as fout: + for line_str in fin: + line_str = line_str.strip() + line_toks = line_str.split(separator) + if offset > 0: + line_toks = line_toks[offset:] + fout.write('%s\n' % (separator.join(line_toks))) + + def _extract_rtp_data(self, input_path, 
output_path, separator=';'): + with open(input_path, 'r') as fin: + with open(output_path, 'w') as fout: + for line_str in fin: + line_str = line_str.strip() + line_toks = line_str.split(separator) + fout.write('%s\n' % line_toks[-1]) + + def test_multi_tower(self): + self._export_test('samples/model_config/multi_tower_export.config', + self._extract_data) - def test_export(self): + def test_mmoe(self): + self._export_test( + 'samples/model_config/mmoe_on_taobao.config', + functools.partial(self._extract_data, offset=2), + keys=['probs_ctr', 'probs_cvr']) + + def test_fg(self): + self._export_test( + 'samples/model_config/taobao_fg.config', + self._extract_rtp_data, + separator='') + + def test_export_with_asset(self): + pipeline_config_path = 'samples/model_config/taobao_fg.config' + test_dir = test_utils.get_tmp_dir() + # prepare model + self.assertTrue( + test_utils.test_single_train_eval( + pipeline_config_path, test_dir=test_dir)) + test_utils.set_gpu_id(None) + config_path = os.path.join(test_dir, 'pipeline.config') + export_dir = os.path.join(test_dir, 'export/') + export_cmd = """ + python -m easy_rec.python.export + --pipeline_config_path %s + --export_dir %s + --asset_files samples/model_config/taobao_fg.json + """ % ( + config_path, + export_dir, + ) + proc = test_utils.run_cmd(export_cmd, + '%s/log_%s.txt' % (test_dir, 'export')) + proc.wait() + self.assertTrue(proc.returncode == 0) + files = gfile.Glob(export_dir + '*') + export_dir = files[0] + assert gfile.Exists(export_dir + '/assets/taobao_fg.json') + assert gfile.Exists(export_dir + '/assets/pipeline.config') + + def test_export_with_out_in_ckpt_config(self): test_dir = test_utils.get_tmp_dir() logging.info('test dir: %s' % test_dir) + pipeline_config_path = 'samples/model_config/mmoe_on_taobao.config' + + def _post_check_func(pipeline_config): + ckpt_path = tf.train.latest_checkpoint(pipeline_config.model_dir) + export_dir = os.path.join(test_dir, 'train/export/no_config') + export_cmd = """ + python -m easy_rec.python.export + --pipeline_config_path %s + --checkpoint_path %s + --export_dir %s + """ % (pipeline_config_path, ckpt_path, export_dir) + proc = test_utils.run_cmd(export_cmd, + '%s/log_%s.txt' % (test_dir, 'export')) + proc.wait() + return proc.returncode == 0 + # prepare model self.assertTrue( test_utils.test_single_train_eval( - 'samples/model_config/multi_tower_export.config', - test_dir=test_dir)) + pipeline_config_path, + test_dir=test_dir, + post_check_func=_post_check_func)) + + def _export_test(self, + pipeline_config_path, + extract_data_func=None, + separator=',', + keys=['probs']): + test_dir = test_utils.get_tmp_dir() + logging.info('test dir: %s' % test_dir) + + # prepare model + self.assertTrue( + test_utils.test_single_train_eval( + pipeline_config_path, test_dir=test_dir)) test_utils.set_gpu_id(None) # prepare two version config @@ -54,7 +167,10 @@ def test_export(self): config_path_multi = os.path.join(test_dir, 'pipeline_v2.config') pipeline_config = config_util.get_configs_from_pipeline_file( config_path_single) - pipeline_config.export_config.multi_placeholder = False + if pipeline_config.export_config.multi_placeholder: + config_path_single, config_path_multi = config_path_multi, config_path_single + pipeline_config.export_config.multi_placeholder =\ + not pipeline_config.export_config.multi_placeholder config_util.save_pipeline_config(pipeline_config, test_dir, 'pipeline_v2.config') @@ -88,11 +204,128 @@ def test_export(self): line_str = line_str.strip() 
cmp_result.append(json.loads(line_str)) - test_data_path = 'data/test/export/data.csv' - self._predict_and_check(test_data_path, export_dir_single, cmp_result) - self._predict_and_check(test_data_path, export_dir_multi, cmp_result) + test_data_path = pipeline_config.eval_input_path + if extract_data_func is not None: + tmp_data_path = os.path.join(test_dir, 'pred_input_data') + extract_data_func(test_data_path, tmp_data_path) + test_data_path = tmp_data_path + self._predict_and_check( + test_data_path, + export_dir_single, + cmp_result, + keys=keys, + separator=separator) + self._predict_and_check( + test_data_path, + export_dir_multi, + cmp_result, + keys=keys, + separator=separator) test_utils.clean_up(test_dir) + @unittest.skip('Only execute when redis is available') + def test_big_model_export(self): + test_dir = test_utils.get_tmp_dir() + logging.info('test dir: %s' % test_dir) + + lookup_op_path = os.path.join(easy_rec.ops_dir, 'libkv_lookup.so') + tf.load_op_library(lookup_op_path) + + # prepare model + self.assertTrue( + test_utils.test_single_train_eval( + 'samples/model_config/multi_tower_export.config', + test_dir=test_dir)) + + test_utils.set_gpu_id(None) + # the pipeline.config is produced by the prepare model cmd + config_path = os.path.join(test_dir, 'pipeline.config') + export_dir = os.path.join(test_dir, 'export/') + export_cmd = """ + python -m easy_rec.python.export + --pipeline_config_path %s + --export_dir %s + --redis_url 127.0.0.1:6379 + --redis_threads 1 --verbose 1 + """ % (config_path, export_dir) + proc = test_utils.run_cmd(export_cmd, + '%s/log_%s.txt' % (test_dir, 'export')) + proc.wait() + self.assertTrue(proc.returncode == 0) + + # use checkpoint to prepare result + result_path = os.path.join(test_dir, 'result.txt') + predict_cmd = """ + python -m easy_rec.python.predict + --pipeline_config_path %s + --output_path %s + """ % (config_path, result_path) + proc = test_utils.run_cmd(predict_cmd % (), + '%s/log_%s.txt' % (test_dir, 'predict')) + proc.wait() + self.assertTrue(proc.returncode == 0) + with open(result_path, 'r') as fin: + cmp_result = [] + for line_str in fin: + line_str = line_str.strip() + cmp_result.append(json.loads(line_str)) + + test_data_path = 'data/test/export/data.csv' + self._predict_and_check(test_data_path, export_dir, cmp_result) + + @unittest.skip('Only execute when pai-tf and redis is available') + def test_big_model_embedding_variable_export(self): + test_dir = test_utils.get_tmp_dir() + logging.info('test dir: %s' % test_dir) + + lookup_op_path = os.path.join(easy_rec.ops_dir, 'libkv_lookup.so') + tf.load_op_library(lookup_op_path) + + # prepare model + self.assertTrue( + test_utils.test_single_train_eval( + 'samples/model_config/taobao_fg_ev.config', + test_dir=test_dir, + total_steps=1000)) + + test_utils.set_gpu_id(None) + # the pipeline.config is produced by the prepare model cmd + config_path = os.path.join(test_dir, 'pipeline.config') + export_dir = os.path.join(test_dir, 'export/') + export_cmd = """ + python -m easy_rec.python.export + --pipeline_config_path %s + --export_dir %s + --redis_url 127.0.0.1:6379 + --redis_threads 1 --verbose 1 + """ % (config_path, export_dir) + proc = test_utils.run_cmd(export_cmd, + '%s/log_%s.txt' % (test_dir, 'export')) + proc.wait() + self.assertTrue(proc.returncode == 0) + + # use checkpoint to prepare result + result_path = os.path.join(test_dir, 'result.txt') + predict_cmd = """ + python -m easy_rec.python.predict + --pipeline_config_path %s + --input_path %s + --output_path %s + """ % 
(config_path, 'data/test/rtp/taobao_valid_feature.txt', result_path) + proc = test_utils.run_cmd(predict_cmd % (), + '%s/log_%s.txt' % (test_dir, 'predict')) + proc.wait() + self.assertTrue(proc.returncode == 0) + with open(result_path, 'r') as fin: + cmp_result = [] + for line_str in fin: + line_str = line_str.strip() + cmp_result.append(json.loads(line_str)) + + test_data_path = 'data/test/rtp/taobao_valid.csv' + self._predict_and_check( + test_data_path, export_dir, cmp_result, separator='', with_lbl=False) + if __name__ == '__main__': tf.test.main() diff --git a/easy_rec/python/test/hpo_test.py b/easy_rec/python/test/hpo_test.py index 67814a037..db39575dc 100644 --- a/easy_rec/python/test/hpo_test.py +++ b/easy_rec/python/test/hpo_test.py @@ -98,6 +98,15 @@ def test_edit_config_v5(self): else: assert tmp_fea.embedding_dim == 16 + def test_edit_config_v51(self): + tmp_file = 'samples/model_config/deepfm_multi_cls_on_avazu_ctr.config' + tmp_config = config_util.get_configs_from_pipeline_file(tmp_file) + tmp_file = 'samples/hpo/hpo_param_v51.json' + tmp_config = config_util.edit_config(tmp_config, self.load_config(tmp_file)) + for i, tmp_fea in enumerate(tmp_config.feature_configs): + if i == 5: + assert tmp_fea.embedding_dim == 37 + def test_edit_config_v6(self): tmp_file = 'samples/model_config/deepfm_multi_cls_on_avazu_ctr.config' tmp_config = config_util.get_configs_from_pipeline_file(tmp_file) @@ -141,6 +150,15 @@ def test_edit_config_v8(self): 4.0) < 1e-5 assert tmp_fea.embedding_dim == 32 + def test_edit_config_v81(self): + tmp_file = 'samples/model_config/deepfm_multi_cls_on_avazu_ctr.config' + tmp_config = config_util.get_configs_from_pipeline_file(tmp_file) + tmp_file = 'samples/hpo/hpo_param_v81.json' + tmp_config = config_util.edit_config(tmp_config, self.load_config(tmp_file)) + for i, tmp_fea in enumerate(tmp_config.feature_configs): + if tmp_fea.feature_type == tmp_fea.RawFeature: + assert tmp_fea.embedding_dim == 24 + def test_edit_config_v9(self): tmp_file = 'samples/model_config/deepfm_multi_cls_on_avazu_ctr.config' tmp_config = config_util.get_configs_from_pipeline_file(tmp_file) @@ -149,6 +167,37 @@ def test_edit_config_v9(self): assert tmp_config.train_config.fine_tune_checkpoint == \ 'oss://easy-rec/test/experiment/ctr_v93/model.ckpt-1000' + def test_edit_config_v10(self): + tmp_file = 'samples/model_config/deepfm_multi_cls_on_avazu_ctr.config' + tmp_config = config_util.get_configs_from_pipeline_file(tmp_file) + tmp_file = 'samples/hpo/hpo_param_v10.json' + tmp_config = config_util.edit_config(tmp_config, self.load_config(tmp_file)) + for i, tmp_fea in enumerate(tmp_config.feature_configs): + if tmp_fea.input_names[0] == 'c21': + assert len(tmp_fea.boundaries) == 4 and np.abs(tmp_fea.boundaries[0] - + 4.0) < 1e-5 + assert tmp_fea.embedding_dim == 32 + + def test_edit_config_v11(self): + tmp_file = 'samples/model_config/deepfm_multi_cls_on_avazu_ctr.config' + tmp_config = config_util.get_configs_from_pipeline_file(tmp_file) + tmp_file = 'samples/hpo/hpo_param_v11.json' + tmp_config = config_util.edit_config(tmp_config, self.load_config(tmp_file)) + for i, tmp_fea in enumerate(tmp_config.feature_configs): + if tmp_fea.input_names[0] == 'c21': + assert len(tmp_fea.boundaries) == 4 and np.abs(tmp_fea.boundaries[0] - + 10.0) < 1e-5 + + def test_edit_config_v12(self): + tmp_file = 'samples/model_config/deepfm_multi_cls_on_avazu_ctr.config' + tmp_config = config_util.get_configs_from_pipeline_file(tmp_file) + tmp_file = 'samples/hpo/hpo_param_v12.json' + tmp_config = 
config_util.edit_config(tmp_config, self.load_config(tmp_file)) + for i, tmp_fea in enumerate(tmp_config.feature_configs): + if tmp_fea.input_names[0] == 'c21': + assert len(tmp_fea.boundaries) == 25 + assert np.abs(tmp_fea.boundaries[1] - 21.0) < 1e-5 + def test_save_eval_metrics_with_env(self): os.environ['TF_CONFIG'] = """ { "cluster": { diff --git a/easy_rec/python/test/odps_command.py b/easy_rec/python/test/odps_command.py index cd246fe8e..33298f727 100644 --- a/easy_rec/python/test/odps_command.py +++ b/easy_rec/python/test/odps_command.py @@ -21,7 +21,7 @@ def __init__(self, odps_oss_config): odps_oss_config.endpoint, odps_oss_config.bucket_name) self.bucket_name = odps_oss_config.bucket_name - self.script_path = odps_oss_config.temp_dir + self.temp_dir = odps_oss_config.temp_dir self.log_path = odps_oss_config.log_dir self.odpscmd = odps_oss_config.odpscmd_path self.odps_config_path = odps_oss_config.odps_config_path @@ -37,7 +37,7 @@ def run_odps_cmd(self, script_file): Raise: ValueError if failed """ - exec_file_path = os.path.join(self.script_path, script_file) + exec_file_path = os.path.join(self.temp_dir, script_file) file_name = os.path.split(script_file)[1] log_file = os.path.join(self.log_path, file_name) diff --git a/easy_rec/python/test/odps_local_run.py b/easy_rec/python/test/odps_local_run.py new file mode 100644 index 000000000..8c1d15274 --- /dev/null +++ b/easy_rec/python/test/odps_local_run.py @@ -0,0 +1,86 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. + +import argparse +import logging +import os +import shutil +import sys + +import tensorflow as tf + +from easy_rec.python.test.odps_command import OdpsCommand +from easy_rec.python.test.odps_test_prepare import prepare +from easy_rec.python.test.odps_test_util import OdpsOSSConfig +from easy_rec.python.test.odps_test_util import delete_oss_path +from easy_rec.python.test.odps_test_util import get_oss_bucket +from easy_rec.python.utils import test_utils + +logging.basicConfig( + level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s') + +odps_oss_config = OdpsOSSConfig(script_path='./samples/emr_script') + + +class TestPipelineLocal(tf.test.TestCase): + """Train eval test on emr.""" + + def setUp(self): + logging.info('Testing %s.%s' % (type(self).__name__, self._testMethodName)) + self._success = True + self._test_dir = test_utils.get_tmp_dir() + + def tearDown(self): + if self._success: + shutil.rmtree(self._test_dir) + + def test_deepfm_local_with_common_io(self): + start = [ + 'deep_fm/create_external_deepfm_table.sql', + 'deep_fm/create_inner_deepfm_table.sql' + ] + end = ['deep_fm/drop_table.sql'] + odps_cmd = OdpsCommand(odps_oss_config) + odps_cmd.run_list(start) + self._success = test_utils.test_single_train_eval( + '%s/configs/deepfm.config' % odps_oss_config.temp_dir, self._test_dir) + odps_cmd.run_list(end) + self.assertTrue(self._success) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--odps_config', type=str, default=None, help='odps config path') + parser.add_argument( + '--oss_config', type=str, default=None, help='ossutilconfig path') + parser.add_argument( + '--bucket_name', type=str, default=None, help='test oss bucket name') + parser.add_argument('--arn', type=str, default=None, help='oss rolearn') + parser.add_argument( + '--odpscmd', type=str, default='odpscmd', help='odpscmd path') + args, unknown_args = parser.parse_known_args() + sys.argv = [sys.argv[0]] + for unk_arg in unknown_args: + 
sys.argv.append(unk_arg) + + if args.odps_config: + odps_oss_config.load_odps_config(args.odps_config) + os.environ['ODPS_CONFIG_FILE_PATH'] = args.odps_config + if args.oss_config: + odps_oss_config.load_oss_config(args.oss_config) + if args.odpscmd: + odps_oss_config.odpscmd_path = args.odpscmd + if args.arn: + odps_oss_config.arn = args.arn + if args.bucket_name: + odps_oss_config.bucket_name = args.bucket_name + + prepare(odps_oss_config) + tf.test.main() + # delete oss path + bucket = get_oss_bucket(odps_oss_config.oss_key, odps_oss_config.oss_secret, + odps_oss_config.endpoint, odps_oss_config.bucket_name) + delete_oss_path(bucket, odps_oss_config.exp_dir, odps_oss_config.bucket_name) + # delete tmp + shutil.rmtree(odps_oss_config.temp_dir) diff --git a/easy_rec/python/test/odps_run.py b/easy_rec/python/test/odps_run.py index 2acb66fbe..a588a2970 100644 --- a/easy_rec/python/test/odps_run.py +++ b/easy_rec/python/test/odps_run.py @@ -3,9 +3,11 @@ import argparse import logging +import os import shutil import sys +import oss2 import tensorflow as tf from easy_rec.python.test.odps_test import OdpsTest @@ -13,6 +15,7 @@ from easy_rec.python.test.odps_test_util import OdpsOSSConfig from easy_rec.python.test.odps_test_util import delete_oss_path from easy_rec.python.test.odps_test_util import get_oss_bucket +from easy_rec.python.utils import config_util logging.basicConfig( level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s') @@ -100,8 +103,10 @@ def test_other(self): 'other_test/test_train_hpo_with_evaluator.sql', 'other_test/test_train_version.sql', 'other_test/test_train_distribute_strategy_ess.sql', + 'other_test/test_train_before_export.sql', 'other_test/test_eval_checkpoint_path.sql', 'other_test/test_export_checkpoint_path.sql', + 'other_test/test_export_update_model_dir.sql', 'other_test/test_predict_selected_cols.sql', ] end_file = ['other_test/drop_table.sql'] @@ -109,18 +114,85 @@ def test_other(self): tot.start_test() tot.drop_table() + def test_best_exporter(self): + start_files = [ + 'deep_fm/create_external_deepfm_table.sql', + 'deep_fm/create_inner_deepfm_table.sql' + ] + test_files = [ + 'other_test/test_train_best_export.sql', + ] + end_file = ['other_test/drop_table.sql'] + tot = OdpsTest(start_files, test_files, end_file, odps_oss_config) + tot.start_test() + config_path = os.path.join( + odps_oss_config.temp_dir, + 'configs/dwd_avazu_ctr_deepmodel_ext_best_export.config') + config = config_util.get_configs_from_pipeline_file(config_path) + model_dir = config.model_dir + logging.info('raw model_dir = %s' % model_dir) + if model_dir.startswith('oss://'): + spos = model_dir.index('/', len('oss://') + 1) + 1 + model_dir = model_dir[spos:] + logging.info('stripped model_dir = %s' % model_dir) + + bucket = get_oss_bucket(odps_oss_config.oss_key, odps_oss_config.oss_secret, + odps_oss_config.endpoint, + odps_oss_config.bucket_name) + best_ckpt_prefix = os.path.join(model_dir, 'best_ckpt/model.ckpt') + best_ckpts = [ + x.key + for x in oss2.ObjectIterator(bucket, prefix=best_ckpt_prefix) + if x.key.endswith('.meta') + ] + logging.info('best ckpts: %s' % str(best_ckpts)) + assert len(best_ckpts) <= 2, 'too many best ckpts: %s' % str(best_ckpts) + best_export_prefix = os.path.join(model_dir, 'export/best/') + best_exports = [ + x.key + for x in oss2.ObjectIterator(bucket, prefix=best_export_prefix) + if x.key.endswith('/saved_model.pb') + ] + logging.info('best exports: %s' % str(best_exports)) + assert len( + best_exports) <= 2, 'too many best exports: %s' % 
str(best_exports) + return True + def test_embedding_variable(self): start_files = [ 'embedding_variable/create_table.sql', ] test_files = [ - 'embedding_variable/train.sql', 'embedding_variable/export.sql' + 'embedding_variable/train.sql', 'embedding_variable/train_work_que.sql', + 'embedding_variable/export.sql' ] end_file = ['embedding_variable/drop_table.sql'] tot = OdpsTest(start_files, test_files, end_file, odps_oss_config) tot.start_test() tot.drop_table() + def test_multi_value_export(self): + start_files = [ + 'multi_value/create_external_multi_value_table.sql', + 'multi_value/create_inner_multi_value_table.sql', + ] + test_files = ['multi_value/train_multi_tower_model.sql'] + end_file = ['multi_value/drop_table.sql'] + tot = OdpsTest(start_files, test_files, end_file, odps_oss_config) + tot.start_test() + tot.drop_table() + + def test_boundary_test(self): + start_files = [ + 'boundary/create_external_boundary_table.sql', + 'boundary/create_inner_boundary_table.sql', + ] + test_files = ['boundary/train_multi_tower_model.sql'] + end_file = ['boundary/drop_table.sql'] + tot = OdpsTest(start_files, test_files, end_file, odps_oss_config) + tot.start_test() + tot.drop_table() + if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/easy_rec/python/test/odps_test_prepare.py b/easy_rec/python/test/odps_test_prepare.py index 35348fbac..155c8baaa 100644 --- a/easy_rec/python/test/odps_test_prepare.py +++ b/easy_rec/python/test/odps_test_prepare.py @@ -76,6 +76,10 @@ def change_files(odps_oss_config, file_path): line = line.replace('{OSS_BUCKET_NAME}', odps_oss_config.bucket_name) line = line.replace('{TIME_STAMP}', str(odps_oss_config.time_stamp)) + + # for emr odps test only + line = line.replace('{TEMP_DIR}', str(odps_oss_config.temp_dir)) + line = line.replace('{ROLEARN}', odps_oss_config.arn) line = line.replace('{OSS_ENDPOINT_INTERNAL}', endpoint_internal) line = line.replace('{OSS_ENDPOINT}', endpoint) @@ -121,6 +125,11 @@ def prepare(odps_oss_config): for root, dirs, files in os.walk(odps_oss_config.temp_dir): for file in files: file_path = os.path.join(root, file) + # drop .template + if file_path.endswith('.template'): + tmp_path = file_path[:-len('.template')] + os.rename(file_path, tmp_path) + file_path = tmp_path if 'data' not in file_path: logging.info('modify %s' % file_path) change_files(odps_oss_config, file_path) diff --git a/easy_rec/python/test/odps_test_util.py b/easy_rec/python/test/odps_test_util.py index e72adada8..16f53444d 100644 --- a/easy_rec/python/test/odps_test_util.py +++ b/easy_rec/python/test/odps_test_util.py @@ -1,16 +1,32 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. 
+import configparser import logging import os import time +import traceback import oss2 +from datahub import DataHub +from datahub.exceptions import InvalidOperationException +from datahub.exceptions import LimitExceededException +from datahub.exceptions import ResourceExistException +from datahub.exceptions import ResourceNotFoundException +from datahub.models import BlobRecord +from datahub.models import CursorType +from datahub.models import FieldType +from datahub.models import RecordSchema +from datahub.models import RecordType +from datahub.models import TupleRecord +from odps import ODPS +from odps.df import DataFrame +from six.moves import configparser class OdpsOSSConfig: - def __init__(self): + def __init__(self, script_path='./samples/odps_script'): self.time_stamp = int(time.time()) temp_dir = os.environ.get('TEST_DIR', '/tmp') self.exp_dir = 'easy_rec_odps_test_%d' % self.time_stamp @@ -20,10 +36,10 @@ def __init__(self): # public buckets with readyonly access self.ali_bucket_endpoint = 'http://oss-cn-beijing.aliyuncs.com' self.ali_bucket_name = 'easyrec' - self.script_path = './samples/odps_script' + self.script_path = script_path # read only access - self.ali_oss_key = 'LTAI4GHNdHdXvYQQW7NyHS8Y' - self.ali_oss_secret = 'dOm1BrTGIVjmZwUnRFIapZreOD03Gw' + self.ali_oss_key = '' + self.ali_oss_secret = '' self.oss_key = '' self.oss_secret = '' @@ -31,18 +47,36 @@ def __init__(self): self.arn = 'acs:ram::xxx:role/aliyunodpspaidefaultrole' self.bucket_name = '' - self.odpscmd_path = 'odpscmd' + self.odpscmd_path = os.environ.get('ODPS_CMD_PATH', 'odpscmd') self.odps_config_path = '' # input table project name replace {ODPS_PROJ_NAME} in # samples/odps_script: # grep ODPS_PROJ_NAME -r samples/odps_script/ self.project_name = '' + self.dhid = '' + self.dhkey = '' + self.dhendpoint = '' + self.dhtopic = '' + self.dhproject = '' + self.dh = '' + self.odps = '' + self.odpsEnd = '' + # default to algo_public self.algo_project = None self.algo_res_project = None self.algo_version = None + def load_dh_config(self, config_path): + configer = configparser.ConfigParser() + configer.read(config_path, encoding='utf-8') + self.dhid = configer.get('datahub', 'access_id') + self.dhkey = configer.get('datahub', 'access_key') + self.dhendpoint = configer.get('datahub', 'endpoint') + self.dhtopic = configer.get('datahub', 'topic_name') + self.dhproject = configer.get('datahub', 'project') + def load_oss_config(self, config_path): with open(config_path, 'r') as fin: for line_str in fin: @@ -63,6 +97,91 @@ def load_odps_config(self, config_path): line_str = line_str.replace(' ', '') if line_str.startswith('project_name='): self.project_name = line_str[len('project_name='):] + if line_str.startswith('end_point='): + self.odpsEnd = line_str[len('end_point='):] + + def clean_topic(self, dhproject): + if not dhproject: + logging.error('project is empty .') + topic_names = self.dh.list_topic(dhproject).topic_names + for topic_name in topic_names: + self.clean_subscription(topic_name) + self.dh.delete_topic(dhproject, topic_name) + + def clean_project(self): + project_names = self.dh.list_project().project_names + for dhproject in project_names: + if dhproject == self.dhproject: + self.clean_topic(dhproject) + try: + self.dh.delete_project(dhproject) + except InvalidOperationException: + pass + + def clean_subscription(self, topic_name): + subscriptions = self.dh.list_subscription(self.dhproject, topic_name, '', 1, + 100).subscriptions + for subscription in subscriptions: + 
self.dh.delete_subscription(self.dhproject, topic_name, subscription) + + def get_input_type(self, input_type): + DhDict = { + 'INT64': FieldType.BIGINT, + 'INT32': FieldType.BIGINT, + 'STRING': FieldType.STRING, + 'BOOLEAN': FieldType.BOOLEAN, + 'FLOAT32': FieldType.DOUBLE, + 'FLOAT64': FieldType.DOUBLE + } + + return DhDict.get(input_type) + + def _subscription(self): + self.dh = DataHub(self.dhid, self.dhkey, self.dhendpoint) + self.odps = ODPS(self.dhid, self.dhkey, self.project_name, self.odpsEnd) + self.odpsTable = 'deepfm_train_%s' % self.time_stamp + self.clean_project() + read_odps = DataFrame(self.odps.get_table(self.odpsTable)) + col = read_odps.schema.names + col_type = [self.get_input_type(str(i)) for i in read_odps.schema.types] + try: + self.dh.create_project(self.dhproject, 'EasyRecTest') + logging.info('create project success!') + except ResourceExistException: + logging.info('project %s already exist!' % self.dhproject) + except Exception as e: + logging.info(traceback.format_exc()) + record_schema = RecordSchema.from_lists(col, col_type) + try: + self.dh.create_tuple_topic(self.dhproject, self.dhtopic, 7, 3, + record_schema, 'easyrec_datahub') + logging.info('create tuple topic success!') + except ResourceExistException: + logging.info('topic %s already exist!' % self.dhtopic) + except Exception as e: + logging.error(traceback.format_exc()) + try: + self.dh.wait_shards_ready(self.dhproject, self.dhtopic) + logging.info('shards all ready') + topic_result = self.dh.get_topic(self.dhproject, self.dhtopic) + if topic_result.record_type != RecordType.TUPLE: + logging.error('topic type illegal! ') + record_schema = topic_result.record_schema + t = self.odps.get_table(self.odpsTable) + with t.open_reader() as reader: + size = 0 + record_list = [] + for data in reader[0:1000]: + record = TupleRecord(values=data.values, schema=record_schema) + record_list.append(record) + if size % 1000: + put_result = self.dh.put_records(self.dhproject, self.dhtopic, + record_list) + record_list = [] + size += 1 + + except Exception as e: + logging.error(e) def get_oss_bucket(oss_key, oss_secret, endpoint, bucket_name): diff --git a/easy_rec/python/test/predictor_test.py b/easy_rec/python/test/predictor_test.py index 940d192fd..8c8362de6 100644 --- a/easy_rec/python/test/predictor_test.py +++ b/easy_rec/python/test/predictor_test.py @@ -65,6 +65,58 @@ def test_pred_dict(self): output_res = predictor.predict(inputs, batch_size=32) self.assertTrue(len(output_res) == 100) + @RunAsSubprocess + def test_pred_placeholder_named_by_input(self): + predictor = Predictor( + 'data/test/inference/tb_multitower_placeholder_rename_export/') + field_keys = [ + 'pid', 'adgroup_id', 'cate_id', 'campaign_id', 'customer', 'brand', + 'user_id', 'cms_segid', 'cms_group_id', 'final_gender_code', + 'age_level', 'pvalue_level', 'shopping_level', 'occupation', + 'new_user_class_level', 'tag_category_list', 'tag_brand_list', 'price' + ] + with open(self._test_path, 'r') as fin: + reader = csv.reader(fin) + inputs = [] + for row in reader: + line_input = {} + for fid, f in enumerate(field_keys): + if f in ['tag_category_list', 'tag_brand_list']: + line_input[f] = ['12', '23'] + else: + line_input[f] = row[fid + 2] + inputs.append(line_input) + output_res = predictor.predict(inputs, batch_size=32) + self.assertTrue(len(output_res) == 100) + + @RunAsSubprocess + def test_fm_pred_list(self): + predictor = Predictor('data/test/inference/fm_export/') + with open(self._test_path, 'r') as fin: + reader = csv.reader(fin) + inputs
= [] + for row in reader: + inputs.append(row[2:]) + output_res = predictor.predict(inputs, batch_size=32) + self.assertTrue(len(output_res) == 100) + + @RunAsSubprocess + def test_fm_pred_dict(self): + predictor = Predictor('data/test/inference/fm_export/') + field_keys = [ + 'pid', 'adgroup_id', 'cate_id', 'campaign_id', 'customer', 'brand', + 'user_id', 'cms_segid', 'cms_group_id', 'final_gender_code', + 'age_level', 'pvalue_level', 'shopping_level', 'occupation', + 'new_user_class_level', 'tag_category_list', 'tag_brand_list', 'price' + ] + with open(self._test_path, 'r') as fin: + reader = csv.reader(fin) + inputs = [] + for row in reader: + inputs.append({f: row[fid + 2] for fid, f in enumerate(field_keys)}) + output_res = predictor.predict(inputs, batch_size=32) + self.assertTrue(len(output_res) == 100) + class PredictorTestV2(tf.test.TestCase): diff --git a/easy_rec/python/test/rtp_convert_test.py b/easy_rec/python/test/rtp_convert_test.py index f9e85d98d..4210c9dc2 100644 --- a/easy_rec/python/test/rtp_convert_test.py +++ b/easy_rec/python/test/rtp_convert_test.py @@ -6,6 +6,7 @@ import tensorflow as tf +from easy_rec.python.utils import config_util from easy_rec.python.utils import test_utils @@ -26,6 +27,7 @@ def test_rtp_convert(self): --rtp_fg samples/rtp_fg/fg.json --label clk --output_path %s + --input_type RTPInput --model_type multi_tower --train_input_path data/test/rtp/taobao_train_feature.txt --eval_input_path data/test/rtp/taobao_test_feature.txt @@ -40,6 +42,92 @@ def test_rtp_convert(self): pipeline_config_path, test_dir=test_dir)) test_utils.clean_up(test_dir) + def test_rtp_convert_bucketize(self): + test_dir = test_utils.get_tmp_dir() + logging.info('test dir: %s' % test_dir) + pipeline_config_path = os.path.join(test_dir, 'fg_multi_tower.config') + convert_cmd = """ + python -m easy_rec.python.tools.convert_rtp_fg + --rtp_fg samples/rtp_fg/fg_bucketize.json + --label clk + --output_path %s + --input_type RTPInput + --model_type multi_tower + --train_input_path data/test/rtp/taobao_train_bucketize_feature.txt + --eval_input_path data/test/rtp/taobao_test_bucketize_feature.txt + --selected_cols 0,3 --num_steps 400 + """ % pipeline_config_path + proc = test_utils.run_cmd(convert_cmd, + '%s/log_%s.txt' % (test_dir, 'convert')) + proc.wait() + self.assertTrue(proc.returncode == 0) + self.assertTrue( + test_utils.test_single_train_eval( + pipeline_config_path, test_dir=test_dir)) + test_utils.clean_up(test_dir) + + def test_rtp_convert_bucketize_v2(self): + test_dir = test_utils.get_tmp_dir() + logging.info('test dir: %s' % test_dir) + pipeline_config_path = os.path.join(test_dir, 'fg_multi_tower.config') + convert_cmd = """ + python -m easy_rec.python.tools.convert_rtp_fg + --rtp_fg samples/rtp_fg/fg_bucketize_v2.json + --label clk + --output_path %s + --input_type RTPInput + --model_type multi_tower + --train_input_path data/test/rtp/taobao_train_feature.txt + --eval_input_path data/test/rtp/taobao_test_feature.txt + --selected_cols 0,3 --num_steps 400 + """ % pipeline_config_path + proc = test_utils.run_cmd(convert_cmd, + '%s/log_%s.txt' % (test_dir, 'convert')) + proc.wait() + self.assertTrue(proc.returncode == 0) + + tmp_config = config_util.get_configs_from_pipeline_file( + pipeline_config_path) + for feature_config in tmp_config.feature_configs: + if feature_config.input_names[0] == 'price': + assert len(feature_config.boundaries) == 6 + + self.assertTrue( + test_utils.test_single_train_eval( + pipeline_config_path, test_dir=test_dir)) + 
test_utils.clean_up(test_dir) + + def test_rtp_convert_test_model_config(self): + test_dir = test_utils.get_tmp_dir() + logging.info('test dir: %s' % test_dir) + pipeline_config_path = os.path.join(test_dir, 'fg_wide_and_deep.config') + convert_cmd = """ + python -m easy_rec.python.tools.convert_rtp_fg + --rtp_fg samples/rtp_fg/fg_bucketize_model_config.json + --label clk + --output_path %s + --input_type RTPInput + --train_input_path data/test/rtp/taobao_train_feature.txt + --eval_input_path data/test/rtp/taobao_test_feature.txt + --selected_cols 0,3 --num_steps 400 + """ % pipeline_config_path + proc = test_utils.run_cmd(convert_cmd, + '%s/log_%s.txt' % (test_dir, 'convert')) + proc.wait() + self.assertTrue(proc.returncode == 0) + + tmp_config = config_util.get_configs_from_pipeline_file( + pipeline_config_path) + assert len(tmp_config.model_config.wide_and_deep.dnn.hidden_units) == 2 + assert tmp_config.model_config.wide_and_deep.dnn.hidden_units[0] == 48 + assert tmp_config.model_config.wide_and_deep.dnn.hidden_units[1] == 24 + assert tmp_config.model_dir == 'experiments/rtp_fg/wide_and_deep_update_model' + + self.assertTrue( + test_utils.test_single_train_eval( + pipeline_config_path, test_dir=test_dir)) + test_utils.clean_up(test_dir) + if __name__ == '__main__': tf.test.main() diff --git a/easy_rec/python/test/train_eval_test.py b/easy_rec/python/test/train_eval_test.py index 7f9144de5..99a87961a 100644 --- a/easy_rec/python/test/train_eval_test.py +++ b/easy_rec/python/test/train_eval_test.py @@ -9,8 +9,15 @@ import numpy as np import tensorflow as tf +from easy_rec.python.main import predict +from easy_rec.python.utils import config_util +from easy_rec.python.utils import estimator_utils from easy_rec.python.utils import test_utils +if tf.__version__ >= '2.0': + tf = tf.compat.v1 +gfile = tf.gfile + class TrainEvalTest(tf.test.TestCase): @@ -37,7 +44,7 @@ def test_deepfm_with_combo_feature(self): def test_deepfm_with_vocab_list(self): self._success = test_utils.test_single_train_eval( - 'samples/model_config/deepfm_vocab_list_on_taobao.config', + 'samples/model_config/deepfm_vocab_list_on_avazu_ctr.config', self._test_dir) self.assertTrue(self._success) @@ -47,6 +54,30 @@ def test_deepfm_with_multi_class(self): self._test_dir) self.assertTrue(self._success) + def test_wide_and_deep_no_final(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/wide_and_deep_no_final_on_avazau_ctr.config', + self._test_dir) + self.assertTrue(self._success) + + def test_wide_and_deep(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/wide_and_deep_on_avazau_ctr.config', + self._test_dir) + self.assertTrue(self._success) + + def test_adamw_optimizer(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/deepfm_combo_on_avazu_adamw_ctr.config', + self._test_dir) + self.assertTrue(self._success) + + def test_momentumw_optimizer(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/deepfm_combo_on_avazu_momentumw_ctr.config', + self._test_dir) + self.assertTrue(self._success) + def test_deepfm_with_param_edit(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/deepfm_multi_cls_on_avazu_ctr.config', @@ -60,6 +91,12 @@ def test_multi_tower(self): 'samples/model_config/multi_tower_on_taobao.config', self._test_dir) self.assertTrue(self._success) + def test_multi_tower_gauc(self): + self._success = test_utils.test_single_train_eval( + 
'samples/model_config/multi_tower_on_taobao_gauc.config', + self._test_dir) + self.assertTrue(self._success) + def test_multi_tower_save_checkpoint_secs(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/multi_tower_save_secs_on_taobao.config', @@ -75,12 +112,82 @@ def test_multi_tower_save_checkpoint_secs(self): # ensure interval is 20s self.assertAllClose( ckpts_times[1:] - ckpts_times[:-1], [20] * (len(ckpts_times) - 1), - atol=5) + atol=8) self.assertTrue(self._success) + def test_keep_ckpt_max(self): + + def _post_check_func(pipeline_config): + ckpt_prefix = os.path.join(pipeline_config.model_dir, 'model.ckpt-*.meta') + ckpts = gfile.Glob(ckpt_prefix) + print(ckpts) + assert len(ckpts) == 3, 'invalid number of checkpoints: %d' % len(ckpts) + + self._success = test_utils.test_single_train_eval( + 'samples/model_config/multi_tower_ckpt_keep_3_on_taobao.config', + self._test_dir, + total_steps=500, + post_check_func=_post_check_func) + def test_multi_tower_with_best_exporter(self): + + def _post_check_func(pipeline_config): + model_dir = pipeline_config.model_dir + best_ckpts = os.path.join(model_dir, 'best_ckpt/model.ckpt-*.meta') + best_ckpts = gfile.Glob(best_ckpts) + assert len(best_ckpts) <= 2, 'too many best ckpts: %s' % str(best_ckpts) + best_exports = os.path.join(model_dir, 'export/best/*') + best_exports = gfile.Glob(best_exports) + assert len( + best_exports) <= 2, 'too many best exports: %s' % str(best_exports) + return True + self._success = test_utils.test_single_train_eval( 'samples/model_config/multi_tower_best_export_on_taobao.config', + self._test_dir, + total_steps=1000, + post_check_func=_post_check_func) + self.assertTrue(self._success) + + def test_latest_ckpt(self): + tmp = estimator_utils.latest_checkpoint('data/test/latest_ckpt_test') + assert tmp.endswith('model.ckpt-500') + tmp = estimator_utils.latest_checkpoint('data/test/latest_ckpt_test/') + assert tmp.endswith('model.ckpt-500') + + def test_latest_ckpt_v2(self): + + def _post_check_func(pipeline_config): + logging.info('model_dir: %s' % pipeline_config.model_dir) + logging.info('latest_checkpoint: %s' % + estimator_utils.latest_checkpoint(pipeline_config.model_dir)) + return tf.train.latest_checkpoint(pipeline_config.model_dir) == \ + estimator_utils.latest_checkpoint(pipeline_config.model_dir) + + self._success = test_utils.test_single_train_eval( + 'samples/model_config/taobao_fg.config', + self._test_dir, + post_check_func=_post_check_func) + self.assertTrue(self._success) + + def test_fine_tune_ckpt(self): + + def _post_check_func(pipeline_config): + pipeline_config.train_config.fine_tune_checkpoint = \ + estimator_utils.latest_checkpoint(pipeline_config.model_dir) + test_dir = os.path.join(self._test_dir, 'fine_tune') + pipeline_config.model_dir = os.path.join(test_dir, 'ckpt') + return test_utils.test_single_train_eval(pipeline_config, test_dir) + + self._success = test_utils.test_single_train_eval( + 'samples/model_config/taobao_fg.config', + self._test_dir, + post_check_func=_post_check_func) + self.assertTrue(self._success) + + def test_multi_tower_multi_value_export(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/multi_tower_multi_value_export_on_taobao.config', self._test_dir) self.assertTrue(self._success) @@ -109,21 +216,147 @@ def test_bst(self): 'samples/model_config/bst_on_taobao.config', self._test_dir) self.assertTrue(self._success) + def test_dcn(self): + self._success = test_utils.test_single_train_eval( + 
'samples/model_config/dcn_on_taobao.config', self._test_dir) + self.assertTrue(self._success) + + def test_dcn_with_f1(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dcn_f1_on_taobao.config', self._test_dir) + self.assertTrue(self._success) + + def test_autoint(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/autoint_on_taobao.config', self._test_dir) + self.assertTrue(self._success) + def test_dssm(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/dssm_on_taobao.config', self._test_dir) self.assertTrue(self._success) + def test_dssm_neg_sampler(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dssm_neg_sampler_on_taobao.config', + self._test_dir) + self.assertTrue(self._success) + + def test_dssm_neg_sampler_v2(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dssm_neg_sampler_v2_on_taobao.config', + self._test_dir) + self.assertTrue(self._success) + + def test_dssm_hard_neg_sampler(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dssm_hard_neg_sampler_on_taobao.config', + self._test_dir) + self.assertTrue(self._success) + + def test_dssm_hard_neg_sampler_v2(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dssm_hard_neg_sampler_v2_on_taobao.config', + self._test_dir) + self.assertTrue(self._success) + + def test_dssm_no_norm(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dssm_inner_prod_on_taobao.config', self._test_dir) + self.assertTrue(self._success) + def test_dssm_with_regression(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/dssm_reg_on_taobao.config', self._test_dir) self.assertTrue(self._success) + def _test_kd(self, config0, config1): + self._success = test_utils.test_single_train_eval(config0, self._test_dir) + self.assertTrue(self._success) + config_path = os.path.join(self._test_dir, 'pipeline.config') + pipeline_config = config_util.get_configs_from_pipeline_file(config_path) + + train_path = os.path.join(self._test_dir, 'train_kd') + eval_path = os.path.join(self._test_dir, 'eval_kd') + + @test_utils.RunAsSubprocess + def _gen_kd_data(train_path, eval_path): + pred_result = predict(config_path, None, pipeline_config.train_input_path) + with gfile.GFile(pipeline_config.train_input_path, 'r') as fin: + with gfile.GFile(train_path, 'w') as fout: + for line, pred in zip(fin, pred_result): + if isinstance(pred['logits'], type(np.array([]))): + pred_logits = ''.join([str(x) for x in pred['logits']]) + else: + pred_logits = str(pred['logits']) + fout.write(line.strip() + ',' + pred_logits + '\n') + pred_result = predict(config_path, None, pipeline_config.eval_input_path) + with gfile.GFile(pipeline_config.eval_input_path, 'r') as fin: + with gfile.GFile(eval_path, 'w') as fout: + for line, pred in zip(fin, pred_result): + if isinstance(pred['logits'], type(np.array([]))): + pred_logits = ''.join([str(x) for x in pred['logits']]) + else: + pred_logits = str(pred['logits']) + fout.write(line.strip() + ',' + pred_logits + '\n') + + _gen_kd_data(train_path, eval_path) + pipeline_config = config_util.get_configs_from_pipeline_file(config1) + pipeline_config.train_input_path = train_path + pipeline_config.eval_input_path = eval_path + config_util.save_pipeline_config(pipeline_config, self._test_dir, + 'kd_pipeline.config') + self._success = test_utils.test_single_train_eval( + 
os.path.join(self._test_dir, 'kd_pipeline.config'), + os.path.join(self._test_dir, 'kd')) + self.assertTrue(self._success) + + def test_dssm_with_kd(self): + self._test_kd('samples/model_config/multi_tower_on_taobao.config', + 'samples/model_config/dssm_kd_on_taobao.config') + + def test_deepfm_multi_class_with_kd(self): + self._test_kd('samples/model_config/deepfm_multi_cls_on_avazu_ctr.config', + 'samples/model_config/deepfm_multi_cls_small.config') + + def test_mind(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/mind_on_taobao.config', self._test_dir) + self.assertTrue(self._success) + + def test_mind_with_time_id(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/mind_on_taobao_with_time.config', self._test_dir) + self.assertTrue(self._success) + def test_deepfm_with_regression(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/deepfm_combo_on_avazu_reg.config', self._test_dir) self.assertTrue(self._success) + def test_deepfm_with_sigmoid_l2_loss(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/deepfm_combo_on_avazu_sigmoid_l2.config', + self._test_dir) + self.assertTrue(self._success) + + def test_deepfm_with_sequence_attention(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/deppfm_seq_attn_on_taobao.config', self._test_dir) + self.assertTrue(self._success) + + def test_deepfm_with_embedding_learning_rate(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/deepfm_combo_on_avazu_emblr_ctr.config', + self._test_dir) + self.assertTrue(self._success) + + def test_deepfm_with_eval_online(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/deepfm_combo_on_avazu_eval_online_ctr.config', + self._test_dir) + self.assertTrue(self._success) + def test_mmoe(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/mmoe_on_taobao.config', self._test_dir) @@ -145,11 +378,38 @@ def test_essm(self): 'samples/model_config/esmm_on_taobao.config', self._test_dir) self.assertTrue(self._success) + def test_tag_kv_input(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/kv_tag.config', self._test_dir) + self.assertTrue(self._success) + def test_dbmtl(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/dbmtl_on_taobao.config', self._test_dir) self.assertTrue(self._success) + def test_dbmtl_variational_dropout(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dbmtl_variational_dropout.config', self._test_dir) + self.assertTrue(self._success) + + def test_dbmtl_variational_dropout_feature_num(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dbmtl_variational_dropout_feature_num.config', + self._test_dir) + self.assertTrue(self._success) + + def test_rocket_launching(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/rocket_launching.config', self._test_dir) + self.assertTrue(self._success) + + def test_rocket_launching_feature_based(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/rocket_launching_feature_based.config', + self._test_dir) + self.assertTrue(self._success) + def test_dbmtl_mmoe(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/dbmtl_mmoe_on_taobao.config', self._test_dir) @@ -160,6 +420,43 @@ def 
test_train_with_ps_worker(self): 'samples/model_config/multi_tower_on_taobao.config', self._test_dir) self.assertTrue(self._success) + def test_train_with_ps_worker_chief_redundant(self): + self._success = test_utils.test_distributed_train_eval( + 'samples/model_config/multi_tower_on_taobao_chief_redundant.config', + self._test_dir) + self.assertTrue(self._success) + + def test_deepfm_embed_input(self): + self._success = test_utils.test_distributed_train_eval( + 'samples/model_config/deepfm_with_embed.config', self._test_dir) + self.assertTrue(self._success) + + def test_multi_tower_embed_input(self): + self._success = test_utils.test_distributed_train_eval( + 'samples/model_config/multi_tower_with_embed.config', self._test_dir) + self.assertTrue(self._success) + + def test_tfrecord_input(self): + self._success = test_utils.test_distributed_train_eval( + 'samples/model_config/deepfm_on_criteo_tfrecord.config', self._test_dir) + self.assertTrue(self._success) + + def test_batch_tfrecord_input(self): + self._success = test_utils.test_distributed_train_eval( + 'samples/model_config/deepfm_on_criteo_batch_tfrecord.config', + self._test_dir) + self.assertTrue(self._success) + + def test_sample_weight(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/deepfm_with_sample_weight.config', self._test_dir) + self.assertTrue(self._success) + + def test_dssm_sample_weight(self): + self._success = test_utils.test_single_train_eval( + 'samples/model_config/dssm_with_sample_weight.config', self._test_dir) + self.assertTrue(self._success) + @unittest.skipIf( LooseVersion(tf.__version__) < LooseVersion('2.3.0'), 'MultiWorkerMirroredStrategy need tf version > 2.3') diff --git a/easy_rec/python/test/util_test.py b/easy_rec/python/test/util_test.py new file mode 100644 index 000000000..c660145f6 --- /dev/null +++ b/easy_rec/python/test/util_test.py @@ -0,0 +1,25 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import tensorflow as tf + +from easy_rec.python.utils import estimator_utils + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 +gfile = tf.gfile + + +class UtilTest(tf.test.TestCase): + + def test_get_ckpt_version(self): + ver = estimator_utils.get_ckpt_version( + 'oss://easyrec/ckpts/model.ckpt-6500.meta') + assert ver == 6500, 'invalid version: %s' % str(ver) + ver = estimator_utils.get_ckpt_version( + 'oss://easyrec/ckpts/model.ckpt-6500') + assert ver == 6500, 'invalid version: %s' % str(ver) + + +if __name__ == '__main__': + tf.test.main() diff --git a/easy_rec/python/tools/convert_rtp_fg.py b/easy_rec/python/tools/convert_rtp_fg.py index cc39d85de..bd090e0a3 100644 --- a/easy_rec/python/tools/convert_rtp_fg.py +++ b/easy_rec/python/tools/convert_rtp_fg.py @@ -7,8 +7,8 @@ import tensorflow as tf -from easy_rec.python.utils.config_util import convert_rtp_fg from easy_rec.python.utils.config_util import save_message +from easy_rec.python.utils.convert_rtp_fg import convert_rtp_fg logging.basicConfig( format='[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d : %(message)s', @@ -17,7 +17,7 @@ if tf.__version__ >= '2.0': tf = tf.compat.v1 -model_types = ['deepfm', 'multi_tower', ''] +model_types = ['deepfm', 'multi_tower', 'wide_and_deep', 'esmm', 'dbmtl', ''] if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( @@ -66,6 +66,16 @@ help='selected cols, for csv input, it is in the format of: label_col_id0,...,lable_cold_idn,feature_col_id ' 'for odps table input, it is in the format of: label_col_name0,...,label_col_namen,feature_col_name ' ) + parser.add_argument( + '--rtp_separator', type=str, default=';', help='separator') + parser.add_argument( + '--input_type', + type=str, + default='OdpsRTPInput', + help='default to OdpsRTPInput, if test local, change it to RTPInput') + parser.add_argument( + '--is_async', action='store_true', help='async mode, default to false') + args = parser.parse_args() if not args.rtp_fg: @@ -80,15 +90,17 @@ args.batch_size, args.label, args.num_steps, args.model_type, args.separator, args.incol_separator, args.train_input_path, - args.eval_input_path, args.selected_cols) + args.eval_input_path, args.selected_cols, + args.input_type, args.is_async) save_message(pipeline_config, args.output_path) logging.info('Conversion done.') logging.info('Tips:') logging.info( - 'if run on odps, please change data_config.input_type to OdpsRTPInput, ' - 'and model_dir/train_input_path/eval_input_path must also be changed, ' - 'selected_cols are label0_col_name, label1_col_name, ..., feature_col_name' - ) + 'if run locally, please change data_config.input_type to RTPInput, ' + 'and model_dir/train_input_path/eval_input_path must also be set') logging.info( 'if run local, please set data_config.selected_cols in the format ' 'label_col_id0,label_col_id1,...,label_col_idn,feature_col_id') + logging.info( + 'if run on odps, selected_cols must be set, which are label0_col,' + 'label1_col, ..., feature_col_name') diff --git a/easy_rec/python/tools/create_config_from_excel.py b/easy_rec/python/tools/create_config_from_excel.py index a2207b5f3..c0f3e0a2b 100644 --- a/easy_rec/python/tools/create_config_from_excel.py +++ b/easy_rec/python/tools/create_config_from_excel.py @@ -187,7 +187,7 @@ def _write_train_eval_config(self, fout): fout.write('train_input_path: "%s"\n' % self._train_input_path) fout.write('eval_input_path: "%s"\n' % self._eval_input_path) fout.write(""" - model_dir: "experiment/" + model_dir: "%s" train_config { log_step_count_steps:
200 @@ -212,7 +212,7 @@ def _write_train_eval_config(self, fout): metrics_set: { auc {} } - }""") + }""" % self._model_dir) def _write_deepfm_config(self, fout): # write model_config diff --git a/easy_rec/python/tools/edit_lookup_graph.py b/easy_rec/python/tools/edit_lookup_graph.py index d6c8189e7..7f1fc1fbf 100644 --- a/easy_rec/python/tools/edit_lookup_graph.py +++ b/easy_rec/python/tools/edit_lookup_graph.py @@ -3,586 +3,21 @@ import argparse import logging import os -import shutil import sys import tensorflow as tf -from google.protobuf import text_format from tensorflow.core.protobuf import saved_model_pb2 -from tensorflow.python.saved_model import signature_constants +from tensorflow.python.lib.io.file_io import file_exists +from tensorflow.python.lib.io.file_io import recursive_create_dir +from tensorflow.python.platform.gfile import GFile + +import easy_rec +from easy_rec.python.utils.meta_graph_editor import MetaGraphEditor logging.basicConfig( format='[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d : %(message)s', level=logging.INFO) - -class MetaGraphEditor: - - def __init__(self, - lookup_lib_path, - saved_model_dir, - redis_url, - redis_passwd, - redis_timeout, - verbose=False): - self._lookup_op = tf.load_op_library(lookup_lib_path) - self._verbose = verbose - with tf.Session() as sess: - meta_graph_def = tf.saved_model.loader.load(sess, ['serve'], - saved_model_dir) - self._meta_graph_version = meta_graph_def.meta_info_def.meta_graph_version - self._signature_def = meta_graph_def.signature_def[ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - - if self._verbose: - with open('meta_graph_raw.txt', 'w') as fout: - fout.write(text_format.MessageToString(meta_graph_def, as_utf8=True)) - self._meta_graph_def = meta_graph_def - self._old_node_num = len(self._meta_graph_def.graph_def.node) - self._all_graph_nodes = None - self._all_graph_node_flags = None - self._restore_tensor_node = None - self._restore_shard_node = None - self._lookup_outs = None - self._feature_names = None - self._embed_names = None - self._redis_url = redis_url - self._redis_passwd = redis_passwd - self._redis_timeout = redis_timeout - - @property - def graph_def(self): - return self._meta_graph_def.graph_def - - @property - def signature_def(self): - return self._signature_def - - @property - def meta_graph_version(self): - return self._meta_graph_version - - def init_graph_node_clear_flags(self): - graph_def = self._meta_graph_def.graph_def - self._all_graph_nodes = [n for n in graph_def.node] - self._all_graph_node_flags = [True for n in graph_def.node] - - def _get_input_name(self, node_name): - # input_layer/combo_shared/u_city_shared_embedding/u_city_shared_embedding_weights/SparseReshape - toks = node_name.split('/') - if '_shared_embedding/' in node_name: - for x in toks: - if x.endswith('_shared_embedding'): - return x[:x.index('_shared_embedding')] - else: - # for not shared embedding - for tok in toks: - if tok.endswith('_embedding_weights'): - return tok[:-len('_embedding_weights')] - return None - - def find_lookup_inputs(self): - values = {} - indices = {} - shapes = {} - embed_names = {} - - def _get_output_shape(graph_def, input_name): - out_id = 0 - if ':' in input_name: - node_name, out_id = input_name.split(':') - out_id = int(out_id) - else: - node_name = input_name - for node in graph_def.node: - if node.name == node_name: - return node.attr['_output_shapes'].list.shape[out_id] - return None - - def _get_embed_name(node_name): - toks = node_name.split('/') - if 
'_shared_embedding' in node_name: - # for shared embedding - for tid, x in enumerate(toks): - if x.endswith('_shared_embedding'): - assert tid > 0 - tmp_name = toks[tid - 1] - tmp_toks = tmp_name.split('_') - try: - int(tmp_toks[-1]) - return '_'.join(tmp_toks[:-1]) - except Exception: - return '_'.join(tmp_toks) - assert False, 'failed to get embed name from: %s' % node_name - else: - # for not shared embedding - for tok in toks: - if tok.endswith('_embedding_weights'): - return tok.replace('_embedding_weights', '') - return None - - logging.info('Extract embedding_lookup inputs and embedding dimensions') - - # use the specific _embedding_weights/SparseReshape to find out - # lookup inputs: indices, values, dense_shape - for node in self._meta_graph_def.graph_def.node: - if '_embedding_weights/SparseReshape' in node.name: - if node.op == 'SparseReshape': - embed_name = _get_embed_name(node.name) - fea_name = self._get_input_name(node.name) - for tmp_input in node.input: - tmp_shape = _get_output_shape(self._meta_graph_def.graph_def, - tmp_input) - if '_embedding_weights/Cast' in tmp_input: - continue - elif len(tmp_shape.dim) == 2: - indices[fea_name] = tmp_input - elif len(tmp_shape.dim) == 1: - shapes[fea_name] = tmp_input - embed_names[fea_name] = embed_name - elif node.op == 'Identity': - embed_name = _get_embed_name(node.name) - fea_name = self._get_input_name(node.name) - embed_names[fea_name] = embed_name - values[fea_name] = node.input[0] - - for fea in shapes.keys(): - logging.info('Lookup Input: indices=%s values=%s shapes=%s' % - (indices[fea], values[fea], shapes[fea])) - - graph = tf.get_default_graph() - - lookup_input_values = [] - lookup_input_indices = [] - lookup_input_shapes = [] - for key in values.keys(): - tmp_val, tmp_ind, tmp_shape = values[key], indices[key], shapes[key] - if ':' not in tmp_val: - tmp_val = tmp_val + ':0' - if ':' not in tmp_ind: - tmp_ind = tmp_ind + ':0' - if ':' not in tmp_shape: - tmp_shape = tmp_shape + ':0' - lookup_input_values.append(graph.get_tensor_by_name(tmp_val)) - lookup_input_indices.append(graph.get_tensor_by_name(tmp_ind)) - lookup_input_shapes.append(graph.get_tensor_by_name(tmp_shape)) - lookup_input_weights = [[] for x in lookup_input_shapes] - - # get embedding dimensions - embed_dims = {} - - def _get_embed_var_name(node_name): - tmp_toks = node_name.split('/') - for i in range(1, len(tmp_toks)): - if tmp_toks[i] == 'embedding_weights': - tmp_name = tmp_toks[i - 1] - if tmp_name.startswith('emd_'): - # emd_article_id - # emd_item_tag_id - tmp_name = tmp_name[len('emd_'):] - elif tmp_name.startswith('hist_emd_'): - # hist_emd_usersequence - tmp_name = tmp_name[len('hist_emd_'):] - if tmp_name.endswith('_embedding'): - # city_embedding - # login_time_span_embedding - # prefer_category_hist_ctr_embedding - return tmp_name[:-len('_embedding')] - else: - return tmp_name - return None - - for node in self._meta_graph_def.graph_def.node: - if 'embedding_weights' in node.name and node.op in [ - 'VariableV2', 'KvVarHandleOp' - ]: - tmp = node.attr['shape'].shape.dim[-1].size - embed_dims[_get_embed_var_name(node.name)] = tmp - - embed_dims = [embed_dims[embed_names[x]] for x in values.keys()] - self._feature_names = values.keys() - self._embed_names = [embed_names[x] for x in values.keys()] - return lookup_input_indices, lookup_input_values, lookup_input_shapes,\ - lookup_input_weights, embed_dims, self._embed_names - - def add_lookup_op(self, lookup_input_indices, lookup_input_values, - lookup_input_shapes, lookup_input_weights, 
embed_dims, - embed_names): - logging.info('add custom lookup operation to lookup embeddings from redis') - for i in range(len(lookup_input_values)): - if lookup_input_values[i].dtype == tf.int32: - lookup_input_values[i] = tf.to_int64(lookup_input_values[i]) - self._lookup_outs = self._lookup_op.kv_lookup( - lookup_input_indices, - lookup_input_values, - lookup_input_shapes, - lookup_input_weights, - url=self._redis_url, - password=self._redis_passwd, - timeout=self._redis_timeout, - combiners=['mean'] * len(lookup_input_weights), - embedding_dims=embed_dims, - embedding_names=embed_names, - version=self._meta_graph_version) - meta_graph_def = tf.train.export_meta_graph() - - if self._verbose: - with open('graph_raw.txt', 'w') as fout: - fout.write( - text_format.MessageToString( - self._meta_graph_def.graph_def, as_utf8=True)) - return meta_graph_def - - def clear_meta_graph_embeding(self, meta_graph_def): - logging.info('clear meta graph embedding_weights') - - def _clear_embedding_in_meta_collect(meta_graph_def, collect_name): - tmp_vals = [ - x - for x in meta_graph_def.collection_def[collect_name].bytes_list.value - if 'embedding_weights' not in x - ] - meta_graph_def.collection_def[collect_name].bytes_list.ClearField('value') - for tmp_v in tmp_vals: - meta_graph_def.collection_def[collect_name].bytes_list.value.append( - tmp_v) - - _clear_embedding_in_meta_collect(meta_graph_def, 'model_variables') - _clear_embedding_in_meta_collect(meta_graph_def, 'trainable_variables') - _clear_embedding_in_meta_collect(meta_graph_def, 'variables') - - # clear Kv(pai embedding variable) ops in meta_info_def.stripped_op_list.op - kept_ops = [ - x for x in meta_graph_def.meta_info_def.stripped_op_list.op - if x.name not in [ - 'InitializeKvVariableOp', 'KvResourceGather', 'KvResourceImportV2', - 'KvVarHandleOp', 'KvVarIsInitializedOp', 'ReadKvVariableOp' - ] - ] - meta_graph_def.meta_info_def.stripped_op_list.ClearField('op') - meta_graph_def.meta_info_def.stripped_op_list.op.extend(kept_ops) - for tmp_op in meta_graph_def.meta_info_def.stripped_op_list.op: - if tmp_op.name == 'SaveV2': - for tmp_id, tmp_attr in enumerate(tmp_op.attr): - if tmp_attr.name == 'has_ev': - tmp_op.attr.remove(tmp_attr) - break - - def clear_meta_collect(self, meta_graph_def): - drop_meta_collects = [] - for key in meta_graph_def.collection_def: - val = meta_graph_def.collection_def[key] - if val.HasField('node_list'): - if 'embedding_weights' in val.node_list.value[ - 0] and 'easy_rec' not in val.node_list.value[0]: - drop_meta_collects.append(key) - elif 'asset_filepaths' == key and val.node_list.value[ - 0] == 'pipeline.config:0': - # we have to drop this to avoid Load tensorflow model exception: - # Endpoint "pipeline.config:0" fed more than once. 
- drop_meta_collects.append(key) - for key in drop_meta_collects: - meta_graph_def.collection_def.pop(key) - - def remove_embedding_weights_and_update_lookup_outputs(self): - - def _should_drop(name): - if '_embedding_weights' in name: - if self._verbose: - logging.info('[SHOULD_DROP] %s' % name) - return True - - logging.info('remove embedding_weights node in graph_def.node') - logging.info( - 'and replace the old embedding_lookup outputs with new lookup_op outputs' - ) - - for tid, node in enumerate(self._all_graph_nodes): - # drop the nodes - if _should_drop(node.name): - self._all_graph_node_flags[tid] = False - else: - for i in range(len(node.input)): - if _should_drop(node.input[i]): - input_name = self._get_input_name(node.input[i]) - input_name = self._lookup_outs[self._feature_names.index( - input_name)].name - if input_name.endswith(':0'): - input_name = input_name.replace(':0', '') - node.input[i] = input_name - - # drop by ids - def _drop_by_ids(self, tmp_obj, key, drop_ids): - keep_vals = [ - x for i, x in enumerate(getattr(tmp_obj, key)) if i not in drop_ids - ] - tmp_obj.ClearField(key) - getattr(tmp_obj, key).extend(keep_vals) - - def clear_save_restore(self): - """Clear save restore ops. - - save/restore_all need save/restore_shard as input - save/restore_shard needs save/Assign_[0-N] as input - save/Assign_[0-N] needs save/RestoreV2 as input - save/RestoreV2 use save/RestoreV2/tensor_names and save/RestoreV2/shape_and_slices as input - edit [ save/RestoreV2/tensor_names save/RestoreV2/shape_and_slices save/RestoreV2 save/restore_shard ] - """ - for tid, node in enumerate(self._all_graph_nodes): - if not self._all_graph_node_flags[tid]: - continue - if node.name == 'save/RestoreV2/tensor_names': - self._restore_tensor_node = node - break - assert self._restore_tensor_node is not None, 'save/RestoreV2/tensor_names is not found' - - drop_ids = [] - for tmp_id, tmp_name in enumerate( - self._restore_tensor_node.attr['value'].tensor.string_val): - if 'embedding_weights' in tmp_name: - drop_ids.append(tmp_id) - - self._drop_by_ids(self._restore_tensor_node.attr['value'].tensor, - 'string_val', drop_ids) - keep_node_num = len( - self._restore_tensor_node.attr['value'].tensor.string_val) - logging.info( - 'update self._restore_tensor_node: string_val keep_num = %d drop_num = %d' - % (keep_node_num, len(drop_ids))) - self._restore_tensor_node.attr['value'].tensor.tensor_shape.dim[ - 0].size = keep_node_num - self._restore_tensor_node.attr['_output_shapes'].list.shape[0].dim[ - 0].size = keep_node_num - - logging.info( - 'update save/RestoreV2, drop tensor_shapes, _output_shapes, related to embedding_weights' - ) - self._restore_shard_node = None - for node_id, node in enumerate(self._all_graph_nodes): - if not self._all_graph_node_flags[tid]: - continue - if node.name == 'save/RestoreV2/shape_and_slices': - node.attr['value'].tensor.tensor_shape.dim[0].size = keep_node_num - node.attr['_output_shapes'].list.shape[0].dim[0].size = keep_node_num - self._drop_by_ids(node.attr['value'].tensor, 'string_val', drop_ids) - elif node.name == 'save/RestoreV2': - self._drop_by_ids(node.attr['_output_shapes'].list, 'shape', drop_ids) - self._drop_by_ids(node.attr['dtypes'].list, 'type', drop_ids) - elif node.name == 'save/restore_shard': - self._restore_shard_node = node - - def clear_save_assign(self): - logging.info( - 'update save/Assign, drop tensor_shapes, _output_shapes, related to embedding_weights' - ) - # edit save/Assign - drop_save_assigns = [] - all_kv_drop = [] - for tid, node in 
enumerate(self._all_graph_nodes): - if not self._all_graph_node_flags[tid]: - continue - if node.op == 'Assign' and 'save/Assign' in node.name and \ - 'embedding_weights' in node.input[0]: - drop_save_assigns.append('^' + node.name) - self._all_graph_node_flags[tid] = False - elif 'embedding_weights/ConcatPartitions/concat' in node.name: - self._all_graph_node_flags[tid] = False - elif node.name.endswith('/embedding_weights') and node.op == 'Identity': - self._all_graph_node_flags[tid] = False - elif 'save/KvResourceImportV2' in node.name and node.op == 'KvResourceImportV2': - drop_save_assigns.append('^' + node.name) - self._all_graph_node_flags[tid] = False - elif 'KvResourceImportV2' in node.name: - self._all_graph_node_flags[tid] = False - elif 'save/Const' in node.name and node.op == 'Const': - if '_class' in node.attr and 'embedding_weights' in node.attr[ - '_class'].list.s[0]: - self._all_graph_node_flags[tid] = False - elif 'ReadKvVariableOp' in node.name and node.op == 'ReadKvVariableOp': - all_kv_drop.append(node.name) - self._all_graph_node_flags[tid] = False - elif node.op == 'Assign' and 'save/Assign' in node.name: - # update node(save/Assign_[0-N])'s input[1] by the position of - # node.input[0] in save/RestoreV2/tensor_names - # the outputs of save/RestoreV2 is connected to save/Assign - tmp_id = list( - self._restore_tensor_node.attr['value'].tensor.string_val).index( - node.input[0]) - if tmp_id != 0: - tmp_input2 = 'save/RestoreV2:%d' % tmp_id - else: - tmp_input2 = 'save/RestoreV2' - if tmp_input2 != node.input[1]: - if self._verbose: - logging.info("update save/Assign[%s]'s input from %s to %s" % - (node.name, node.input[1], tmp_input2)) - node.input[1] = tmp_input2 - - # save/restore_all need save/restore_shard as input - # save/restore_shard needs save/Assign_[0-N] as input - # save/Assign_[0-N] needs save/RestoreV2 as input - for tmp_input in drop_save_assigns: - self._restore_shard_node.input.remove(tmp_input) - if self._verbose: - logging.info('drop restore_shard input: %s' % tmp_input) - - def clear_save_v2(self): - """Clear SaveV2 ops. - - save/Identity need [ save/MergeV2Checkpoints, save/control_dependency ] - as input. Save/MergeV2Checkpoints need [save/MergeV2Checkpoints/checkpoint_prefixes] - as input. Save/MergeV2Checkpoints/checkpoint_prefixes need [ save/ShardedFilename, - save/control_dependency ] as input. save/control_dependency need save/SaveV2 as input. - save/SaveV2 input: [ save/SaveV2/tensor_names, save/SaveV2/shape_and_slices ] - edit save/SaveV2 save/SaveV2/shape_and_slices save/SaveV2/tensor_names. 
- """ - logging.info('update save/SaveV2 input shape, _output_shapes, tensor_shape') - save_drop_ids = [] - for tid, node in enumerate(self._all_graph_nodes): - if not self._all_graph_node_flags[tid]: - continue - if node.name == 'save/SaveV2' and node.op == 'SaveV2': - for tmp_id, tmp_input in enumerate(node.input): - if '/embedding_weights' in tmp_input: - save_drop_ids.append(tmp_id) - diff_num = len(node.input) - len(node.attr['dtypes'].list.type) - self._drop_by_ids(node, 'input', save_drop_ids) - save_drop_ids = [x - diff_num for x in save_drop_ids] - self._drop_by_ids(node.attr['dtypes'].list, 'type', save_drop_ids) - if 'has_ev' in node.attr: - del node.attr['has_ev'] - for node in self._all_graph_nodes: - if node.name == 'save/SaveV2/shape_and_slices' and node.op == 'Const': - # _output_shapes # size # string_val - node.attr['_output_shapes'].list.shape[0].dim[0].size -= len( - save_drop_ids) - node.attr['value'].tensor.tensor_shape.dim[0].size -= len(save_drop_ids) - self._drop_by_ids(node.attr['value'].tensor, 'string_val', - save_drop_ids) - elif node.name == 'save/SaveV2/tensor_names': - # tensor_names may not have the same order as save/SaveV2/shape_and_slices - tmp_drop_ids = [ - tmp_id for tmp_id, tmp_val in enumerate( - node.attr['value'].tensor.string_val) - if 'embedding_weights' in tmp_val - ] - # attr['value'].tensor.string_val # tensor_shape # size - assert len(save_drop_ids) == len(save_drop_ids) - node.attr['_output_shapes'].list.shape[0].dim[0].size -= len( - tmp_drop_ids) - node.attr['value'].tensor.tensor_shape.dim[0].size -= len(tmp_drop_ids) - self._drop_by_ids(node.attr['value'].tensor, 'string_val', tmp_drop_ids) - - def clear_initialize(self): - """Clear initialization ops. - - */read(Identity) depend on [*(VariableV2)] - */Assign depend on [*/Initializer/*, *(VariableV2)] - drop embedding_weights initialization nodes - */embedding_weights/part_x [,/Assign,/read] - */embedding_weights/part_1/Initializer/truncated_normal [,/shape,/mean,/stddev,/TruncatedNormal,/mul] - """ - logging.info('Remove Initialization nodes for embedding_weights') - for tid, node in enumerate(self._all_graph_nodes): - if not self._all_graph_node_flags[tid]: - continue - if 'embedding_weights' in node.name and 'Initializer' in node.name: - self._all_graph_node_flags[tid] = False - elif 'embedding_weights' in node.name and 'Assign' in node.name: - self._all_graph_node_flags[tid] = False - elif 'embedding_weights' in node.name and node.op == 'VariableV2': - self._all_graph_node_flags[tid] = False - elif 'embedding_weights' in node.name and node.name.endswith( - '/read') and node.op == 'Identity': - self._all_graph_node_flags[tid] = False - elif 'embedding_weights' in node.name and node.op == 'Identity': - node_toks = node.name.split('/') - node_tok = node_toks[-1] - if 'embedding_weights_' in node_tok: - node_tok = node_tok[len('embedding_weights_'):] - try: - int(node_tok) - self._all_graph_node_flags[tid] = False - except Exception: - pass - - def clear_embedding_variable(self): - # for pai embedding variable, we drop some special nodes - for tid, node in enumerate(self._all_graph_nodes): - if not self._all_graph_node_flags[tid]: - continue - if node.op in [ - 'ReadKvVariableOp', 'KvVarIsInitializedOp', 'KvVarHandleOp' - ]: - self._all_graph_node_flags[tid] = False - - # there maybe some nodes depend on the dropped nodes, they are dropped as well - def drop_dependent_nodes(self): - drop_names = [ - tmp_node.name - for tid, tmp_node in enumerate(self._all_graph_nodes) - if not 
self._all_graph_node_flags[tid] - ] - while True: - more_drop_names = [] - for tid, tmp_node in enumerate(self._all_graph_nodes): - if not self._all_graph_node_flags[tid]: - continue - if len(tmp_node.input) > 0 and tmp_node.input[0] in drop_names: - logging.info('drop dependent node: %s depend on %s' % - (tmp_node.name, tmp_node.input[0])) - self._all_graph_node_flags[tid] = False - more_drop_names.append(tmp_node.name) - drop_names = more_drop_names - if not drop_names: - break - - def edit_graph(self): - # the main entrance - lookup_input_indices, lookup_input_values, lookup_input_shapes,\ - lookup_input_weights, embed_dims, embed_names =\ - self.find_lookup_inputs() - - # add lookup op to the graph - self._meta_graph_def = self.add_lookup_op(lookup_input_indices, - lookup_input_values, - lookup_input_shapes, - lookup_input_weights, embed_dims, - embed_names) - - self.clear_meta_graph_embeding(self._meta_graph_def) - - self.clear_meta_collect(self._meta_graph_def) - - self.init_graph_node_clear_flags() - - self.remove_embedding_weights_and_update_lookup_outputs() - - # save/RestoreV2 - self.clear_save_restore() - - # save/Assign - self.clear_save_assign() - - # save/SaveV2 - self.clear_save_v2() - - self.clear_initialize() - - self.clear_embedding_variable() - - self.drop_dependent_nodes() - - self._meta_graph_def.graph_def.ClearField('node') - self._meta_graph_def.graph_def.node.extend([ - x for tid, x in enumerate(self._all_graph_nodes) - if self._all_graph_node_flags[tid] - ]) - - logging.info('old node number = %d' % self._old_node_num) - logging.info('node number = %d' % len(self._meta_graph_def.graph_def.node)) - - if __name__ == '__main__': """Replace the default embedding_lookup ops with self defined embedding lookup ops. @@ -599,21 +34,13 @@ def edit_graph(self): parser.add_argument( '--saved_model_dir', type=str, default=None, help='saved model dir') parser.add_argument('--output_dir', type=str, default=None, help='output dir') - parser.add_argument( - '--lookup_lib_path', - type=str, - default='libkv_lookup.so', - help='lookup library path') parser.add_argument( '--redis_url', type=str, default='127.0.0.1:6379', help='redis url') parser.add_argument( '--redis_passwd', type=str, default='', help='redis password') parser.add_argument('--time_out', type=int, default=1500, help='timeout') parser.add_argument( - '--test_data_path', - type=str, - default='data/test/rtp/xys_cxr_fg_sample_test2_with_lbl.txt', - help='test data path') + '--test_data_path', type=str, default='', help='test data path') parser.add_argument('--verbose', action='store_true', default=False) args = parser.parse_args() @@ -625,30 +52,44 @@ def edit_graph(self): logging.info('output_dir: %s' % args.output_dir) logging.info('redis_url: %s' % args.redis_url) - logging.info('lookup_lib_path: %s' % args.lookup_lib_path) - - if os.path.isdir(args.output_dir): - shutil.rmtree(args.output_dir) - - meta_graph_editor = MetaGraphEditor(args.lookup_lib_path, - args.saved_model_dir, args.redis_url, - args.redis_passwd, args.time_out, - args.verbose) + lookup_lib_path = os.path.join(easy_rec.ops_dir, 'libkv_lookup.so') + logging.info('lookup_lib_path: %s' % lookup_lib_path) + + if not file_exists(args.output_dir): + recursive_create_dir(args.output_dir) + + meta_graph_editor = MetaGraphEditor( + lookup_lib_path, + args.saved_model_dir, + args.redis_url, + args.redis_passwd, + args.time_out, + meta_graph_def=None, + debug_dir=args.output_dir if args.verbose else '') meta_graph_editor.edit_graph() - if args.verbose: - with 
open('graph.txt', 'w') as fout: - fout.write( - text_format.MessageToString( - meta_graph_editor.graph_def, as_utf8=True)) - with open('meta_graph.txt', 'w') as fout: - fout.write( - text_format.MessageToString( - meta_graph_editor._meta_graph_def, as_utf8=True)) + meta_graph_version = meta_graph_editor.meta_graph_version + if meta_graph_version == '': + export_ts = [ + x for x in args.saved_model_dir.split('/') if x != '' and x is not None + ] + meta_graph_version = export_ts[-1] # import edit graph tf.reset_default_graph() saver = tf.train.import_meta_graph(meta_graph_editor._meta_graph_def) + + embed_name_to_id_file = os.path.join(args.output_dir, 'embed_name_to_ids.txt') + with GFile(embed_name_to_id_file, 'w') as fout: + for tmp_norm_name in meta_graph_editor._embed_name_to_ids: + fout.write( + '%s\t%s\n' % + (tmp_norm_name, meta_graph_editor._embed_name_to_ids[tmp_norm_name])) + tf.add_to_collection( + tf.GraphKeys.ASSET_FILEPATHS, + tf.constant( + embed_name_to_id_file, dtype=tf.string, name='embed_name_to_ids.txt')) + graph = tf.get_default_graph() inputs = meta_graph_editor.signature_def.inputs inputs_map = {} @@ -663,21 +104,23 @@ def edit_graph(self): outputs_map[name] = graph.get_tensor_by_name(tensor.name) with tf.Session() as sess: saver.restore(sess, args.saved_model_dir + '/variables/variables') + output_dir = os.path.join(args.output_dir, meta_graph_version) tf.saved_model.simple_save( - sess, args.output_dir, inputs=inputs_map, outputs=outputs_map) + sess, output_dir, inputs=inputs_map, outputs=outputs_map) # the meta_graph_version could not be passed via existing interfaces # so we could only write it by the raw methods saved_model = saved_model_pb2.SavedModel() - with open(os.path.join(args.output_dir, 'saved_model.pb'), 'rb') as fin: + with GFile(os.path.join(output_dir, 'saved_model.pb'), 'rb') as fin: saved_model.ParseFromString(fin.read()) + saved_model.meta_graphs[ 0].meta_info_def.meta_graph_version = meta_graph_editor.meta_graph_version - with open(os.path.join(args.output_dir, 'saved_model.pb'), 'wb') as fout: + with GFile(os.path.join(output_dir, 'saved_model.pb'), 'wb') as fout: fout.write(saved_model.SerializeToString()) - logging.info('save output to %s' % args.output_dir) - if args.test_data_path != '': - with open(args.test_data_path, 'r') as fin: + logging.info('save output to %s' % output_dir) + if args.test_data_path: + with GFile(args.test_data_path, 'r') as fin: feature_vals = [] for line_str in fin: line_str = line_str.strip() diff --git a/easy_rec/python/tools/predict_and_chk.py b/easy_rec/python/tools/predict_and_chk.py index d3e8ed2df..8cc0f70f1 100644 --- a/easy_rec/python/tools/predict_and_chk.py +++ b/easy_rec/python/tools/predict_and_chk.py @@ -24,7 +24,15 @@ parser.add_argument( '--cmp_key', type=str, default='probs', help='compare key') parser.add_argument('--tol', type=float, default=1e-5, help='tolerance') - parser.add_argument('--separator', type=str, default='', help='separator') + parser.add_argument( + '--label_id', + nargs='*', + type=int, + help='the label column, which is to be excluded') + parser.add_argument( + '--separator', type=str, default='', help='separator between features') + parser.add_argument( + '--rtp_separator', type=str, default='', help='separator') args = parser.parse_args() if not args.saved_model_dir: @@ -35,6 +43,9 @@ logging.error('input_path is not set') sys.exit(1) + if args.label_id is None: + args.label_id = [] + logging.info('input_path: ' + args.input_path) logging.info('save_path: ' + args.save_path) 
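The predict_and_chk.py changes just below split each test line with --rtp_separator, keep the feature column, and drop the columns listed by the new --label_id option. As a minimal standalone sketch of that logic (not part of the patch: the helper name split_features is made up, and the separators in the example are visible placeholders for the non-printable defaults used by the script):

```
def split_features(line_str, rtp_separator, separator, label_ids):
  """Mirror of the feature-splitting logic added to predict_and_chk.py."""
  line_tok = line_str.strip().split(rtp_separator)  # columns of the RTP line
  feature = line_tok[-1]                            # last column holds the features
  return [
      x for fid, x in enumerate(feature.split(separator))
      if fid not in label_ids                       # drop label columns by index
  ]

# e.g. split_features('1;lbl|f1|f2', ';', '|', [0]) -> ['f1', 'f2']
```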
logging.info('separator: ' + args.separator) @@ -44,8 +55,14 @@ batch_input = [] for line_str in fin: line_str = line_str.strip() - line_tok = line_str.split(args.separator) + line_tok = line_str.split(args.rtp_separator) feature = line_tok[-1] + feature = [ + x for fid, x in enumerate(feature.split(args.separator)) + if fid not in args.label_id + ] + if len(predictor.input_names) == 1: + feature = args.separator.join(feature) batch_input.append(feature) output = predictor.predict(batch_input) diff --git a/easy_rec/python/tools/test_saved_model.py b/easy_rec/python/tools/test_saved_model.py index 10fc3d3c0..35889a668 100644 --- a/easy_rec/python/tools/test_saved_model.py +++ b/easy_rec/python/tools/test_saved_model.py @@ -1,32 +1,43 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. import argparse +import json import logging +import os +import numpy as np import tensorflow as tf -from tensorflow.python.saved_model import signature_constants + +import easy_rec +from easy_rec.python.inference.predictor import Predictor logging.basicConfig( format='[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d : %(message)s', level=logging.INFO) -lookup_lib_path = 'libs/libkv_lookup.so' -lookup_op = tf.load_op_library(lookup_lib_path) +lookup_op_path = os.path.join(easy_rec.ops_dir, 'libkv_lookup.so') +lookup_op = tf.load_op_library(lookup_op_path) if __name__ == '__main__': """Test saved model, an example: python -m easy_rec.python.tools.test_saved_model --saved_model_dir after_edit_save - --test_data_path data/test/rtp/xys_cxr_fg_sample_test2_with_lbl.txt + --input_path data/test/rtp/xys_cxr_fg_sample_test2_with_lbl.txt --with_lbl """ parser = argparse.ArgumentParser() parser.add_argument( '--saved_model_dir', type=str, default=None, help='saved model dir') + parser.add_argument('--input_path', type=str, default=None, help='output dir') + parser.add_argument('--save_path', type=str, default=None, help='save path') + parser.add_argument('--separator', type=str, default=',', help='separator') + parser.add_argument( + '--cmp_res_path', type=str, default=None, help='compare result path') parser.add_argument( - '--test_data_path', type=str, default=None, help='output dir') + '--cmp_key', type=str, default='probs', help='compare key') + parser.add_argument('--tol', type=float, default=1e-5, help='tolerance') parser.add_argument( '--with_lbl', action='store_true', @@ -35,35 +46,35 @@ args = parser.parse_args() logging.info('saved_model_dir: %s' % args.saved_model_dir) - logging.info('test_data_path: %s' % args.test_data_path) + logging.info('test_data_path: %s' % args.input_path) logging.info('test_data has lbl: %s' % args.with_lbl) - with tf.Session() as sess: - meta_graph_def = tf.saved_model.loader.load(sess, ['serve'], - args.saved_model_dir) - signature_def = meta_graph_def.signature_def[ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - graph = tf.get_default_graph() - inputs = signature_def.inputs - inputs_map = {} - for name, tensor in inputs.items(): - logging.info('inputs: %s => %s' % (name, tensor.name)) - inputs_map[name] = graph.get_tensor_by_name(tensor.name) - outputs = signature_def.outputs - outputs_map = {} - for name, tensor in outputs.items(): - logging.info('outputs: %s => %s' % (name, tensor.name)) - outputs_map[name] = graph.get_tensor_by_name(tensor.name) - with open(args.test_data_path, 'r') as fin: - feature_vals = [] - for line_str in fin: + predictor = Predictor(args.saved_model_dir) + with open(args.input_path, 'r') as fin: + feature_vals = [] 
+ for line_str in fin: + line_str = line_str.strip() + line_toks = line_str.split(args.separator) + if args.with_lbl: + line_toks = line_toks[1:] + feature_vals.append(args.separator.join(line_toks)) + output = predictor.predict(feature_vals, batch_size=4096) + + if args.save_path: + with open(args.save_path, 'w') as fout: + for one in output: + fout.write(str(one) + '\n') + + if args.cmp_res_path: + logging.info('compare result path: ' + args.cmp_res_path) + logging.info('compare key: ' + args.cmp_key) + logging.info('tolerance: ' + str(args.tol)) + with open(args.cmp_res_path, 'r') as fin: + for line_id, line_str in enumerate(fin): line_str = line_str.strip() - line_toks = line_str.split('') - if args.with_lbl: - line_toks = line_toks[1:] - feature_vals.append(''.join(line_toks)) - if len(feature_vals) >= 128: - break - out_vals = sess.run( - outputs_map, feed_dict={inputs_map['features']: feature_vals}) - logging.info(out_vals) + line_pred = json.loads(line_str) + assert np.abs( + line_pred[args.cmp_key] - + output[line_id][args.cmp_key]) < args.tol, 'line[%d]: %.8f' % ( + line_id, + np.abs(line_pred[args.cmp_key] - output[line_id][args.cmp_key])) diff --git a/easy_rec/python/train_eval.py b/easy_rec/python/train_eval.py index 4ad78d81f..ab824cc1f 100644 --- a/easy_rec/python/train_eval.py +++ b/easy_rec/python/train_eval.py @@ -2,8 +2,10 @@ # Copyright (c) Alibaba, Inc. and its affiliates. import json import logging +import os import tensorflow as tf +from tensorflow.python.lib.io import file_io from easy_rec.python.main import _train_and_evaluate_impl from easy_rec.python.utils import config_util @@ -27,12 +29,24 @@ 'hpo_metric_save_path', None, help='hyperparameter save metric path') tf.app.flags.DEFINE_string( 'model_dir', None, help='will update the model_dir in pipeline_config') +tf.app.flags.DEFINE_multi_string( + 'train_input_path', None, help='train data input path') +tf.app.flags.DEFINE_multi_string( + 'eval_input_path', None, help='eval data input path') +tf.app.flags.DEFINE_string( + 'fine_tune_checkpoint', + None, + help='will update the train_config.fine_tune_checkpoint in pipeline_config') tf.app.flags.DEFINE_string( 'edit_config_json', None, help='edit pipeline config str, example: {"model_dir":"experiments/",' '"feature_config[0].boundaries":[4,5,6,7]}') - +tf.app.flags.DEFINE_bool( + 'ignore_finetune_ckpt_error', False, + 'During incremental training, ignore the problem of missing fine_tune_checkpoint files' +) +tf.app.flags.DEFINE_string('odps_config', None, help='odps config path') FLAGS = tf.app.flags.FLAGS @@ -43,6 +57,24 @@ def main(argv): if FLAGS.model_dir: pipeline_config.model_dir = FLAGS.model_dir logging.info('update model_dir to %s' % pipeline_config.model_dir) + if FLAGS.train_input_path: + pipeline_config.train_input_path = ','.join(FLAGS.train_input_path) + logging.info('update train_input_path to %s' % + pipeline_config.train_input_path) + if FLAGS.eval_input_path: + pipeline_config.eval_input_path = ','.join(FLAGS.eval_input_path) + logging.info('update eval_input_path to %s' % + pipeline_config.eval_input_path) + if FLAGS.fine_tune_checkpoint: + if file_io.file_exists(FLAGS.fine_tune_checkpoint): + pipeline_config.train_config.fine_tune_checkpoint = FLAGS.fine_tune_checkpoint + logging.info('update fine_tune_checkpoint to %s' % + pipeline_config.train_config.fine_tune_checkpoint) + else: + assert FLAGS.ignore_finetune_ckpt_error, 'fine_tune_checkpoint(%s) is not exists.' 
% FLAGS.fine_tune_checkpoint + + if FLAGS.odps_config: + os.environ['ODPS_CONFIG_FILE_PATH'] = FLAGS.odps_config if FLAGS.hpo_param_path: with tf.gfile.GFile(FLAGS.hpo_param_path, 'r') as fin: hpo_config = json.load(fin) @@ -56,6 +88,14 @@ def main(argv): has_evaluator=False) elif FLAGS.edit_config_json: config_json = json.loads(FLAGS.edit_config_json) + fine_tune_checkpoint = config_json.get( + 'train_config.fine_tune_checkpoint', None) + if fine_tune_checkpoint: + if not file_io.file_exists(fine_tune_checkpoint): + assert FLAGS.ignore_finetune_ckpt_error, 'fine_tune_checkpoint(%s) is not exists.' % fine_tune_checkpoint + config_json.pop('train_config.fine_tune_checkpoint', None) + logging.info('fine_tune_checkpoint(%s) is not exists. Drop it.' % + fine_tune_checkpoint) config_util.edit_config(pipeline_config, config_json) config_util.auto_expand_share_feature_configs(pipeline_config) _train_and_evaluate_impl(pipeline_config, FLAGS.continue_train) diff --git a/easy_rec/python/utils/config_util.py b/easy_rec/python/utils/config_util.py index 85de312f7..642456f16 100644 --- a/easy_rec/python/utils/config_util.py +++ b/easy_rec/python/utils/config_util.py @@ -9,20 +9,14 @@ import logging import os import re -import sys -import six import tensorflow as tf from google.protobuf import json_format from google.protobuf import text_format from tensorflow.python.lib.io import file_io from easy_rec.python.protos import pipeline_pb2 -from easy_rec.python.protos.dataset_pb2 import DatasetConfig from easy_rec.python.protos.feature_config_pb2 import FeatureConfig -from easy_rec.python.protos.feature_config_pb2 import FeatureGroupConfig -from easy_rec.python.protos.feature_config_pb2 import WideOrDeep -from easy_rec.python.protos.pipeline_pb2 import EasyRecConfig if tf.__version__ >= '2.0': tf = tf.compat.v1 @@ -55,7 +49,7 @@ def get_configs_from_pipeline_file(pipeline_config_path, auto_expand=True): elif pipeline_config_path.endswith('.json'): json_format.Parse(config_str, pipeline_config) else: - assert 'invalid file format(%s), currently support formats: .config(prototxt) .json' % pipeline_config_path + assert False, 'invalid file format(%s), currently support formats: .config(prototxt) .json' % pipeline_config_path if auto_expand: return auto_expand_share_feature_configs(pipeline_config) @@ -210,6 +204,7 @@ def _get_attr(obj, attr, only_last=False): obj_id = int(cond) obj = update_objs[obj_id] paths.append((obj, update_objs, None, obj_id)) + nobjs.append(obj) continue except ValueError: pass @@ -237,9 +232,16 @@ def _get_attr(obj, attr, only_last=False): assert cond_val is not None, 'invalid cond: %s' % cond for tid, update_obj in enumerate(update_objs): - tmp, _, _, _ = _get_attr(update_obj, cond_key, only_last=True) + tmp, tmp_parent, _, _ = _get_attr( + update_obj, cond_key, only_last=True) if type(cond_val) != type(tmp): - cond_val = type(tmp)(cond_val) + try: + cond_val = type(tmp)(cond_val) + except ValueError: + # to support for enumerations like IdFeature + assert isinstance(tmp, int) + cond_val = getattr(tmp_parent, cond_val) + assert isinstance(cond_val, int) if op_func(tmp, cond_val): obj_id = tid paths.append((update_obj, update_objs, None, obj_id)) @@ -266,17 +268,19 @@ def _get_attr(obj, attr, only_last=False): tmp_paths = _get_attr(update_obj, param_key) # update a set of objs for tmp_val, tmp_obj, tmp_name, tmp_id in tmp_paths: - basic_types = [int, str, float, bool] - if six.PY2: - basic_types.append(unicode) # noqa: F821 + basic_types = [int, str, float, bool, type(u'')] if 
type(tmp_val) in basic_types: # simple type cast - tmp_val = type(tmp_val)(param_val) - if tmp_name is None: - tmp_obj[tmp_id] = tmp_val - else: - setattr(tmp_obj, tmp_name, tmp_val) - elif 'RepeatedScalarContainer' in str(type(tmp_val)): + try: + tmp_val = type(tmp_val)(param_val) + if tmp_name is None: + tmp_obj[tmp_id] = tmp_val + else: + setattr(tmp_obj, tmp_name, tmp_val) + except ValueError: + # for enumeration types + text_format.Merge('%s:%s' % (tmp_name, param_val), tmp_obj) + elif 'Scalar' in str(type(tmp_val)) and 'ClearField' in dir(tmp_obj): tmp_obj.ClearField(tmp_name) text_format.Parse('%s:%s' % (tmp_name, param_val), tmp_obj) else: @@ -306,222 +310,31 @@ def save_message(protobuf_message, filename): f.write(config_text) -def convert_rtp_fg(rtp_fg, - embedding_dim=16, - batch_size=1024, - label_fields=[], - num_steps=10, - model_type='', - separator='\002', - incol_separator='\003', - train_input_path=None, - eval_input_path=None, - selected_cols=''): - pipeline_config = EasyRecConfig() - with tf.gfile.GFile(rtp_fg, 'r') as fin: - rtp_fg = json.load(fin) - for tmp_lbl in label_fields: - input_field = DatasetConfig.Field() - input_field.input_name = tmp_lbl - input_field.input_type = DatasetConfig.INT32 - input_field.default_val = '0' - pipeline_config.data_config.input_fields.append(input_field) - - pipeline_config.data_config.separator = separator - if selected_cols: - pipeline_config.data_config.selected_cols = selected_cols - if train_input_path is not None: - pipeline_config.train_input_path = train_input_path - if eval_input_path is not None: - pipeline_config.eval_input_path = eval_input_path - pipeline_config.model_dir = 'experiments/rtp_fg_demo' - - rtp_features = rtp_fg['features'] - for feature in rtp_features: +def add_boundaries_to_config(pipeline_config, tables): + import common_io + + feature_boundaries_info = {} + reader = common_io.table.TableReader(tables, selected_cols='feature,json') + while True: try: - feature_type = feature['feature_type'] - feature_name = feature['feature_name'] - feature_config = FeatureConfig() - feature_config.input_names.append(feature_name) - feature_config.separator = incol_separator - input_field = DatasetConfig.Field() - input_field.input_name = feature_name - if feature_type == 'id_feature': - feature_config.feature_type = feature_config.TagFeature - feature_config.embedding_dim = embedding_dim - feature_config.hash_bucket_size = feature['hash_bucket_size'] - elif feature_type == 'lookup_feature': - need_discrete = feature.get('needDiscrete', True) - need_key = feature.get('needKey', True) # noqa: F841 - if not need_discrete: - feature_config.feature_type = feature_config.RawFeature - input_field.input_type = DatasetConfig.DOUBLE - input_field.default_val = '0.0' - else: - feature_config.feature_type = feature_config.TagFeature - feature_config.embedding_dim = embedding_dim - feature_config.hash_bucket_size = feature['hash_bucket_size'] - elif feature_type == 'raw_feature': - feature_config.feature_type = feature_config.RawFeature - input_field.input_type = DatasetConfig.DOUBLE - input_field.default_val = '0.0' - elif feature_type == 'match_feature': - feature_config.input_names.append(feature_name + '_wgt') - feature_config.feature_type = feature_config.TagFeature - feature_config.embedding_dim = embedding_dim - feature_config.hash_bucket_size = feature['hash_bucket_size'] - elif feature_type == 'combo_feature': - feature_config.feature_type = feature_config.TagFeature - feature_config.hash_bucket_size = 
feature['hash_bucket_size'] - feature_config.embedding_dim = embedding_dim - elif feature_type == 'overlap_feature': - if feature['method'] in ['common_word_divided', 'diff_word_divided']: - feature_config.feature_type = feature_config.TagFeature - else: - feature_config.feature_type = feature_config.IdFeature - feature_config.hash_bucket_size = feature['hash_bucket_size'] - feature_config.embedding_dim = embedding_dim - elif feature_type == 'expr_feature': - feature_config.feature_type = feature_config.RawFeature - input_field.input_type = DatasetConfig.DOUBLE - input_field.default_val = '0.0' - else: - assert 'unknown feature type %s, currently not supported' % feature_type - if 'shared_name' in feature: - feature_config.embedding_name = feature['shared_name'] - pipeline_config.feature_configs.append(feature_config) - pipeline_config.data_config.input_fields.append(input_field) - except Exception as ex: - print('Exception: %s %s' % (type(ex), str(ex))) - print(feature) - sys.exit(1) - pipeline_config.data_config.input_type = pipeline_config.data_config.RTPInput - pipeline_config.data_config.batch_size = batch_size - pipeline_config.data_config.rtp_separator = ';' - pipeline_config.data_config.label_fields.extend(label_fields) - pipeline_config.train_config.num_steps = num_steps - - if model_type: - train_config_str = """ - train_config { - log_step_count_steps: 200 - optimizer_config: { - adam_optimizer: { - learning_rate: { - exponential_decay_learning_rate { - initial_learning_rate: 0.0001 - decay_steps: 100000 - decay_factor: 0.5 - min_learning_rate: 0.0000001 - } - } - } - use_moving_average: false - } - - sync_replicas: true - } - """ - text_format.Merge(train_config_str, pipeline_config) - - if model_type == 'deepfm': - pipeline_config.model_config.model_class = 'DeepFM' - wide_group = FeatureGroupConfig() - wide_group.group_name = 'wide' - wide_group.wide_deep = WideOrDeep.WIDE - for feature in rtp_features: - feature_name = feature['feature_name'] - wide_group.feature_names.append(feature_name) - pipeline_config.model_config.feature_groups.append(wide_group) - deep_group = FeatureGroupConfig() - deep_group.CopyFrom(wide_group) - deep_group.group_name = 'deep' - deep_group.wide_deep = WideOrDeep.DEEP - pipeline_config.model_config.feature_groups.append(deep_group) - deepfm_config_str = """ - deepfm { - dnn { - hidden_units: [128, 64, 32] - } - final_dnn { - hidden_units: [128, 64] - } - wide_output_dim: 32 - l2_regularization: 1e-5 - } - """ - text_format.Merge(deepfm_config_str, pipeline_config.model_config) - pipeline_config.model_config.embedding_regularization = 1e-5 - elif model_type == 'multi_tower': - pipeline_config.model_config.model_class = 'MultiTower' - - feature_groups = {} - group_map = { - 'u': 'user', - 'i': 'item', - 'ctx': 'combo', - 'q': 'combo', - 'comb': 'combo' - } - for feature in rtp_features: - feature_name = feature['feature_name'].strip() - group_name = '' - if 'group' in feature: - group_name = feature['group'] - else: - toks = feature_name.split('_') - group_name = toks[0] - if group_name in group_map: - group_name = group_map[group_name] - if group_name in feature_groups: - feature_groups[group_name].append(feature_name) - else: - feature_groups[group_name] = [feature_name] - - logging.info( - 'if group is specified, group will be used as feature group name; ' - 'otherwise, the prefix of feature_name in fg.json is used as feature group name' - ) - logging.info('prefix map: %s' % str(group_map)) - for group_name in feature_groups: - logging.info('add 
group = %s' % group_name) - group = FeatureGroupConfig() - group.group_name = group_name - for fea_name in feature_groups[group_name]: - group.feature_names.append(fea_name) - group.wide_deep = WideOrDeep.DEEP - pipeline_config.model_config.feature_groups.append(group) - - multi_tower_config_str = ' multi_tower {\n' - for group_name in feature_groups: - multi_tower_config_str += """ - towers { - input: "%s" - dnn { - hidden_units: [256, 192, 128] - } - } - """ % group_name - - multi_tower_config_str = multi_tower_config_str + """ - final_dnn { - hidden_units: [192, 128, 64] - } - l2_regularization: 1e-4 - } - """ - text_format.Merge(multi_tower_config_str, pipeline_config.model_config) - pipeline_config.model_config.embedding_regularization = 1e-5 - text_format.Merge(""" - metrics_set { - auc {} - } - """, pipeline_config.eval_config) - - text_format.Merge( - """ - export_config { - multi_placeholder: false - } - """, pipeline_config) - return pipeline_config + record = reader.read() + raw_info = json.loads(record[0][1]) + bin_info = [] + for info in raw_info['bin']['norm'][:-1]: + split_point = float(info['value'].split(',')[1][:-1]) + bin_info.append(split_point) + feature_boundaries_info[record[0][0]] = bin_info + except common_io.exception.OutOfRangeException: + reader.close() + break + + logging.info('feature boundaries: %s' % feature_boundaries_info) + + for feature_config in pipeline_config.feature_configs: + feature_name = feature_config.input_names[0] + if feature_name in feature_boundaries_info: + feature_config.feature_type = feature_config.RawFeature + feature_config.hash_bucket_size = 0 + feature_config.ClearField('boundaries') + feature_config.boundaries.extend(feature_boundaries_info[feature_name]) + logging.info('edited %s' % feature_name) diff --git a/easy_rec/python/utils/constant.py b/easy_rec/python/utils/constant.py new file mode 100644 index 000000000..9df831a89 --- /dev/null +++ b/easy_rec/python/utils/constant.py @@ -0,0 +1,4 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. + +SAMPLE_WEIGHT = 'SAMPLE_WEIGHT' diff --git a/easy_rec/python/utils/convert_rtp_fg.py b/easy_rec/python/utils/convert_rtp_fg.py new file mode 100644 index 000000000..98dcede72 --- /dev/null +++ b/easy_rec/python/utils/convert_rtp_fg.py @@ -0,0 +1,513 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
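The new easy_rec/python/utils/convert_rtp_fg.py below moves convert_rtp_fg out of config_util and extends it (per-feature embedding dims, hash-bucket/vocab handling, more model types). A rough usage sketch, not part of the patch: the fg.json path, label field and output path are hypothetical, the keyword arguments follow the signature defined below, and save_message is the config_util helper shown earlier in this diff:

```
from easy_rec.python.utils.config_util import save_message
from easy_rec.python.utils.convert_rtp_fg import convert_rtp_fg

# convert_rtp_fg reads the RTP fg.json itself and returns an EasyRecConfig proto.
pipeline_config = convert_rtp_fg(
    'fg.json',                 # assumed path to the RTP feature-generation config
    embedding_dim=16,
    batch_size=1024,
    label_fields=['clk'],      # hypothetical label column
    model_type='multi_tower',
    input_type='OdpsRTPInput')

# save_message writes the proto in text format to the given file.
save_message(pipeline_config, 'experiments/rtp_fg_demo/pipeline.config')
```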
+import json +import logging +import sys + +import tensorflow as tf +from google.protobuf import text_format + +from easy_rec.python.protos.dataset_pb2 import DatasetConfig +from easy_rec.python.protos.feature_config_pb2 import FeatureConfig +from easy_rec.python.protos.feature_config_pb2 import FeatureGroupConfig +from easy_rec.python.protos.feature_config_pb2 import WideOrDeep +from easy_rec.python.protos.pipeline_pb2 import EasyRecConfig +from easy_rec.python.utils import config_util + +if tf.__version__ >= '2.0': + tf = tf.compat.v1 + + +def _gen_raw_config(feature, input_field, feature_config, is_multi, + curr_embed_dim): + if 'bucketize_boundaries' in feature: + if is_multi: + input_field.input_type = DatasetConfig.STRING + feature_config.feature_type = feature_config.TagFeature + else: + input_field.input_type = DatasetConfig.INT32 + feature_config.feature_type = feature_config.IdFeature + feature_config.num_buckets = len( + feature['bucketize_boundaries'].split(',')) + 1 + feature_config.embedding_dim = curr_embed_dim + else: + feature_config.feature_type = feature_config.RawFeature + input_field.default_val = feature.get('default_value', '0.0') + raw_input_dim = feature.get('value_dimension', 1) + if raw_input_dim > 1: + feature_config.raw_input_dim = raw_input_dim + input_field.input_type = DatasetConfig.STRING + else: + input_field.input_type = DatasetConfig.DOUBLE + if 'boundaries' in feature: + feature_config.boundaries.extend(feature['boundaries']) + feature_config.embedding_dim = curr_embed_dim + + +def _set_hash_bucket(feature, feature_config, input_field): + if 'max_partitions' in feature: + feature_config.max_partitions = feature['max_partitions'] + if 'hash_bucket_size' in feature: + feature_config.hash_bucket_size = feature['hash_bucket_size'] + if feature_config.hash_bucket_size > 10000000: + if 'max_partitions' not in feature: + logging.error( + 'it is suggested to set max_partitions > 1 for large hash buckets[%s]' + % feature['feature_name']) + sys.exit(1) + elif 'vocab_file' in feature: + feature_config.vocab_file = feature['vocab_file'] + elif 'vocab_list' in feature: + feature_config.vocab_list = feature['vocab_list'] + elif 'num_buckets' in feature: + feature_config.num_buckets = feature['num_buckets'] + input_field.default_val = feature.get('default_value', '0') + else: + assert False, 'one of hash_bucket_size,vocab_file,vocab_list,num_buckets must be set' + + +def convert_rtp_fg(rtp_fg, + embedding_dim=16, + batch_size=1024, + label_fields=[], + num_steps=10, + model_type='', + separator='\002', + incol_separator='\003', + train_input_path=None, + eval_input_path=None, + selected_cols='', + input_type='OdpsRTPInput', + is_async=False): + with tf.gfile.GFile(rtp_fg, 'r') as fin: + rtp_fg = json.load(fin) + + model_dir = rtp_fg.get('model_dir', 'experiments/rtp_fg_demo') + num_steps = rtp_fg.get('num_steps', num_steps) + model_type = rtp_fg.get('model_type', model_type) + embedding_dim = rtp_fg.get('embedding_dim', embedding_dim) + label_fields = rtp_fg.get('label_fields', label_fields) + model_path = rtp_fg.get('model_path', '') + edit_config_json = rtp_fg.get('edit_config_json', None) + + logging.info('model_dir = %s' % model_dir) + logging.info('num_steps = %d' % num_steps) + logging.info('model_type = %s' % model_type) + logging.info('embedding_dim = %s' % embedding_dim) + logging.info('label_fields = %s' % ','.join(label_fields)) + logging.info('model_path = %s' % model_path) + logging.info('edit_config_json = %s' % edit_config_json) + + pipeline_config = 
EasyRecConfig() + + for tmp_lbl in label_fields: + input_field = DatasetConfig.Field() + input_field.input_name = tmp_lbl + input_field.input_type = DatasetConfig.INT32 + input_field.default_val = '0' + pipeline_config.data_config.input_fields.append(input_field) + + pipeline_config.data_config.separator = separator + if selected_cols: + pipeline_config.data_config.selected_cols = selected_cols + if train_input_path is not None: + pipeline_config.train_input_path = train_input_path + if eval_input_path is not None: + pipeline_config.eval_input_path = eval_input_path + + pipeline_config.model_dir = model_dir + + rtp_features = rtp_fg['features'] + for feature in rtp_features: + try: + feature_type = feature['feature_type'] + feature_name = feature['feature_name'] + feature_config = FeatureConfig() + feature_config.input_names.append(feature_name) + feature_config.separator = incol_separator + input_field = DatasetConfig.Field() + input_field.input_name = feature_name + curr_embed_dim = feature.get('embedding_dimension', + feature.get('embedding_dim', embedding_dim)) + curr_combiner = feature.get('combiner', 'mean') + if feature.get('is_cache', False): + logging.info('will cache %s' % feature_name) + feature_config.is_cache = True + is_multi = feature.get('is_multi', False) + if feature_type == 'id_feature': + if is_multi: + feature_config.feature_type = feature_config.TagFeature + else: + feature_config.feature_type = feature_config.IdFeature + feature_config.embedding_dim = curr_embed_dim + _set_hash_bucket(feature, feature_config, input_field) + feature_config.combiner = curr_combiner + elif feature_type == 'lookup_feature': + need_discrete = feature.get('needDiscrete', True) + if not need_discrete: + _gen_raw_config(feature, input_field, feature_config, is_multi, + curr_embed_dim) + else: + if is_multi: + feature_config.feature_type = feature_config.TagFeature + if feature.get('needWeighting', False): + feature_config.kv_separator = '' + else: + feature_config.feature_type = feature_config.IdFeature + feature_config.embedding_dim = curr_embed_dim + _set_hash_bucket(feature, feature_config, input_field) + feature_config.combiner = curr_combiner + elif feature_type == 'raw_feature': + _gen_raw_config(feature, input_field, feature_config, is_multi, + curr_embed_dim) + elif feature_type == 'match_feature': + need_discrete = feature.get('needDiscrete', True) + if feature.get('matchType', '') == 'multihit': + is_multi = True + if need_discrete: + if is_multi: + feature_config.feature_type = feature_config.TagFeature + if feature.get('needWeighting', False): + feature_config.kv_separator = '' + else: + feature_config.feature_type = feature_config.IdFeature + feature_config.embedding_dim = curr_embed_dim + _set_hash_bucket(feature, feature_config, input_field) + feature_config.combiner = curr_combiner + else: + assert 'bucketize_boundaries' not in feature + _gen_raw_config(feature, input_field, feature_config, is_multi, + curr_embed_dim) + elif feature_type == 'combo_feature': + feature_config.feature_type = feature_config.TagFeature + _set_hash_bucket(feature, feature_config, input_field) + feature_config.embedding_dim = curr_embed_dim + feature_config.combiner = curr_combiner + elif feature_type == 'overlap_feature': + if feature['method'] in ['common_word_divided', 'diff_word_divided']: + feature_config.feature_type = feature_config.TagFeature + else: + feature_config.feature_type = feature_config.IdFeature + _set_hash_bucket(feature, feature_config, input_field) +
feature_config.embedding_dim = curr_embed_dim + feature_config.combiner = curr_combiner + else: + assert 'unknown feature type %s, currently not supported' % feature_type + if 'shared_name' in feature: + feature_config.embedding_name = feature['shared_name'] + pipeline_config.feature_configs.append(feature_config) + pipeline_config.data_config.input_fields.append(input_field) + except Exception as ex: + print('Exception: %s %s' % (type(ex), str(ex))) + print(feature) + sys.exit(1) + pipeline_config.data_config.batch_size = batch_size + pipeline_config.data_config.rtp_separator = ';' + pipeline_config.data_config.label_fields.extend(label_fields) + + text_format.Merge('input_type: %s' % input_type, pipeline_config.data_config) + + if model_path: + model_type = None + with tf.gfile.GFile(model_path, 'r') as fin: + model_config = fin.read() + text_format.Merge(model_config, pipeline_config) + + if not pipeline_config.HasField('train_config'): + train_config_str = """ + train_config { + log_step_count_steps: 200 + optimizer_config: { + %s: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: %s + } + """ % ('adam_optimizer' if not is_async else 'adam_async_optimizer', + 'true' if not is_async else 'false') + text_format.Merge(train_config_str, pipeline_config) + + pipeline_config.train_config.num_steps = num_steps + + if model_type == 'deepfm': + pipeline_config.model_config.model_class = 'DeepFM' + wide_group = FeatureGroupConfig() + wide_group.group_name = 'wide' + wide_group.wide_deep = WideOrDeep.WIDE + for feature in rtp_features: + feature_name = feature['feature_name'] + wide_group.feature_names.append(feature_name) + pipeline_config.model_config.feature_groups.append(wide_group) + deep_group = FeatureGroupConfig() + deep_group.CopyFrom(wide_group) + deep_group.group_name = 'deep' + deep_group.wide_deep = WideOrDeep.DEEP + pipeline_config.model_config.feature_groups.append(deep_group) + deepfm_config_str = """ + deepfm { + dnn { + hidden_units: [128, 64, 32] + } + final_dnn { + hidden_units: [128, 64] + } + wide_output_dim: 32 + l2_regularization: 1e-5 + } + """ + text_format.Merge(deepfm_config_str, pipeline_config.model_config) + pipeline_config.model_config.embedding_regularization = 1e-5 + elif model_type == 'wide_and_deep': + pipeline_config.model_config.model_class = 'WideAndDeep' + wide_group = FeatureGroupConfig() + wide_group.group_name = 'wide' + wide_group.wide_deep = WideOrDeep.WIDE + for feature in rtp_features: + feature_name = feature['feature_name'] + group = feature.get('group', 'wide_and_deep') + if group not in ['wide', 'deep', 'wide_and_deep']: + logging.warning('invalid group %s for %s' % (group, feature_name)) + group = 'wide_and_deep' + if group in ['wide', 'wide_and_deep']: + wide_group.feature_names.append(feature_name) + pipeline_config.model_config.feature_groups.append(wide_group) + deep_group = FeatureGroupConfig() + deep_group.group_name = 'deep' + deep_group.wide_deep = WideOrDeep.DEEP + for feature in rtp_features: + feature_name = feature['feature_name'] + group = feature.get('group', 'wide_and_deep') + if group not in ['wide', 'deep', 'wide_and_deep']: + group = 'wide_and_deep' + if group in ['deep', 'wide_and_deep']: + deep_group.feature_names.append(feature_name) + pipeline_config.model_config.feature_groups.append(deep_group) + deepfm_config_str = """ + wide_and_deep { + dnn { + 
hidden_units: [128, 64, 32] + } + l2_regularization: 1e-5 + } + """ + text_format.Merge(deepfm_config_str, pipeline_config.model_config) + pipeline_config.model_config.embedding_regularization = 1e-5 + elif model_type == 'multi_tower': + pipeline_config.model_config.model_class = 'MultiTower' + + feature_groups = {} + group_map = { + 'u': 'user', + 'i': 'item', + 'ctx': 'combo', + 'q': 'combo', + 'comb': 'combo' + } + for feature in rtp_features: + feature_name = feature['feature_name'].strip() + group_name = '' + if 'group' in feature: + group_name = feature['group'] + else: + toks = feature_name.split('_') + group_name = toks[0] + if group_name in group_map: + group_name = group_map[group_name] + if group_name in feature_groups: + feature_groups[group_name].append(feature_name) + else: + feature_groups[group_name] = [feature_name] + + logging.info( + 'if group is specified, group will be used as feature group name; ' + 'otherwise, the prefix of feature_name in fg.json is used as feature group name' + ) + logging.info('prefix map: %s' % str(group_map)) + for group_name in feature_groups: + logging.info('add group = %s' % group_name) + group = FeatureGroupConfig() + group.group_name = group_name + for fea_name in feature_groups[group_name]: + group.feature_names.append(fea_name) + group.wide_deep = WideOrDeep.DEEP + pipeline_config.model_config.feature_groups.append(group) + + multi_tower_config_str = ' multi_tower {\n' + for group_name in feature_groups: + multi_tower_config_str += """ + towers { + input: "%s" + dnn { + hidden_units: [256, 192, 128] + } + } + """ % group_name + + multi_tower_config_str = multi_tower_config_str + """ + final_dnn { + hidden_units: [192, 128, 64] + } + l2_regularization: 1e-4 + } + """ + text_format.Merge(multi_tower_config_str, pipeline_config.model_config) + pipeline_config.model_config.embedding_regularization = 1e-5 + + elif model_type == 'esmm': + pipeline_config.model_config.model_class = 'ESMM' + + feature_groups = {} + for feature in rtp_features: + feature_name = feature['feature_name'] + group = feature.get('group', 'all') + if group in feature_groups: + feature_groups[group].append(feature_name) + else: + feature_groups[group] = [feature_name] + + for group_name in feature_groups: + logging.info('add group = %s' % group_name) + group = FeatureGroupConfig() + group.group_name = group_name + for fea_name in feature_groups[group_name]: + group.feature_names.append(fea_name) + group.wide_deep = WideOrDeep.DEEP + pipeline_config.model_config.feature_groups.append(group) + + esmm_config_str = ' esmm {\n' + for group_name in feature_groups: + esmm_config_str += """ + groups { + input: "%s" + dnn { + hidden_units: [256, 128, 96, 64] + } + }""" % group_name + + esmm_config_str += """ + ctr_tower { + tower_name: "ctr" + label_name: "%s" + dnn { + hidden_units: [128, 96, 64, 32, 16] + } + num_class: 1 + weight: 1.0 + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + } + cvr_tower { + tower_name: "cvr" + label_name: "%s" + dnn { + hidden_units: [128, 96, 64, 32, 16] + } + num_class: 1 + weight: 1.0 + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + } + l2_regularization: 1e-6 + }""" % (label_fields[0], label_fields[1]) + text_format.Merge(esmm_config_str, pipeline_config.model_config) + pipeline_config.model_config.embedding_regularization = 5e-5 + elif model_type == 'dbmtl': + pipeline_config.model_config.model_class = 'DBMTL' + + feature_groups = {} + for feature in rtp_features: + feature_name = feature['feature_name'] + group = 'all' + if 
group in feature_groups: + feature_groups[group].append(feature_name) + else: + feature_groups[group] = [feature_name] + + for group_name in feature_groups: + logging.info('add group = %s' % group_name) + group = FeatureGroupConfig() + group.group_name = group_name + for fea_name in feature_groups[group_name]: + group.feature_names.append(fea_name) + group.wide_deep = WideOrDeep.DEEP + pipeline_config.model_config.feature_groups.append(group) + + dbmtl_config_str = """ + dbmtl { + bottom_dnn { + hidden_units: [1024] + } + expert_dnn { + hidden_units: [256, 128, 64, 32] + } + num_expert: 8 + task_towers { + tower_name: "ctr" + label_name: "%s" + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + dnn { + hidden_units: [256, 128, 64, 32] + } + relation_dnn { + hidden_units: [32] + } + weight: 1.0 + } + task_towers { + tower_name: "cvr" + label_name: "%s" + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + dnn { + hidden_units: [256, 128, 64, 32] + } + relation_tower_names: ["ctr"] + relation_dnn { + hidden_units: [32] + } + weight: 1.0 + } + l2_regularization: 1e-6 + } + """ % (label_fields[0], label_fields[1]) + text_format.Merge(dbmtl_config_str, pipeline_config.model_config) + pipeline_config.model_config.embedding_regularization = 5e-6 + + if model_type in ['wide_and_deep', 'deepfm', 'multi_tower']: + text_format.Merge(""" + metrics_set { + auc {} + } + """, pipeline_config.eval_config) + + text_format.Merge( + """ export_config { + multi_placeholder: false + } + """, pipeline_config) + + if edit_config_json: + for edit_obj in edit_config_json: + config_util.edit_config(pipeline_config, edit_obj) + + pipeline_config.model_config.embedding_regularization = 1e-5 + return pipeline_config diff --git a/easy_rec/python/utils/estimator_utils.py b/easy_rec/python/utils/estimator_utils.py index 2599cca53..7570fcb52 100644 --- a/easy_rec/python/utils/estimator_utils.py +++ b/easy_rec/python/utils/estimator_utils.py @@ -14,9 +14,10 @@ import numpy as np import six import tensorflow as tf +from tensorflow.core.framework.summary_pb2 import Summary from tensorflow.python.framework import meta_graph +from tensorflow.python.training.summary_io import SummaryWriterCache -from easy_rec.python.protos.eas_serving_pb2 import EmbeddingPartData from easy_rec.python.utils import shape_utils if tf.__version__ >= '2.0': @@ -165,60 +166,6 @@ def end(self, session): self._progress_file.close() -class EmbeddingPartSaver: - """Large Embedding Saver. - - For large embedding serving on eas, large embeddings are partitioned and saved separately. - """ - - def __init__(self, var): - self._var = var - # normalize var names - var_name = var.name.split('/') - if var_name[-2] == 'embedding_weights' and 'part_' in var_name[-1]: - # input_layer uid embedding_weights part_0:0 - part_name = var_name[-1].split(':')[0] - var_name = var_name[-3] - if var_name.endswith('_embedding'): - var_name = var_name[:-len('_embedding')] - var_name = var_name + '.' + part_name - else: - # input_layer tag embedding_weights:0 - var_name = var_name[-2] - if var_name.endswith('_embedding'): - var_name = var_name[:-len('_embedding')] - logging.info('embedding variable name: %s normalize_name: %s' % - (var.name, var_name)) - self._var_name = var_name - - @property - def name(self): - return self._var_name - - def save(self, session, save_path, global_step): - """Save embedding data as EmbeddingPartData as .pb files. 
- - Args: - session: tf.Session instance - save_path: data save path - global_step: train step - """ - var_data = session.run(self._var) - embed_part_data = EmbeddingPartData() - - for x in var_data.shape: - embed_part_data.shape.append(x) - - for r in range(var_data.shape[0]): - for c in range(var_data.shape[1]): - embed_part_data.data.append(var_data[r, c]) - - save_path = save_path + '.pb.' + str(global_step) - with tf.gfile.GFile(save_path, 'wb') as fout: - fout.write(embed_part_data.SerializeToString()) - logging.info('save embedding %s to %s done' % (self._var_name, save_path)) - - class CheckpointSaverHook(CheckpointSaverHook): """Saves checkpoints every N steps or seconds.""" @@ -296,9 +243,6 @@ def _save(self, session, step): global_step=step, write_meta_graph=self._write_graph) save_dir, save_name = os.path.split(self._save_path) - save_dir = os.path.join(save_dir, 'embeddings') - if not tf.gfile.Exists(save_dir): - tf.gfile.MakeDirs(save_dir) self._summary_writer.add_session_log( tf.SessionLog( @@ -449,6 +393,38 @@ def after_create_session(self, session, coord): saver.restore(session, ckpt_path) +class OnlineEvaluationHook(SessionRunHook): + + def __init__(self, metric_dict, output_dir): + self._metric_dict = metric_dict + self._output_dir = output_dir + self._summary_writer = SummaryWriterCache.get(self._output_dir) + + def end(self, session): + metric_tensor_dict = {k: v[0] for k, v in self._metric_dict.items()} + metric_value_dict = session.run(metric_tensor_dict) + tf.logging.info('Eval metric: %s' % metric_value_dict) + + global_step_tensor = tf.train.get_or_create_global_step() + global_step = session.run(global_step_tensor) + + summary = Summary() + for k, v in metric_value_dict.items(): + summary.value.add(tag=k, simple_value=v) + self._summary_writer.add_summary(summary, global_step=global_step) + self._summary_writer.flush() + + eval_result_file = os.path.join(self._output_dir, + 'online_eval_result.txt-%s' % global_step) + logging.info('Saving online eval result to file %s' % eval_result_file) + with tf.gfile.GFile(eval_result_file, 'w') as ofile: + result_to_write = {} + for key in sorted(metric_value_dict): + # convert numpy float to python float + result_to_write[key] = metric_value_dict[key].item() + ofile.write(json.dumps(result_to_write, indent=2)) + + def parse_tf_config(): tf_config_str = os.environ.get('TF_CONFIG', '') if 'TF_CONFIG' in os.environ: @@ -477,3 +453,77 @@ def get_task_index_and_num(): if task_type not in ['chief', 'master']: task_index += 1 return task_index, task_num + + +def get_ckpt_version(ckpt_path): + """Get checkpoint version from ckpt_path. + + Args: + ckpt_path: such as xx/model.ckpt-2000 or xx/model.ckpt-2000.meta + + Return: + ckpt_version: such as 2000 + """ + _, ckpt_name = os.path.split(ckpt_path) + ckpt_name, ext = os.path.splitext(ckpt_name) + if ext.startswith('.ckpt-'): + ckpt_name = ext + toks = ckpt_name.split('-') + return int(toks[-1]) + + +def latest_checkpoint(model_dir): + """Find lastest checkpoint under a directory. 
+ + Args: + model_dir: model directory + + Return: + model_path: xx/model.ckpt-2000 + """ + ckpt_metas = tf.gfile.Glob(os.path.join(model_dir, 'model.ckpt-*.meta')) + if len(ckpt_metas) == 0: + return None + + if len(ckpt_metas) > 1: + ckpt_metas.sort(key=lambda x: get_ckpt_version(x)) + ckpt_path = os.path.splitext(ckpt_metas[-1])[0] + return ckpt_path + + +def master_to_chief(): + if 'TF_CONFIG' in os.environ: + tf_config = json.loads(os.environ['TF_CONFIG']) + # change chief to master + if 'master' in tf_config['cluster']: + tf_config['cluster']['chief'] = tf_config['cluster']['master'] + del tf_config['cluster']['chief'] + if tf_config['task']['type'] == 'master': + tf_config['task']['type'] = 'chief' + os.environ['TF_CONFIG'] = json.dumps(tf_config) + return tf_config + else: + return None + + +def chief_to_master(): + if 'TF_CONFIG' in os.environ: + tf_config = json.loads(os.environ['TF_CONFIG']) + # change chief to master + if 'chief' in tf_config['cluster']: + tf_config['cluster']['master'] = tf_config['cluster']['chief'] + del tf_config['cluster']['chief'] + if tf_config['task']['type'] == 'chief': + tf_config['task']['type'] = 'master' + os.environ['TF_CONFIG'] = json.dumps(tf_config) + return tf_config + else: + return None + + +def is_chief(): + if 'TF_CONFIG' in os.environ: + tf_config = json.loads(os.environ['TF_CONFIG']) + if 'task' in tf_config: + return tf_config['task']['type'] in ['chief', 'master'] + return True diff --git a/easy_rec/python/utils/export_big_model.py b/easy_rec/python/utils/export_big_model.py new file mode 100644 index 000000000..faf356277 --- /dev/null +++ b/easy_rec/python/utils/export_big_model.py @@ -0,0 +1,281 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. + +import logging +import os +import time + +import numpy as np +import tensorflow as tf +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.framework import ops +from tensorflow.python.ops.variables import global_variables +from tensorflow.python.platform.gfile import DeleteRecursively +from tensorflow.python.platform.gfile import Exists +from tensorflow.python.platform.gfile import GFile +from tensorflow.python.platform.gfile import Remove +from tensorflow.python.saved_model import signature_constants +from tensorflow.python.training.device_setter import replica_device_setter +from tensorflow.python.training.monitored_session import ChiefSessionCreator +from tensorflow.python.training.saver import export_meta_graph + +import easy_rec +from easy_rec.python.utils import estimator_utils +from easy_rec.python.utils import io_util +from easy_rec.python.utils import proto_util +from easy_rec.python.utils.meta_graph_editor import MetaGraphEditor + +if tf.__version__ >= '2.0': + from tensorflow.python.framework.ops import disable_eager_execution + disable_eager_execution() + +ConfigProto = config_pb2.ConfigProto +GPUOptions = config_pb2.GPUOptions + + +def export_big_model(export_dir, pipeline_config, redis_params, + serving_input_fn, estimator, checkpoint_path, verbose): + for key in redis_params: + logging.info('%s: %s' % (key, redis_params[key])) + write_kv_lib_path = os.path.join(easy_rec.ops_dir, 'libwrite_kv.so') + kv_module = tf.load_op_library(write_kv_lib_path) + + try: + sparse_kv_lib_path = os.path.join(easy_rec.ops_dir, 'libwrite_sparse_kv.so') + sparse_kv_module = tf.load_op_library(sparse_kv_lib_path) + except Exception as ex: + logging.warning('load libwrite_sparse_kv.so failed: %s' % str(ex)) + sparse_kv_module = None + if not 
checkpoint_path: + checkpoint_path = tf.train.latest_checkpoint(pipeline_config.model_dir) + logging.info('checkpoint_path = %s' % checkpoint_path) + + if 'TF_CONFIG' in os.environ: + # change chief to master + tf_config = estimator_utils.chief_to_master() + if tf_config['task']['type'] == 'ps': + cluster = tf.train.ClusterSpec(tf_config['cluster']) + server = tf.train.Server( + cluster, job_name='ps', task_index=tf_config['task']['index']) + server.join() + elif tf_config['task']['type'] == 'master': + if 'ps' in tf_config['cluster']: + cluster = tf.train.ClusterSpec(tf_config['cluster']) + server = tf.train.Server(cluster, job_name='master', task_index=0) + server_target = server.target + logging.info('server_target = %s' % server_target) + else: + server = None + cluster = None + + serving_input = serving_input_fn() + features = serving_input.features + inputs = serving_input.receiver_tensors + + if cluster: + logging.info('cluster = ' + str(cluster)) + with tf.device( + replica_device_setter( + worker_device='/job:master/task:0', cluster=cluster)): + outputs = estimator._export_model_fn(features, None, None).predictions + + meta_graph_def = export_meta_graph() + redis_embedding_version = redis_params.get('redis_embedding_version', '') + if not redis_embedding_version: + meta_graph_def.meta_info_def.meta_graph_version =\ + str(int(time.time())) + else: + meta_graph_def.meta_info_def.meta_graph_version = redis_embedding_version + + logging.info('meta_graph_version = %s' % + meta_graph_def.meta_info_def.meta_graph_version) + + embed_var_parts = {} + embed_norm_name = {} + embed_spos = {} + # pai embedding variable + embedding_vars = {} + norm_name_to_ids = {} + for x in global_variables(): + if 'EmbeddingVariable' in str(type(x)): + norm_name, part_id = proto_util.get_norm_embed_name(x.name) + norm_name_to_ids[norm_name] = 1 + tmp_export = x.export() + if x.device not in embedding_vars: + embedding_vars[x.device] = [(norm_name, tmp_export.keys, + tmp_export.values)] + else: + embedding_vars[x.device].append( + (norm_name, tmp_export.keys, tmp_export.values)) + elif '/embedding_weights:' in x.name or '/embedding_weights/part_' in x.name: + norm_name, part_id = proto_util.get_norm_embed_name(x.name) + norm_name_to_ids[norm_name] = 1 + embed_norm_name[x] = norm_name + if norm_name not in embed_var_parts: + embed_var_parts[norm_name] = {part_id: x} + else: + embed_var_parts[norm_name][part_id] = x + + for tid, t in enumerate(norm_name_to_ids.keys()): + norm_name_to_ids[t] = str(tid) + for x in embed_norm_name: + embed_norm_name[x] = norm_name_to_ids[embed_norm_name[x]] + + total_num = 0 + for norm_name in embed_var_parts: + parts = embed_var_parts[norm_name] + spos = 0 + part_ids = list(parts.keys()) + part_ids.sort() + total_num += len(part_ids) + for part_id in part_ids: + embed_spos[parts[part_id]] = spos + spos += parts[part_id].get_shape()[0] + + redis_url = redis_params.get('redis_url', '') + redis_passwd = redis_params.get('redis_passwd', '') + logging.info('will export to redis: %s %s' % (redis_url, redis_passwd)) + + if redis_params.get('redis_write_kv', ''): + # group embed by devices + per_device_vars = {} + for x in embed_norm_name: + if x.device not in per_device_vars: + per_device_vars[x.device] = [x] + else: + per_device_vars[x.device].append(x) + + all_write_res = [] + for tmp_dev in per_device_vars: + tmp_vars = per_device_vars[tmp_dev] + with tf.device(tmp_dev): + tmp_names = [embed_norm_name[v] for v in tmp_vars] + tmp_spos = [np.array(embed_spos[v], dtype=np.int64) for v 
in tmp_vars] + write_kv_res = kv_module.write_kv( + tmp_names, + tmp_vars, + tmp_spos, + url=redis_url, + password=redis_passwd, + timeout=redis_params.get('redis_timeout', 1500), + version=meta_graph_def.meta_info_def.meta_graph_version, + threads=redis_params.get('redis_threads', 5), + batch_size=redis_params.get('redis_batch_size', 32), + expire=redis_params.get('redis_expire', 24), + verbose=verbose) + all_write_res.append(write_kv_res) + + for tmp_dev in embedding_vars: + with tf.device(tmp_dev): + tmp_vs = embedding_vars[tmp_dev] + tmp_sparse_names = [norm_name_to_ids[x[0]] for x in tmp_vs] + tmp_sparse_keys = [x[1] for x in tmp_vs] + tmp_sparse_vals = [x[2] for x in tmp_vs] + write_sparse_kv_res = sparse_kv_module.write_sparse_kv( + tmp_sparse_names, + tmp_sparse_vals, + tmp_sparse_keys, + url=redis_url, + password=redis_passwd, + timeout=redis_params.get('redis_timeout', 1500), + version=meta_graph_def.meta_info_def.meta_graph_version, + threads=redis_params.get('redis_threads', 5), + batch_size=redis_params.get('redis_batch_size', 32), + expire=redis_params.get('redis_expire', 24), + verbose=verbose) + all_write_res.append(write_sparse_kv_res) + + session_config = ConfigProto( + allow_soft_placement=True, log_device_placement=False) + chief_sess_creator = ChiefSessionCreator( + master=server.target if server else '', + checkpoint_filename_with_path=checkpoint_path, + config=session_config) + with tf.train.MonitoredSession( + session_creator=chief_sess_creator, + hooks=None, + stop_grace_period_secs=120) as sess: + dump_flags = sess.run(all_write_res) + logging.info('write embedding to redis succeed: %s' % str(dump_flags)) + else: + logging.info('will skip write embedding to redis because ' + 'redis_write_kv is set to 0.') + + # delete embedding_weights collections so that it could be re imported + tmp_drop = [] + for k in meta_graph_def.collection_def: + v = meta_graph_def.collection_def[k] + if len( + v.node_list.value) > 0 and 'embedding_weights' in v.node_list.value[0]: + tmp_drop.append(k) + for k in tmp_drop: + meta_graph_def.collection_def.pop(k) + + meta_graph_editor = MetaGraphEditor( + os.path.join(easy_rec.ops_dir, 'libkv_lookup.so'), + None, + redis_url, + redis_passwd, + redis_timeout=redis_params.get('redis_timeout', 600), + meta_graph_def=meta_graph_def, + norm_name_to_ids=norm_name_to_ids, + debug_dir=export_dir if verbose else '') + meta_graph_editor.edit_graph() + tf.reset_default_graph() + + saver = tf.train.import_meta_graph(meta_graph_editor._meta_graph_def) + graph = tf.get_default_graph() + + embed_name_to_id_file = os.path.join(export_dir, 'embed_name_to_ids.txt') + with GFile(embed_name_to_id_file, 'w') as fout: + for tmp_norm_name in norm_name_to_ids: + fout.write('%s\t%s\n' % (tmp_norm_name, norm_name_to_ids[tmp_norm_name])) + tf.add_to_collection( + tf.GraphKeys.ASSET_FILEPATHS, + tf.constant( + embed_name_to_id_file, dtype=tf.string, name='embed_name_to_ids.txt')) + + export_dir = os.path.join(export_dir, + meta_graph_def.meta_info_def.meta_graph_version) + export_dir = io_util.fix_oss_dir(export_dir) + if Exists(export_dir): + logging.info('will delete old dir: %s' % export_dir) + DeleteRecursively(export_dir) + + builder = tf.saved_model.builder.SavedModelBuilder(export_dir) + tensor_info_inputs = {} + for tmp_key in inputs: + tmp = graph.get_tensor_by_name(inputs[tmp_key].name) + tensor_info_inputs[tmp_key] = \ + tf.saved_model.utils.build_tensor_info(tmp) + tensor_info_outputs = {} + for tmp_key in outputs: + tmp = 
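# Illustrative sketch (not part of the patch): shape of the redis_params dict consumed
# by export_big_model; the keys mirror the .get() calls above, and the url/password
# values are placeholders.
redis_params = {
    'redis_url': '127.0.0.1:6379',
    'redis_passwd': '<passwd>',
    'redis_write_kv': '1',              # embeddings are uploaded only when truthy
    'redis_embedding_version': '',      # empty -> current timestamp is used as version
    'redis_timeout': 1500,
    'redis_threads': 5,
    'redis_batch_size': 32,
    'redis_expire': 24,
}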
graph.get_tensor_by_name(outputs[tmp_key].name) + tensor_info_outputs[tmp_key] = \ + tf.saved_model.utils.build_tensor_info(tmp) + signature = ( + tf.saved_model.signature_def_utils.build_signature_def( + inputs=tensor_info_inputs, + outputs=tensor_info_outputs, + method_name=signature_constants.PREDICT_METHOD_NAME)) + + session_config = ConfigProto( + allow_soft_placement=True, log_device_placement=True) + + saver = tf.train.Saver() + with tf.Session(target=server.target if server else '') as sess: + saver.restore(sess, checkpoint_path) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature, + }, + assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS), + saver=saver, + strip_default_attrs=True, + clear_devices=True) + builder.save() + + # remove temporary files + Remove(embed_name_to_id_file) + return diff --git a/easy_rec/python/utils/hpo_util.py b/easy_rec/python/utils/hpo_util.py index 4a265dc48..753c2eb99 100644 --- a/easy_rec/python/utils/hpo_util.py +++ b/easy_rec/python/utils/hpo_util.py @@ -4,6 +4,7 @@ import logging import os +import psutil import tensorflow as tf from tensorflow.python.summary import summary_iterator @@ -77,36 +78,39 @@ def _get_eval_event_file_pattern(): def kill_old_proc(tmp_dir, platform='pai'): - old_proc_file = os.path.join(tmp_dir, 'old_proc.txt') + curr_pid = os.getpid() if platform == 'pai': - os.system( - "ps -auxwww | grep easy_rec.python.hpo.pai_hpo | grep python | grep -v grep | awk '{ print $2 }' >> %s" - % old_proc_file) - os.system( - "ps -auxwww | grep client/experiment_main.py | grep python | grep -v grep | awk '{ print $2 }' >> %s" - % old_proc_file) + for p in psutil.process_iter(): + try: + cmd = ' '.join(p.cmdline()) + if 'easy_rec.python.hpo.pai_hpo' in cmd and 'python' in cmd: + if p.pid != curr_pid: + logging.info('will kill: [%d] %s' % (p.pid, cmd)) + p.terminate() + if 'client/experiment_main.py' in cmd and 'python' in cmd: + if p.pid != curr_pid: + logging.info('will kill: [%d] %s' % (p.pid, cmd)) + p.terminate() + except Exception: + pass else: - os.system( - "ps -auxwww | grep easy_rec.python.hpo.emr_hpo | grep -v grep | awk '{ print $2 }' > %s" - % old_proc_file) - os.system( - "ps -auxwww | grep client/experiment_main.py | grep python | grep -v grep | awk '{ print $2 }' >> %s " - % old_proc_file) - os.system( - "ps -auxwww | grep el_submit | grep easy_rec_hpo | grep -v grep | awk '{ print $2 }' >> %s " - % old_proc_file) - proc_arr = [] - with open(old_proc_file, 'r') as fin: - for line_str in fin: - line_str = line_str.strip() - proc_arr.append(line_str) - proc_arr = list(set(proc_arr)) - # remove current pid to avoid current process being killed - pid = os.getpid() - proc_arr.remove(str(pid)) - if len(proc_arr) > 0: - logging.info('old process to be killed: %s' % ','.join(proc_arr)) - os.system('kill -9 %s' % (' '.join(proc_arr))) + for p in psutil.process_iter(): + try: + cmd = ' '.join(p.cmdline()) + if 'easy_rec.python.hpo.emr_hpo' in cmd and 'python' in cmd: + if p.pid != curr_pid: + logging.info('will kill: [%d] %s' % (p.pid, cmd)) + p.terminate() + if 'client/experiment_main.py' in cmd and 'python' in cmd: + if p.pid != curr_pid: + logging.info('will kill: [%d] %s' % (p.pid, cmd)) + p.terminate() + if 'el_submit' in cmd and 'easy_rec_hpo' in cmd: + if p.pid != curr_pid: + logging.info('will kill: [%d] %s' % (p.pid, cmd)) + p.terminate() + except Exception: + pass if platform == 'emr': # 
clear easy_rec_hpo yarn jobs diff --git a/easy_rec/python/utils/io_util.py b/easy_rec/python/utils/io_util.py index 540472918..c7702c318 100644 --- a/easy_rec/python/utils/io_util.py +++ b/easy_rec/python/utils/io_util.py @@ -158,3 +158,10 @@ def oss_has_t_mode(target_file): return True except: # noqa: E722 return False + + +def fix_oss_dir(path): + """Make sure that oss dir endswith /.""" + if path.startswith('oss://') and not path.endswith('/'): + return path + '/' + return path diff --git a/easy_rec/python/utils/meta_graph_editor.py b/easy_rec/python/utils/meta_graph_editor.py new file mode 100644 index 000000000..2cdde4952 --- /dev/null +++ b/easy_rec/python/utils/meta_graph_editor.py @@ -0,0 +1,701 @@ +# -*- encoding:utf-8 -*- +import logging +import os + +import tensorflow as tf +from google.protobuf import text_format +from tensorflow.python.platform.gfile import GFile +from tensorflow.python.saved_model import signature_constants +from tensorflow.python.saved_model.loader_impl import SavedModelLoader + +from easy_rec.python.utils import proto_util + + +class MetaGraphEditor: + + def __init__(self, + lookup_lib_path, + saved_model_dir, + redis_url, + redis_passwd, + redis_timeout, + meta_graph_def=None, + norm_name_to_ids=None, + debug_dir=''): + self._lookup_op = tf.load_op_library(lookup_lib_path) + self._debug_dir = debug_dir + self._verbose = debug_dir != '' + if saved_model_dir: + tags = ['serve'] + loader = SavedModelLoader(saved_model_dir) + saver, _ = loader.load_graph(tf.get_default_graph(), tags, None) + meta_graph_def = loader.get_meta_graph_def_from_tags(tags) + else: + assert meta_graph_def, 'either saved_model_dir or meta_graph_def must be set' + tf.reset_default_graph() + tf.train.import_meta_graph(meta_graph_def) + self._meta_graph_version = meta_graph_def.meta_info_def.meta_graph_version + self._signature_def = meta_graph_def.signature_def[ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + + if self._verbose: + debug_out_path = os.path.join(self._debug_dir, 'meta_graph_raw.txt') + with GFile(debug_out_path, 'w') as fout: + fout.write(text_format.MessageToString(meta_graph_def, as_utf8=True)) + self._meta_graph_def = meta_graph_def + self._old_node_num = len(self._meta_graph_def.graph_def.node) + self._all_graph_nodes = None + self._all_graph_node_flags = None + self._restore_tensor_node = None + self._restore_shard_node = None + self._restore_all_node = [] + self._lookup_outs = None + self._feature_names = None + self._embed_names = None + self._embed_name_to_ids = norm_name_to_ids + self._embed_ids = None + self._embed_dims = None + self._embed_combiners = None + self._redis_url = redis_url + self._redis_passwd = redis_passwd + self._redis_timeout = redis_timeout + + @property + def graph_def(self): + return self._meta_graph_def.graph_def + + @property + def signature_def(self): + return self._signature_def + + @property + def meta_graph_version(self): + return self._meta_graph_version + + def init_graph_node_clear_flags(self): + graph_def = self._meta_graph_def.graph_def + self._all_graph_nodes = [n for n in graph_def.node] + self._all_graph_node_flags = [True for n in graph_def.node] + + def _get_share_embed_name(self, x, embed_names): + """Map share embedding tensor names to embed names. 
+ + Args: + x: string, embedding tensor names, such as: + input_layer_1/shared_embed_1/field16_shared_embedding + input_layer_1/shared_embed_2/field17_shared_embedding + input_layer/shared_embed_wide/field15_shared_embedding + input_layer/shared_embed_wide_1/field16_shared_embedding + embed_names: all the optional embedding_names + Return: + one element in embed_names, such as: + input_layer_1/shared_embed + input_layer_1/shared_embed + input_layer/shared_embed_wide + input_layer/shared_embed_wide + """ + assert x.endswith('_shared_embedding') + name_toks = x.split('/') + name_toks = name_toks[:-1] + tmp = name_toks[-1] + tmp = tmp.split('_') + try: + int(tmp[-1]) + name_toks[-1] = '_'.join(tmp[:-1]) + except Exception: + pass + tmp_name = '/'.join(name_toks[1:]) + sel_embed_name = '' + for embed_name in embed_names: + tmp_toks = embed_name.split('/') + tmp_toks = tmp_toks[1:] + embed_name_sub = '/'.join(tmp_toks) + if tmp_name == embed_name_sub: + assert not sel_embed_name, 'confusions encountered: %s %s' % ( + x, ','.join(embed_names)) + sel_embed_name = embed_name + assert sel_embed_name, '%s not find in shared_embeddings: %s' % ( + tmp_name, ','.join(embed_names)) + return sel_embed_name + + def _find_embed_combiners(self, norm_embed_names): + """Find embedding lookup combiner methods. + + Args: + norm_embed_names: normalized embedding names + Return: + list: combiner methods for each features: sum, mean, sqrtn + """ + embed_combiners = {} + embed_combine_node_cts = {} + combiner_map = { + 'SparseSegmentSum': 'sum', + 'SparseSegmentMean': 'mean', + 'SparseSegmentSqrtN': 'sqrtn' + } + for node in self._meta_graph_def.graph_def.node: + if node.op in combiner_map: + norm_name, _ = proto_util.get_norm_embed_name(node.name) + embed_combiners[norm_name] = combiner_map[node.op] + embed_combine_node_cts[norm_name] = embed_combine_node_cts.get( + norm_name, 0) + 1 + elif node.op == 'RealDiv' and len(node.input) == 2: + # for tag feature with weights, and combiner == mean + if 'SegmentSum' in node.input[0] and 'SegmentSum' in node.input[1]: + norm_name, _ = proto_util.get_norm_embed_name(node.name) + embed_combiners[norm_name] = 'mean' + embed_combine_node_cts[norm_name] = embed_combine_node_cts.get( + norm_name, 0) + 1 + elif node.op == 'SegmentSum': + norm_name, _ = proto_util.get_norm_embed_name(node.name) + # avoid overwrite RealDiv results + if norm_name not in embed_combiners: + embed_combiners[norm_name] = 'sum' + embed_combine_node_cts[norm_name] = embed_combine_node_cts.get( + norm_name, 0) + 1 + return [embed_combiners[x] for x in norm_embed_names] + + def _find_lookup_indices_values_shapes(self): + # use the specific _embedding_weights/SparseReshape to find out + # lookup inputs: indices, values, dense_shape, weights + indices = {} + values = {} + shapes = {} + + def _get_output_shape(graph_def, input_name): + out_id = 0 + if ':' in input_name: + node_name, out_id = input_name.split(':') + out_id = int(out_id) + else: + node_name = input_name + for node in graph_def.node: + if node.name == node_name: + return node.attr['_output_shapes'].list.shape[out_id] + return None + + for node in self._meta_graph_def.graph_def.node: + if '_embedding_weights/SparseReshape' in node.name: + if node.op == 'SparseReshape': + # embed_name, _ = proto_util.get_norm_embed_name(node.name, self._verbose) + fea_name, _ = proto_util.get_norm_embed_name(node.name, self._verbose) + for tmp_input in node.input: + tmp_shape = _get_output_shape(self._meta_graph_def.graph_def, + tmp_input) + if 
'_embedding_weights/Cast' in tmp_input: + continue + elif len(tmp_shape.dim) == 2: + indices[fea_name] = tmp_input + elif len(tmp_shape.dim) == 1: + shapes[fea_name] = tmp_input + elif node.op == 'Identity': + fea_name, _ = proto_util.get_norm_embed_name(node.name, self._verbose) + values[fea_name] = node.input[0] + return indices, values, shapes + + def _find_lookup_weights(self): + weights = {} + for node in self._meta_graph_def.graph_def.node: + if '_weighted_by_' in node.name and 'GatherV2' in node.name: + has_sparse_reshape = False + for tmp_input in node.input: + if 'SparseReshape' in tmp_input: + has_sparse_reshape = True + if has_sparse_reshape: + continue + if len(node.input) != 3: + continue + # try to find nodes with weights + # input_layer/xxx_weighted_by_yyy_embedding/xxx_weighted_by_yyy_embedding_weights/GatherV2_[0-9] + # which has three inputs: + # input_layer/xxx_weighted_by_yyy_embedding/xxx_weighted_by_yyy_embedding_weights/Reshape_1 + # DeserializeSparse_1 (this is the weight) + # input_layer/xxx_weighted_by_yyy_embedding/xxx_weighted_by_yyy_embedding_weights/GatherV2_4/axis + fea_name, _ = proto_util.get_norm_embed_name(node.name, self._verbose) + for tmp_input in node.input: + if '_weighted_by_' not in tmp_input: + weights[fea_name] = tmp_input + return weights + + def _find_embed_names_and_dims(self, norm_embed_names): + # get embedding dimensions from Variables + embed_dims = {} + for node in self._meta_graph_def.graph_def.node: + if 'embedding_weights' in node.name and node.op in [ + 'VariableV2', 'KvVarHandleOp' + ]: + tmp = node.attr['shape'].shape.dim[-1].size + embed_name, _ = proto_util.get_norm_embed_name(node.name, self._verbose) + embed_dims[embed_name] = tmp + assert embed_name is not None,\ + 'fail to get_norm_embed_name(%s)' % node.name + + # get all embedding dimensions, note that some embeddings + # are shared by multiple inputs, so the names should be + # transformed + all_embed_dims = [] + all_embed_names = [] + for x in norm_embed_names: + if x in embed_dims: + all_embed_names.append(x) + all_embed_dims.append(embed_dims[x]) + elif x.endswith('_shared_embedding'): + tmp_embed_name = self._get_share_embed_name(x, embed_dims.keys()) + all_embed_names.append(tmp_embed_name) + all_embed_dims.append(embed_dims[tmp_embed_name]) + return all_embed_names, all_embed_dims + + def find_lookup_inputs(self): + logging.info('Extract embedding_lookup inputs') + + indices, values, shapes = self._find_lookup_indices_values_shapes() + weights = self._find_lookup_weights() + + for fea in shapes.keys(): + logging.info('Lookup Input[%s]: indices=%s values=%s shapes=%s' % + (fea, indices[fea], values[fea], shapes[fea])) + + graph = tf.get_default_graph() + + def _get_tensor_by_name(tensor_name): + if ':' not in tensor_name: + tensor_name = tensor_name + ':0' + return graph.get_tensor_by_name(tensor_name) + + lookup_input_values = [] + lookup_input_indices = [] + lookup_input_shapes = [] + lookup_input_weights = [] + for key in values.keys(): + tmp_val, tmp_ind, tmp_shape = values[key], indices[key], shapes[key] + lookup_input_values.append(_get_tensor_by_name(tmp_val)) + lookup_input_indices.append(_get_tensor_by_name(tmp_ind)) + lookup_input_shapes.append(_get_tensor_by_name(tmp_shape)) + if key in weights: + tmp_w = weights[key] + lookup_input_weights.append(_get_tensor_by_name(tmp_w)) + else: + lookup_input_weights.append([]) + + # get embedding combiners + self._embed_combiners = self._find_embed_combiners(values.keys()) + + # get embedding dimensions + 
self._embed_names, self._embed_dims = self._find_embed_names_and_dims( + values.keys()) + + if not self._embed_name_to_ids: + embed_name_uniq = list(set(self._embed_names)) + self._embed_name_to_ids = { + t: str(tid) for tid, t in enumerate(embed_name_uniq) + } + self._embed_ids = [self._embed_name_to_ids[x] for x in self._embed_names] + + # normalized feature names + self._feature_names = list(values.keys()) + + return lookup_input_indices, lookup_input_values, lookup_input_shapes,\ + lookup_input_weights + + def add_lookup_op(self, lookup_input_indices, lookup_input_values, + lookup_input_shapes, lookup_input_weights): + logging.info('add custom lookup operation to lookup embeddings from redis') + for i in range(len(lookup_input_values)): + if lookup_input_values[i].dtype == tf.int32: + lookup_input_values[i] = tf.to_int64(lookup_input_values[i]) + self._lookup_outs = self._lookup_op.kv_lookup( + lookup_input_indices, + lookup_input_values, + lookup_input_shapes, + lookup_input_weights, + url=self._redis_url, + password=self._redis_passwd, + timeout=self._redis_timeout, + combiners=self._embed_combiners, + embedding_dims=self._embed_dims, + embedding_names=self._embed_ids, + version=self._meta_graph_version) + + meta_graph_def = tf.train.export_meta_graph() + + if self._verbose: + debug_path = os.path.join(self._debug_dir, 'graph_raw.txt') + with GFile(debug_path, 'w') as fout: + fout.write( + text_format.MessageToString( + self._meta_graph_def.graph_def, as_utf8=True)) + return meta_graph_def + + def bytes2str(self, x): + if bytes == str: + return x + else: + return x.decode('utf-8') + + def clear_meta_graph_embeding(self, meta_graph_def): + logging.info('clear meta graph embedding_weights') + + def _clear_embedding_in_meta_collect(meta_graph_def, collect_name): + tmp_vals = [ + x + for x in meta_graph_def.collection_def[collect_name].bytes_list.value + if 'embedding_weights' not in self.bytes2str(x) + ] + meta_graph_def.collection_def[collect_name].bytes_list.ClearField('value') + for tmp_v in tmp_vals: + meta_graph_def.collection_def[collect_name].bytes_list.value.append( + tmp_v) + + _clear_embedding_in_meta_collect(meta_graph_def, 'model_variables') + _clear_embedding_in_meta_collect(meta_graph_def, 'trainable_variables') + _clear_embedding_in_meta_collect(meta_graph_def, 'variables') + + # clear Kv(pai embedding variable) ops in meta_info_def.stripped_op_list.op + kept_ops = [ + x for x in meta_graph_def.meta_info_def.stripped_op_list.op + if x.name not in [ + 'InitializeKvVariableOp', 'KvResourceGather', 'KvResourceImportV2', + 'KvVarHandleOp', 'KvVarIsInitializedOp', 'ReadKvVariableOp' + ] + ] + meta_graph_def.meta_info_def.stripped_op_list.ClearField('op') + meta_graph_def.meta_info_def.stripped_op_list.op.extend(kept_ops) + for tmp_op in meta_graph_def.meta_info_def.stripped_op_list.op: + if tmp_op.name == 'SaveV2': + for tmp_id, tmp_attr in enumerate(tmp_op.attr): + if tmp_attr.name == 'has_ev': + tmp_op.attr.remove(tmp_attr) + break + + def clear_meta_collect(self, meta_graph_def): + drop_meta_collects = [] + for key in meta_graph_def.collection_def: + val = meta_graph_def.collection_def[key] + if val.HasField('node_list'): + if 'embedding_weights' in val.node_list.value[ + 0] and 'easy_rec' not in val.node_list.value[0]: + drop_meta_collects.append(key) + elif key == 'saved_model_assets': + drop_meta_collects.append(key) + for key in drop_meta_collects: + meta_graph_def.collection_def.pop(key) + + def remove_embedding_weights_and_update_lookup_outputs(self): + + def 
_should_drop(name): + if '_embedding_weights' in name: + if self._verbose: + logging.info('[SHOULD_DROP] %s' % name) + return True + + logging.info('remove embedding_weights node in graph_def.node') + logging.info( + 'and replace the old embedding_lookup outputs with new lookup_op outputs' + ) + + for tid, node in enumerate(self._all_graph_nodes): + # drop the nodes + if _should_drop(node.name): + self._all_graph_node_flags[tid] = False + else: + for i in range(len(node.input)): + if _should_drop(node.input[i]): + input_name, _ = proto_util.get_norm_embed_name( + node.input[i], self._verbose) + print('REPLACE:' + node.input[i] + '=>' + input_name) + input_name = self._lookup_outs[self._feature_names.index( + input_name)].name + if input_name.endswith(':0'): + input_name = input_name.replace(':0', '') + node.input[i] = input_name + + # drop by ids + def _drop_by_ids(self, tmp_obj, key, drop_ids): + keep_vals = [ + x for i, x in enumerate(getattr(tmp_obj, key)) if i not in drop_ids + ] + tmp_obj.ClearField(key) + getattr(tmp_obj, key).extend(keep_vals) + + def clear_save_restore(self): + """Clear save restore ops. + + save/restore_all need save/restore_shard as input + save/restore_shard needs save/Assign_[0-N] as input + save/Assign_[0-N] needs save/RestoreV2 as input + save/RestoreV2 use save/RestoreV2/tensor_names and save/RestoreV2/shape_and_slices as input + edit [ save/RestoreV2/tensor_names save/RestoreV2/shape_and_slices save/RestoreV2 save/restore_shard ] + """ + for tid, node in enumerate(self._all_graph_nodes): + if not self._all_graph_node_flags[tid]: + continue + if node.name == 'save/RestoreV2/tensor_names': + self._restore_tensor_node = node + break + # assert self._restore_tensor_node is not None, 'save/RestoreV2/tensor_names is not found' + + if self._restore_tensor_node: + drop_ids = [] + for tmp_id, tmp_name in enumerate( + self._restore_tensor_node.attr['value'].tensor.string_val): + if 'embedding_weights' in self.bytes2str(tmp_name): + drop_ids.append(tmp_id) + + self._drop_by_ids(self._restore_tensor_node.attr['value'].tensor, + 'string_val', drop_ids) + keep_node_num = len( + self._restore_tensor_node.attr['value'].tensor.string_val) + logging.info( + 'update self._restore_tensor_node: string_val keep_num = %d drop_num = %d' + % (keep_node_num, len(drop_ids))) + self._restore_tensor_node.attr['value'].tensor.tensor_shape.dim[ + 0].size = keep_node_num + self._restore_tensor_node.attr['_output_shapes'].list.shape[0].dim[ + 0].size = keep_node_num + + logging.info( + 'update save/RestoreV2, drop tensor_shapes, _output_shapes, related to embedding_weights' + ) + self._restore_shard_node = None + for node_id, node in enumerate(self._all_graph_nodes): + if not self._all_graph_node_flags[tid]: + continue + if node.name == 'save/RestoreV2/shape_and_slices': + node.attr['value'].tensor.tensor_shape.dim[0].size = keep_node_num + node.attr['_output_shapes'].list.shape[0].dim[0].size = keep_node_num + self._drop_by_ids(node.attr['value'].tensor, 'string_val', drop_ids) + elif node.name == 'save/RestoreV2': + self._drop_by_ids(node.attr['_output_shapes'].list, 'shape', drop_ids) + self._drop_by_ids(node.attr['dtypes'].list, 'type', drop_ids) + elif node.name == 'save/restore_shard': + self._restore_shard_node = node + elif node.name.startswith('save/restore_all'): + self._restore_all_node.append(node) + + def clear_save_assign(self): + logging.info( + 'update save/Assign, drop tensor_shapes, _output_shapes, related to embedding_weights' + ) + # edit save/Assign + drop_save_assigns 
= [] + all_kv_drop = [] + for tid, node in enumerate(self._all_graph_nodes): + if not self._all_graph_node_flags[tid]: + continue + if node.op == 'Assign' and 'save/Assign' in node.name and \ + 'embedding_weights' in node.input[0]: + drop_save_assigns.append('^' + node.name) + self._all_graph_node_flags[tid] = False + elif 'embedding_weights/ConcatPartitions/concat' in node.name: + self._all_graph_node_flags[tid] = False + elif node.name.endswith('/embedding_weights') and node.op == 'Identity': + self._all_graph_node_flags[tid] = False + elif 'save/KvResourceImportV2' in node.name and node.op == 'KvResourceImportV2': + drop_save_assigns.append('^' + node.name) + self._all_graph_node_flags[tid] = False + elif 'KvResourceImportV2' in node.name: + self._all_graph_node_flags[tid] = False + elif 'save/Const' in node.name and node.op == 'Const': + if '_class' in node.attr and 'embedding_weights' in node.attr[ + '_class'].list.s[0]: + self._all_graph_node_flags[tid] = False + elif 'ReadKvVariableOp' in node.name and node.op == 'ReadKvVariableOp': + all_kv_drop.append(node.name) + self._all_graph_node_flags[tid] = False + elif node.op == 'Assign' and 'save/Assign' in node.name: + # update node(save/Assign_[0-N])'s input[1] by the position of + # node.input[0] in save/RestoreV2/tensor_names + # the outputs of save/RestoreV2 is connected to save/Assign + tmp_id = [ + self.bytes2str(x) + for x in self._restore_tensor_node.attr['value'].tensor.string_val + ].index(node.input[0]) + if tmp_id != 0: + tmp_input2 = 'save/RestoreV2:%d' % tmp_id + else: + tmp_input2 = 'save/RestoreV2' + if tmp_input2 != node.input[1]: + if self._verbose: + logging.info("update save/Assign[%s]'s input from %s to %s" % + (node.name, node.input[1], tmp_input2)) + node.input[1] = tmp_input2 + + # save/restore_all need save/restore_shard as input + # save/restore_shard needs save/Assign_[0-N] as input + # save/Assign_[0-N] needs save/RestoreV2 as input + if self._restore_shard_node: + for tmp_input in drop_save_assigns: + self._restore_shard_node.input.remove(tmp_input) + if self._verbose: + logging.info('drop restore_shard input: %s' % tmp_input) + elif len(self._restore_all_node) > 0: + for tmp_input in drop_save_assigns: + for tmp_node in self._restore_all_node: + if tmp_input in tmp_node.input: + tmp_node.input.remove(tmp_input) + if self._verbose: + logging.info('drop %s input: %s' % (tmp_node.name, tmp_input)) + break + + def clear_save_v2(self): + """Clear SaveV2 ops. + + save/Identity need [ save/MergeV2Checkpoints, save/control_dependency ] + as input. Save/MergeV2Checkpoints need [save/MergeV2Checkpoints/checkpoint_prefixes] + as input. Save/MergeV2Checkpoints/checkpoint_prefixes need [ save/ShardedFilename, + save/control_dependency ] as input. save/control_dependency need save/SaveV2 as input. + save/SaveV2 input: [ save/SaveV2/tensor_names, save/SaveV2/shape_and_slices ] + edit save/SaveV2 save/SaveV2/shape_and_slices save/SaveV2/tensor_names. 
+ """ + logging.info('update save/SaveV2 input shape, _output_shapes, tensor_shape') + save_drop_ids = [] + for tid, node in enumerate(self._all_graph_nodes): + if not self._all_graph_node_flags[tid]: + continue + if node.name == 'save/SaveV2' and node.op == 'SaveV2': + for tmp_id, tmp_input in enumerate(node.input): + if '/embedding_weights' in tmp_input: + save_drop_ids.append(tmp_id) + diff_num = len(node.input) - len(node.attr['dtypes'].list.type) + self._drop_by_ids(node, 'input', save_drop_ids) + save_drop_ids = [x - diff_num for x in save_drop_ids] + self._drop_by_ids(node.attr['dtypes'].list, 'type', save_drop_ids) + if 'has_ev' in node.attr: + del node.attr['has_ev'] + for node in self._all_graph_nodes: + if node.name == 'save/SaveV2/shape_and_slices' and node.op == 'Const': + # _output_shapes # size # string_val + node.attr['_output_shapes'].list.shape[0].dim[0].size -= len( + save_drop_ids) + node.attr['value'].tensor.tensor_shape.dim[0].size -= len(save_drop_ids) + self._drop_by_ids(node.attr['value'].tensor, 'string_val', + save_drop_ids) + elif node.name == 'save/SaveV2/tensor_names': + # tensor_names may not have the same order as save/SaveV2/shape_and_slices + tmp_drop_ids = [ + tmp_id for tmp_id, tmp_val in enumerate( + node.attr['value'].tensor.string_val) + if 'embedding_weights' in self.bytes2str(tmp_val) + ] + # attr['value'].tensor.string_val # tensor_shape # size + assert len(save_drop_ids) == len(save_drop_ids) + node.attr['_output_shapes'].list.shape[0].dim[0].size -= len( + tmp_drop_ids) + node.attr['value'].tensor.tensor_shape.dim[0].size -= len(tmp_drop_ids) + self._drop_by_ids(node.attr['value'].tensor, 'string_val', tmp_drop_ids) + + def clear_initialize(self): + """Clear initialization ops. + + */read(Identity) depend on [*(VariableV2)] + */Assign depend on [*/Initializer/*, *(VariableV2)] + drop embedding_weights initialization nodes + */embedding_weights/part_x [,/Assign,/read] + */embedding_weights/part_1/Initializer/truncated_normal [,/shape,/mean,/stddev,/TruncatedNormal,/mul] + """ + logging.info('Remove Initialization nodes for embedding_weights') + for tid, node in enumerate(self._all_graph_nodes): + if not self._all_graph_node_flags[tid]: + continue + if 'embedding_weights' in node.name and 'Initializer' in node.name: + self._all_graph_node_flags[tid] = False + elif 'embedding_weights' in node.name and 'Assign' in node.name: + self._all_graph_node_flags[tid] = False + elif 'embedding_weights' in node.name and node.op == 'VariableV2': + self._all_graph_node_flags[tid] = False + elif 'embedding_weights' in node.name and node.name.endswith( + '/read') and node.op == 'Identity': + self._all_graph_node_flags[tid] = False + elif 'embedding_weights' in node.name and node.op == 'Identity': + node_toks = node.name.split('/') + node_tok = node_toks[-1] + if 'embedding_weights_' in node_tok: + node_tok = node_tok[len('embedding_weights_'):] + try: + int(node_tok) + self._all_graph_node_flags[tid] = False + except Exception: + pass + + def clear_embedding_variable(self): + # for pai embedding variable, we drop some special nodes + for tid, node in enumerate(self._all_graph_nodes): + if not self._all_graph_node_flags[tid]: + continue + if node.op in [ + 'ReadKvVariableOp', 'KvVarIsInitializedOp', 'KvVarHandleOp' + ]: + self._all_graph_node_flags[tid] = False + + # there maybe some nodes depend on the dropped nodes, they are dropped as well + def drop_dependent_nodes(self): + drop_names = [ + tmp_node.name + for tid, tmp_node in enumerate(self._all_graph_nodes) + 
if not self._all_graph_node_flags[tid] + ] + while True: + more_drop_names = [] + for tid, tmp_node in enumerate(self._all_graph_nodes): + if not self._all_graph_node_flags[tid]: + continue + if len(tmp_node.input) > 0 and tmp_node.input[0] in drop_names: + logging.info('drop dependent node: %s depend on %s' % + (tmp_node.name, tmp_node.input[0])) + self._all_graph_node_flags[tid] = False + more_drop_names.append(tmp_node.name) + drop_names = more_drop_names + if not drop_names: + break + + def edit_graph(self): + # the main entrance + lookup_input_indices, lookup_input_values, lookup_input_shapes,\ + lookup_input_weights = self.find_lookup_inputs() + + # add lookup op to the graph + self._meta_graph_def = self.add_lookup_op(lookup_input_indices, + lookup_input_values, + lookup_input_shapes, + lookup_input_weights) + + self.clear_meta_graph_embeding(self._meta_graph_def) + + self.clear_meta_collect(self._meta_graph_def) + + self.init_graph_node_clear_flags() + + self.remove_embedding_weights_and_update_lookup_outputs() + + # save/RestoreV2 + self.clear_save_restore() + + # save/Assign + self.clear_save_assign() + + # save/SaveV2 + self.clear_save_v2() + + self.clear_initialize() + + self.clear_embedding_variable() + + self.drop_dependent_nodes() + + self._meta_graph_def.graph_def.ClearField('node') + self._meta_graph_def.graph_def.node.extend([ + x for tid, x in enumerate(self._all_graph_nodes) + if self._all_graph_node_flags[tid] + ]) + + logging.info('old node number = %d' % self._old_node_num) + logging.info('node number = %d' % len(self._meta_graph_def.graph_def.node)) + + if self._verbose: + debug_dump_path = os.path.join(self._debug_dir, 'graph.txt') + with GFile(debug_dump_path, 'w') as fout: + fout.write(text_format.MessageToString(self.graph_def, as_utf8=True)) + debug_dump_path = os.path.join(self._debug_dir, 'meta_graph.txt') + with GFile(debug_dump_path, 'w') as fout: + fout.write( + text_format.MessageToString(self._meta_graph_def, as_utf8=True)) diff --git a/easy_rec/python/utils/odps_util.py b/easy_rec/python/utils/odps_util.py index fcb6ff919..99e33bc32 100644 --- a/easy_rec/python/utils/odps_util.py +++ b/easy_rec/python/utils/odps_util.py @@ -43,7 +43,7 @@ def check_input_field_and_types(data_config): selected_cols = selected_cols.split(',') for x in input_fields: - assert x in selected_cols, 'column %s is not in table %s' % x + assert x in selected_cols, 'column %s is not in table' % x if selected_col_types: selected_types = selected_col_types.split(',') type_map = {x: y for x, y in zip(selected_cols, selected_types)} diff --git a/easy_rec/python/utils/pai_util.py b/easy_rec/python/utils/pai_util.py index 855ac26f4..de7ce99aa 100644 --- a/easy_rec/python/utils/pai_util.py +++ b/easy_rec/python/utils/pai_util.py @@ -55,7 +55,8 @@ def process_config(configs, task_index=0, worker_num=1): configs = configs.split(',') if len(configs) > 1: assert len(configs) == worker_num, \ - 'number of configs must be equal to number of workers, when number of configs > 1' + 'number of configs must be equal to number of workers,' + \ + ' when number of configs > 1' config = configs[task_index] else: config = configs[0] @@ -63,6 +64,10 @@ def process_config(configs, task_index=0, worker_num=1): if config[:4] == 'http': return download(config) elif config[:3] == 'oss': + if '/##/' in config: + config = config.replace('/##/', '\x02') + if '/#/' in config: + config = config.replace('/#/', '\x01') return config else: # allow to use this entry file to run experiments from local env diff --git 
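# Illustrative sketch (not part of the patch): how process_config in pai_util.py picks
# a per-worker config and unescapes the '/#/' markers handled above.
from easy_rec.python.utils import pai_util

# one config per worker, selected by task_index
cfg = pai_util.process_config(
    'oss://bucket/a.config,oss://bucket/b.config', task_index=1, worker_num=2)
assert cfg == 'oss://bucket/b.config'

# '/##/' and '/#/' are converted back to the \x02 / \x01 control characters
cfg = pai_util.process_config('oss://bucket/cfg/#/edit.config')
assert cfg == 'oss://bucket/cfg\x01edit.config'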
a/easy_rec/python/utils/proto_util.py b/easy_rec/python/utils/proto_util.py index 91d786c96..7f73e438d 100644 --- a/easy_rec/python/utils/proto_util.py +++ b/easy_rec/python/utils/proto_util.py @@ -1,5 +1,6 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. +import logging def copy_obj(proto_obj): @@ -13,3 +14,55 @@ def copy_obj(proto_obj): tmp_obj = type(proto_obj)() tmp_obj.CopyFrom(proto_obj) return tmp_obj + + +def get_norm_embed_name(name, verbose=False): + """For embedding export to redis. + + Args: + name: variable name + verbose: whether to dump the embed_names + Return: + embedding_name: normalized embedding_name + embedding_part_id: normalized embedding part_id + if embedding_weights not in name, return None, None + """ + name_toks = name.split('/') + for i in range(0, len(name_toks) - 1): + if name_toks[i + 1].startswith('embedding_weights:'): + var_id = name_toks[i + 1].replace('embedding_weights:', '') + tmp_name = '/'.join(name_toks[:i + 1]) + if var_id != '0': + tmp_name = tmp_name + '_' + var_id + if verbose: + logging.info('norm %s to %s' % (name, tmp_name)) + return tmp_name, 0 + if i > 1 and name_toks[i + 1].startswith('part_') and \ + name_toks[i] == 'embedding_weights': + tmp_name = '/'.join(name_toks[:i]) + part_id = name_toks[i + 1].replace('part_', '') + part_toks = part_id.split(':') + if len(part_toks) >= 2 and part_toks[1] != '0': + tmp_name = tmp_name + '_' + part_toks[1] + if verbose: + logging.info('norm %s to %s' % (name, tmp_name)) + return tmp_name, int(part_toks[0]) + + # input_layer/app_category_embedding/app_category_embedding_weights/SparseReshape + # => input_layer/app_category_embedding + for i in range(0, len(name_toks) - 1): + if name_toks[i + 1].endswith('_embedding_weights'): + tmp_name = '/'.join(name_toks[:i + 1]) + if verbose: + logging.info('norm %s to %s' % (name, tmp_name)) + return tmp_name, 0 + # input_layer/app_category_embedding/embedding_weights + # => input_layer/app_category_embedding + for i in range(0, len(name_toks) - 1): + if name_toks[i + 1] == 'embedding_weights': + tmp_name = '/'.join(name_toks[:i + 1]) + if verbose: + logging.info('norm %s to %s' % (name, tmp_name)) + return tmp_name, 0 + logging.warning('Failed to norm: %s' % name) + return None, None diff --git a/easy_rec/python/utils/test_utils.py b/easy_rec/python/utils/test_utils.py index df0ddef95..8423f9422 100644 --- a/easy_rec/python/utils/test_utils.py +++ b/easy_rec/python/utils/test_utils.py @@ -6,7 +6,7 @@ """ from future import standard_library standard_library.install_aliases() - +import yaml import glob import json import logging @@ -18,15 +18,25 @@ import time from multiprocessing import Process from subprocess import getstatusoutput - +from tensorflow.python.platform import gfile import numpy as np - from easy_rec.python.protos.train_pb2 import DistributionStrategy from easy_rec.python.utils import config_util +from easy_rec.python.protos.pipeline_pb2 import EasyRecConfig TEST_DIR = '/tmp/easy_rec_test' +def get_hdfs_tmp_dir(test_dir): + """Create a randomly of directory in HDFS.""" + tmp_name = ''.join( + [random.choice(string.ascii_letters + string.digits) for i in range(8)]) + assert isinstance(test_dir, str) + test_rand_dir = os.path.join(test_dir, tmp_name) + gfile.MkDir(test_rand_dir) + return test_rand_dir + + def get_tmp_dir(): tmp_name = ''.join( [random.choice(string.ascii_letters + string.digits) for i in range(8)]) @@ -94,6 +104,12 @@ def clean_up(test_dir): set_gpu_id(None) +def clean_up_hdfs(test_dir): + if 
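# Illustrative sketch (not part of the patch): how get_norm_embed_name in proto_util.py
# maps raw variable/node names to a normalized embedding name plus a partition id.
from easy_rec.python.utils.proto_util import get_norm_embed_name

assert get_norm_embed_name(
    'input_layer/app_category_embedding/embedding_weights:0') == (
        'input_layer/app_category_embedding', 0)
assert get_norm_embed_name(
    'input_layer/app_category_embedding/embedding_weights/part_3:0') == (
        'input_layer/app_category_embedding', 3)
# nodes such as .../app_category_embedding_weights/SparseReshape are normalized to
# 'input_layer/app_category_embedding' as well, with part id 0.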
gfile.Exists(test_dir): + gfile.DeleteRecursively(test_dir) + set_gpu_id(None) + + def _replace_data_for_test(data_path): """Replace real data with test data.""" test_data = {} @@ -123,35 +139,87 @@ def _load_config_for_test(pipeline_config_path, test_dir, total_steps=50): eval_config = pipeline_config.eval_config data_config = pipeline_config.data_config - # change data change for short testing - # pipeline_config.train_input_path = test_utils.replace_data_for_test(pipeline_config.train_input_path) - # pipeline_config.test_input_path = test_utils.replace_data_for_test(pipeline_config.test_input_path) - train_config.num_steps = total_steps + # change model_dir pipeline_config.model_dir = test_dir + '/train' - + logging.info('test_model_dir %s' % pipeline_config.model_dir) eval_config.num_examples = max(10, data_config.batch_size) data_config.num_epochs = 0 return pipeline_config +def test_datahub_train_eval(pipeline_config_path, + test_dir, + process_pipeline_func=None, + hyperparam_str='', + total_steps=50, + post_check_func=None): + gpus = get_available_gpus() + if len(gpus) > 0: + set_gpu_id(gpus[0]) + else: + set_gpu_id(None) + + if not isinstance(pipeline_config_path, EasyRecConfig): + logging.info('testing pipeline config %s' % pipeline_config_path) + if 'TF_CONFIG' in os.environ: + del os.environ['TF_CONFIG'] + + if isinstance(pipeline_config_path, EasyRecConfig): + pipeline_config = pipeline_config_path + else: + pipeline_config = _load_config_for_test(pipeline_config_path, test_dir, + total_steps) + + pipeline_config.train_config.train_distribute = 0 + pipeline_config.train_config.num_gpus_per_worker = 1 + pipeline_config.train_config.sync_replicas = False + if process_pipeline_func is not None: + assert callable(process_pipeline_func) + pipeline_config = process_pipeline_func(pipeline_config) + config_util.save_pipeline_config(pipeline_config, test_dir) + test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config') + train_cmd = 'python3 -m easy_rec.python.train_eval --pipeline_config_path %s %s' % ( + test_pipeline_config_path, hyperparam_str) + proc = run_cmd(train_cmd, '%s/log_%s.txt' % (test_dir, 'master')) + proc.wait() + if proc.returncode != 0: + logging.error('train %s failed' % test_pipeline_config_path) + return False + if post_check_func: + return post_check_func(pipeline_config) + return True + + +def _Load_config_for_test_eval(pipeline_config_path): + pipeline_config = config_util.get_configs_from_pipeline_file( + pipeline_config_path) + return pipeline_config + + def test_single_train_eval(pipeline_config_path, test_dir, process_pipeline_func=None, hyperparam_str='', - total_steps=50): + total_steps=50, + post_check_func=None): gpus = get_available_gpus() if len(gpus) > 0: set_gpu_id(gpus[0]) else: set_gpu_id(None) - logging.info('testing pipeline config %s' % pipeline_config_path) + if not isinstance(pipeline_config_path, EasyRecConfig): + logging.info('testing pipeline config %s' % pipeline_config_path) if 'TF_CONFIG' in os.environ: del os.environ['TF_CONFIG'] - pipeline_config = _load_config_for_test(pipeline_config_path, test_dir, - total_steps) + if isinstance(pipeline_config_path, EasyRecConfig): + pipeline_config = pipeline_config_path + else: + pipeline_config = _load_config_for_test(pipeline_config_path, test_dir, + total_steps) + pipeline_config.train_config.train_distribute = 0 pipeline_config.train_config.num_gpus_per_worker = 1 pipeline_config.train_config.sync_replicas = False @@ -165,7 +233,135 @@ def 
test_single_train_eval(pipeline_config_path, proc = run_cmd(train_cmd, '%s/log_%s.txt' % (test_dir, 'master')) proc.wait() if proc.returncode != 0: - logging.error('train %s failed' % pipeline_config_path) + logging.error('train %s failed' % test_pipeline_config_path) + return False + if post_check_func: + return post_check_func(pipeline_config) + return True + + +def yaml_replace(train_yaml_path, + pipline_config_path, + test_pipeline_config_path, + test_export_dir=None): + with open(train_yaml_path, 'r', encoding='utf-8') as _file: + sample = _file.read() + x = yaml.load(sample) + _command = x['app']['command'] + if test_export_dir is not None: + _command = _command.replace(pipline_config_path, + test_pipeline_config_path).replace( + '{EXPOERT_DIR}', test_export_dir) + else: + _command = _command.replace(pipline_config_path, + test_pipeline_config_path) + x['app']['command'] = _command + + with open(train_yaml_path, 'w', encoding='utf-8') as _file: + yaml.dump(x, _file) + + +def test_hdfs_train_eval(pipeline_config_path, + train_yaml_path, + test_dir, + process_pipeline_func=None, + hyperparam_str='', + total_steps=2000): + + gpus = get_available_gpus() + if len(gpus) > 0: + set_gpu_id(gpus[0]) + else: + set_gpu_id(None) + logging.info('testing pipeline config %s' % pipeline_config_path) + logging.info('train_yaml_path %s' % train_yaml_path) + if 'TF_CONFIG' in os.environ: + del os.environ['TF_CONFIG'] + pipeline_config = _load_config_for_test(pipeline_config_path, test_dir, + total_steps) + logging.info('model_dir in pipeline_config has been modified') + pipeline_config.train_config.train_distribute = 0 + pipeline_config.train_config.num_gpus_per_worker = 1 + pipeline_config.train_config.sync_replicas = False + if process_pipeline_func is not None: + assert callable(process_pipeline_func) + pipeline_config = process_pipeline_func(pipeline_config) + config_util.save_pipeline_config(pipeline_config, test_dir) + test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config') + yaml_replace(train_yaml_path, pipeline_config_path, test_pipeline_config_path) + logging.info('test_pipeline_config_path is %s' % test_pipeline_config_path) + train_cmd = 'el_submit -yaml %s' % train_yaml_path + proc = subprocess.Popen(train_cmd.split(), stderr=subprocess.STDOUT) + proc.wait() + if proc.returncode != 0: + logging.error('train %s failed' % test_pipeline_config_path) + logging.error('train_yaml %s failed' % train_yaml_path) + return proc.returncode == 0 + + +def test_hdfs_eval(pipeline_config_path, + eval_yaml_path, + test_dir, + process_pipeline_func=None, + hyperparam_str=''): + + gpus = get_available_gpus() + if len(gpus) > 0: + set_gpu_id(gpus[0]) + else: + set_gpu_id(None) + logging.info('testing export pipeline config %s' % pipeline_config_path) + logging.info('eval_yaml_path %s' % eval_yaml_path) + if 'TF_CONFIG' in os.environ: + del os.environ['TF_CONFIG'] + pipeline_config = _Load_config_for_test_eval(pipeline_config_path) + if process_pipeline_func is not None: + assert callable(process_pipeline_func) + pipeline_config = process_pipeline_func(pipeline_config) + config_util.save_pipeline_config(pipeline_config, test_dir) + test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config') + yaml_replace(eval_yaml_path, pipeline_config_path, test_pipeline_config_path) + logging.info('test_pipeline_config_path is %s' % test_pipeline_config_path) + eval_cmd = 'el_submit -yaml %s' % eval_yaml_path + proc = subprocess.Popen(eval_cmd.split(), stderr=subprocess.STDOUT) + proc.wait() + if 
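# Illustrative sketch (not part of the patch): the minimal structure yaml_replace
# expects in the el_submit yaml files used by the HDFS tests above -- only
# app.command is read and rewritten; the command string itself is a placeholder.
minimal_train_yaml = {
    'app': {
        'command': ('python -m easy_rec.python.train_eval '
                    '--pipeline_config_path samples/pipeline.config')
    }
}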
proc.returncode != 0: + logging.error('eval %s failed' % test_pipeline_config_path) + logging.error('eval_yaml %s failed' % eval_yaml_path) + return proc.returncode == 0 + + +def test_hdfs_export(pipeline_config_path, + export_yaml_path, + test_dir, + process_pipeline_func=None, + hyperparam_str=''): + + gpus = get_available_gpus() + if len(gpus) > 0: + set_gpu_id(gpus[0]) + else: + set_gpu_id(None) + logging.info('testing export pipeline config %s' % pipeline_config_path) + logging.info('export_yaml_path %s' % export_yaml_path) + if 'TF_CONFIG' in os.environ: + del os.environ['TF_CONFIG'] + pipeline_config = _Load_config_for_test_eval(pipeline_config_path) + if process_pipeline_func is not None: + assert callable(process_pipeline_func) + pipeline_config = process_pipeline_func(pipeline_config) + config_util.save_pipeline_config(pipeline_config, test_dir) + test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config') + test_export_path = os.path.join(test_dir, 'export_dir') + yaml_replace(export_yaml_path, pipeline_config_path, + test_pipeline_config_path, test_export_path) + logging.info('test_pipeline_config_path is %s' % test_pipeline_config_path) + eval_cmd = 'el_submit -yaml %s' % export_yaml_path + proc = subprocess.Popen(eval_cmd.split(), stderr=subprocess.STDOUT) + proc.wait() + if proc.returncode != 0: + logging.error('export %s failed' % test_pipeline_config_path) + logging.error('export_yaml %s failed' % export_yaml_path) return proc.returncode == 0 diff --git a/easy_rec/version.py b/easy_rec/version.py index 4c5bc56eb..8f06a347b 100644 --- a/easy_rec/version.py +++ b/easy_rec/version.py @@ -1,3 +1,3 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. -__version__ = '0.1.0' +__version__ = '0.1.4' diff --git a/pai_jobs/deploy.sh b/pai_jobs/deploy.sh new file mode 100644 index 000000000..872f09175 --- /dev/null +++ b/pai_jobs/deploy.sh @@ -0,0 +1,17 @@ +curr_path=`readlink -f $0` +curr_dir=`dirname $curr_path` +root_dir=`dirname $curr_dir` + +cd $root_dir +sh scripts/gen_proto.sh +if [ $? -ne 0 ] +then + echo "generate proto file failed" + exit 1 +fi + +cd $curr_dir +rm -rf easy_rec +ln -s $root_dir/easy_rec ./ +find -L easy_rec -name "*.pyc" | xargs rm -rf +tar -cvzhf easy_rec.tar.gz easy_rec run.py diff --git a/pai_jobs/deploy_ext.sh b/pai_jobs/deploy_ext.sh new file mode 100644 index 000000000..6c5f76e89 --- /dev/null +++ b/pai_jobs/deploy_ext.sh @@ -0,0 +1,96 @@ +curr_path=`readlink -f $0` +curr_dir=`dirname $curr_path` +root_dir=`dirname $curr_dir` + +VERSION="" +ODPSCMD=odpscmd +resource_only=0 +odps_config="" + +while getopts 'V:C:Oc:' OPT; do + case $OPT in + V) + VERSION="$OPTARG";; + C) + ODPSCMD="$OPTARG";; + c) + odps_config="$OPTARG";; + O) + resource_only=1;; + ?) + echo "Usage: `basename $0` -V VERSION [-C odpscmd_path] [-c odps_config_path] [-O]" + echo " -O: only update easy_rec resource file" + echo " -c: odps_config file path" + echo " -C: odpscmd file path, default to: odpscmd, so in default odpscmd must be in PATH" + echo " -V: algorithm version, chars must be in [0-9A-Za-z_-]" + exit 1 + esac +done + +if [ -z "$VERSION" ] +then + echo "algorithm version(-V) is not set." + exit 1 +fi + +ODPSCMD=`which $ODPSCMD` +if [ $? -ne 0 ] +then + echo "$ODPSCMD is not in PATH" + exit 1 +fi + +if [ ! -e $odps_config ] +then + echo "$odps_config does not exist" + exit 1 +fi +odps_config=`readlink -f $odps_config` + +cd $root_dir +bash scripts/gen_proto.sh +if [ $? 
-ne 0 ] +then + echo "generate proto file failed" + exit 1 +fi + +cd $curr_dir + +RES_PATH=easy_rec_ext_${VERSION}_res.tar.gz +ln -s $root_dir/easy_rec ./ +cp easy_rec/__init__.py easy_rec/__init__.py.bak +sed -i -e "s/\[VERSION\]/$VERSION/g" easy_rec/__init__.py +find -L easy_rec -name "*.pyc" | xargs rm -rf +ln -s ../requirements.txt ./ +tar -cvzhf $RES_PATH easy_rec run.py requirements.txt +mv easy_rec/__init__.py.bak easy_rec/__init__.py +${ODPSCMD} --config=$odps_config -e "add file $RES_PATH -f;" +if [ $? -ne 0 ] +then + echo "add $RES_PATH failed" + exit 1 +fi +if [ $resource_only -gt 0 ] +then + echo "add $RES_PATH succeed, version=${VERSION}" + echo "[WARNING] will not update xflow" + echo " your must specify -Dversion=${VERSION} when run pai -name easy_rec_ext" + exit 0 +fi +#rm -rf $RES_PATH + +cd easy_rec_flow_ex +sed -i -e "s/parameter name=\"version\" use=\"optional\" default=\"[0-9A-Za-z_-]\+\"/parameter name=\"version\" use=\"optional\" default=\"$VERSION\"/g" easy_rec_ext.xml +tar -cvzf easy_rec_flow_ex.tar.gz easy_rec_ext.lua easy_rec_ext.xml +cd ../xflow-deploy +package=../easy_rec_flow_ex/easy_rec_flow_ex.tar.gz +python xflow_deploy.py conf=${odps_config} package=$package +if [ $? -ne 0 ] +then + echo "deploy $package failed" + exit 1 +else + echo "deploy $package succeed" +fi +rm -rf ../easy_rec_flow_ex/easy_rec_flow_ex ../easy_rec_flow_ex/easy_rec_flow_ex.tar.gz diff --git a/pai_jobs/easy_rec_flow/easy_rec.xml b/pai_jobs/easy_rec_flow/easy_rec.xml new file mode 100644 index 000000000..5af6b28f9 --- /dev/null +++ b/pai_jobs/easy_rec_flow/easy_rec.xml @@ -0,0 +1,214 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${workflow.parseTable.create_table_sql} + + + + + + + ${workflow.parseTable.add_partition_sql} + + + + + + + + + + + + + + +

+ + tensorflow1120 + algo_public + + + + + + 1 + job failed + + + + + diff --git a/pai_jobs/easy_rec_flow_ex/easy_rec_ext.lua b/pai_jobs/easy_rec_flow_ex/easy_rec_ext.lua new file mode 100644 index 000000000..747fee50f --- /dev/null +++ b/pai_jobs/easy_rec_flow_ex/easy_rec_ext.lua @@ -0,0 +1,500 @@ +function split(str, delimiter) + if str==nil or str=='' or delimiter==nil then + return nil + end + local result = {} + for match in (str..delimiter):gmatch("(.-)"..delimiter) do + table.insert(result, match) + end + return result +end + +function join(list, delimiter) + return table.concat(list, delimiter) +end + +function match_str_in_list(list, str_pattern) + for idx=1,#(list) do + if string.find(list[idx], str_pattern) ~= nil then + return idx + end + end + return nil +end + +function CheckOssValid(host, bucket) + if host == nil or string.len(host) == 0 or + bucket == nil or string.len(bucket) == 0 then + return false + end + return true +end + +function ParseOssUri(oss_uri, default_host) + if string.len(oss_uri) > 6 and string.find(oss_uri, "oss://") == 1 then + _,_,_path,file = string.find(oss_uri,"oss://(.*/)(.*)") + if _path == nil or string.len(_path) == 0 then + error("invalid oss uri: "..oss_uri..", should end with '/'") + end + _,_,bucket_host,dir = string.find(_path, "(.-)(/.*)") + if (string.find(bucket_host, "%.")) then + _,_,bucket,host = string.find(bucket_host, "(.-)%.(.*)") + else + bucket = bucket_host + host = default_host + end + if not CheckOssValid(host, bucket) then + error("invalid oss uri: "..oss_uri..", oss host or bucket not found") + end + root_dir = bucket..dir + return host, root_dir, file + end + error("invalid oss uri: "..oss_uri) +end + +function getEntry(script_in, entryFile_in, config, cluster, res_project, version) + if script_in ~= nil and string.len(script_in) > 0 + and entryFile_in ~= nil and string.len(entryFile_in) > 0 then + script = script_in + entryFile = entryFile_in + else + script= "odps://" .. res_project .. "/resources/easy_rec_ext_" .. version .. "_res.tar.gz" + entryFile="run.py" + end + + return script, entryFile +end + +function checkConfig(config) + s1, e1 = string.find(config, 'oss://') + s2, e2 = string.find(config, 'http') + if s1 == nil and s2 == nil then + error("config path should be url or oss path") + end +end + +function checkTable(table) + s1, e1 = string.find(table, "/tables/") + s2, e2 = string.find(table, "odps://") + if s1 == nil or s2 == nil then + error(string.format("invalid odps table path: %s", table)) + end +end + +function checkOss(path) + s1, e1 = string.find(path, "oss://") + if s1 == nil then + error(string.format("invalid oss path: %s", path)) + end +end + +function check_run_mode(cluster, gpuRequired) + if (cluster ~=nil and cluster ~= "") and gpuRequired ~="" then + error(string.format('cluster and gpuRequired should not be set at the same time. 
cluster: %s gpuRequired:%s', + cluster, gpuRequired)) + end +end + +function getHyperParams(config, cmd, checkpoint_path, + eval_result_path, export_dir, gpuRequired, + cpuRequired, memRequired, cluster, continue_train, + distribute_strategy, with_evaluator, eval_method, + edit_config_json, selected_cols, + model_dir, hpo_param_path, hpo_metric_save_path, + saved_model_dir, all_cols, all_col_types, + reserved_cols, output_cols, model_outputs, + input_table, output_table, tables, train_tables, + eval_tables, boundary_table, batch_size, profiling_file, + mask_feature_name, extra_params) + if cmd == "predict" then + if cluster == nil or cluster == '' then + error('cluster must be set') + end + if saved_model_dir == nil or saved_model_dir == '' then + error('saved_model_dir must be set') + checkOss(saved_model_dir) + end + hyperParameters = " --cmd=" .. cmd + hyperParameters = hyperParameters .. " --saved_model_dir=" .. saved_model_dir + hyperParameters = hyperParameters .. " --all_cols=" .. all_cols .. + " --all_col_types=" .. all_col_types + if selected_cols ~= nil and selected_cols ~= '' then + hyperParameters = hyperParameters .. " --selected_cols=" .. selected_cols + end + if reserved_cols ~= nil and string.len(reserved_cols) > 0 then + hyperParameters = hyperParameters .. " --reserved_cols=" .. reserved_cols + end + hyperParameters = hyperParameters .. " --batch_size=" .. batch_size + if profiling_file ~= nil and profiling_file ~= '' then + checkOss(profiling_file) + hyperParameters = hyperParameters .. " --profiling_file=" .. profiling_file + end + --support both 'probs float, embedding string' and 'probs, embedding' format + --in easy_rec.python.inferece.predictor.predict_table + if model_outputs ~= nil and model_outputs ~= "" then + hyperParameters = hyperParameters .. " --output_cols='" .. model_outputs .. "'" + else + hyperParameters = hyperParameters .. " --output_cols='" .. output_cols .. "'" + end + checkTable(input_table) + checkTable(output_table) + + if extra_params ~= nil and extra_params ~= '' then + hyperParameters = hyperParameters .. extra_params + end + return hyperParameters, cluster, input_table, output_table + end + + if string.len(config) > 0 then + checkConfig(config) + hyperParameters = "--config='" .. config .. "'" + end + + if selected_cols ~= nil and selected_cols ~= '' then + hyperParameters = hyperParameters .. ' --selected_cols=' .. selected_cols + end + + hyperParameters = string.format('%s --cmd=%s', hyperParameters, cmd) + + if cmd == 'evaluate' then + hyperParameters = hyperParameters .. " --checkpoint_path=" .. checkpoint_path + hyperParameters = hyperParameters .. " --all_cols=" .. all_cols .. + " --all_col_types=" .. all_col_types + hyperParameters = hyperParameters .. " --eval_result_path=" .. eval_result_path + hyperParameters = hyperParameters .. " --mask_feature_name=" .. mask_feature_name + hyperParameters = hyperParameters .. " --distribute_strategy=" .. distribute_strategy + elseif cmd == 'export' then + hyperParameters = hyperParameters .. " --checkpoint_path=" .. checkpoint_path + hyperParameters = hyperParameters .. " --export_dir=" .. export_dir + elseif cmd == 'train' then + hyperParameters = hyperParameters .. " --all_cols=" .. all_cols .. + " --all_col_types=" .. all_col_types + hyperParameters = hyperParameters .. " --continue_train=" .. continue_train + hyperParameters = hyperParameters .. " --distribute_strategy=" .. 
distribute_strategy + if with_evaluator ~= "" and tonumber(with_evaluator) ~= 0 then + hyperParameters = hyperParameters .. " --with_evaluator" + end + if eval_method ~= 'none' and eval_method ~= 'separate' and eval_method ~= 'master' then + error('invalid eval_method ' .. eval_method) + end + if eval_method ~= "" then + hyperParameters = hyperParameters .. " --eval_method=" .. eval_method + end + + -- tables used for train and evaluate + if train_tables ~= "" and train_tables ~= nil then + hyperParameters = hyperParameters .. " --train_tables " .. train_tables + end + if eval_tables ~= "" and eval_tables ~= nil then + hyperParameters = hyperParameters .. " --eval_tables " .. eval_tables + end + if boundary_table ~= "" and boundary_table ~= nil then + hyperParameters = hyperParameters .. " --boundary_table " .. boundary_table + end + + if hpo_param_path ~= "" and hpo_param_path ~= nil then + hyperParameters = hyperParameters .. " --hpo_param_path=" .. hpo_param_path + if hpo_metric_save_path == nil then + error('hpo_metric_save_path must be set') + end + hyperParameters = hyperParameters .. " --hpo_metric_save_path=" .. hpo_metric_save_path + end + + if edit_config_json ~= "" and edit_config_json ~= nil then + hyperParameters = hyperParameters .. + string.format(" --edit_config_json='%s'", edit_config_json) + end + end + + if model_dir ~= "" and model_dir ~= nil then + checkOss(model_dir) + hyperParameters = hyperParameters .. " --model_dir=" .. model_dir + end + + check_run_mode(cluster, gpuRequired) + if gpuRequired ~= "" then + num_gpus_per_worker = math.max(math.ceil(tonumber(gpuRequired)/100), 0) + cluster = string.format('{"worker":{"count":1, "gpu":%s, "cpu":%s, "memory":%s}}', + gpuRequired, cpuRequired, memRequired) + elseif cluster ~= "" then + gpus_str = string.match(cluster, '"gpu"%s*:%s*(%d+)') + if gpus_str ~= nil then + num_gpus_per_worker = math.max(math.ceil(tonumber(gpus_str)/100), 0) + else + num_gpus_per_worker = 1 + end + else + num_gpus_per_worker = 1 + end + hyperParameters = string.format("%s --num_gpus_per_worker=%s ", hyperParameters, + num_gpus_per_worker) + + if extra_params ~= nil and extra_params ~= '' then + hyperParameters = hyperParameters .. 
extra_params + end + + return hyperParameters, cluster, tables, nil +end + +function splitTableParam(table_path) + -- odps://xx_project/tables/table_name/pa=1/pb=2 + -- split table name and partitions + delimiter = '/' + eles = split(table_path, delimiter) + project_name = eles[3] + table_name = eles[5] + local partitions = {} + for i=6, table.getn(eles) do + table.insert(partitions, eles[i]) + end + partition_str = join(partitions, delimiter) + + return project_name, table_name, partition_str +end + +function getInputTableColTypes(inputTable) + -- to test: uncomment the following, and comment the rest + --return {["a"] = "string", ["b"] = "int",["c"] = "string",["d"] = "int"}, {"a", "b", "c" } + local all_input_cols = Builtin.GetAllColumnNames(inputTable, ",") + local all_input_types = Builtin.GetColumnDataTypes(inputTable, ",") + local col_list = split(all_input_cols, ',') + local type_list = split(all_input_types, ',') + local col_map = {} + for i=1,table.getn(col_list) do + col_map[col_list[i]] = type_list[i] + end + return col_map, col_list +end + +function getOutputCols(col_type_map, reserved_columns, result_column) + local res_cols = split(reserved_columns, ',') + local sql = "(" + if res_cols ~= nil then + for i=1, table.getn(res_cols) do + if col_type_map[res_cols[i]] == nil then + error(string.format("column %s is not in input table", res_cols[i])) + return + else + sql = sql .. res_cols[i] .. " " .. col_type_map[res_cols[i]] .. "," + end + end + end + sql = sql .. result_column .. " string)" + return sql +end + +function parseParitionSpec(partitions) + local parition_names = {} + local partition_values = {} + local parts = split(partitions, "/") + for i = 1, table.getn(parts) do + local spec = split(parts[i], "=") + if table.getn(spec) ~=2 then + error("Partition Spec is not Right "..parts[i]) + else + table.insert(parition_names, i, spec[1]) + table.insert(partition_values,i, spec[2]) + end + end + return parition_names, partition_values +end + +function genCreatePartitionStr(partition_names) + local part_str = "(" + for i = 1,#(partition_names) do + part_str = part_str..partition_names[i].." string," + end + part_str = string.sub(part_str, 1, -2) + return part_str..")" +end + +function genAddPartitionStr(parition_names, partition_values) + local part_str = "(" + for i = 1, #(parition_names) do + part_str= part_str..parition_names[i].."=\""..partition_values[i].."\"," + end + part_str = string.sub(part_str, 1, -2) + return part_str..")" +end + + +function parseTable(cmd, inputTable, outputTable, selectedCols, excludedCols, + reservedCols, lifecycle, outputCol, tables, + trainTables, evalTables, boundaryTable) + if cmd ~= 'train' and cmd ~= 'evaluate' and cmd ~= 'predict' and cmd ~= 'export' + and cmd ~= 'evaluate' then + error('invalid cmd: ' .. cmd .. 
', should be one of train, evaluate, predict, evaluate, export') + end + + -- for export, no table need to be parsed + if cmd == 'export' then + return "", "", "", "", "select 1;", "select 1;", '' + end + + if cmd == 'train' then + -- merge train table and eval table into tables + if trainTables ~= '' and trainTables ~= nil then + all_tables = {} + trainTables = split(trainTables, ',') + table_id = 0 + for k=1, table.getn(trainTables) do + v = trainTables[k] + if all_tables[v] == nil then + all_tables[v] = table_id + table_id = table_id + 1 + end + end + evalTables = split(evalTables, ',') + for k=1, table.getn(evalTables) do + v = evalTables[k] + if all_tables[v] == nil then + all_tables[v] = table_id + table_id = table_id + 1 + end + end + tables = {} + for k,v in pairs(all_tables) do + table.insert(tables, k) + end + tables = join(tables, ',') + end + if boundaryTable ~= nil and boundaryTable ~= '' then + tables = tables .. "," .. boundaryTable + end + end + if cmd == 'evaluate' then + -- either set tables or evalTables is ok for evaluation + if tables == nil or tables == '' then + if evalTables == nil or evalTables == '' then + error('either tables or eval_tables must be set for evaluation') + end + tables = evalTables + end + end + + if tables ~= '' and tables ~= nil then + inputTable = split(tables, ',')[1] + end + + -- analyze selected_cols excluded_cols for train, evaluate and predict + proj0, table0, partition0 = splitTableParam(inputTable) + input_col_types, input_cols = getInputTableColTypes(proj0 .. "." .. table0) + + if (excludedCols ~= nil and excludedCols ~= '') and + (selectedCols ~= nil and selectedCols ~= '') then + error('selected_cols and excluded_cols should not be set') + end + + ex_cols_map = {} + if excludedCols ~= '' and excludedCols ~= nil then + ex_cols_lst = split(excludedCols, ',') + for i=1, table.getn(ex_cols_lst) do + ex_cols_map[ex_cols_lst[i]] = 1 + end + end + + -- columns to be selected to input to the model + selected_cols = {} + -- all columns to read by TableRecordDataset + all_cols = {} + all_col_types = {} + all_cols_map = {} + if selectedCols ~= '' and selectedCols ~= nil then + tmp_cols = split(selectedCols, ",") + else + tmp_cols = input_cols + end + + for i=1, table.getn(tmp_cols) do + if input_col_types[tmp_cols[i]] == nil then + error(string.format("column %s is not in input table", tmp_cols[i])) + return + elseif ex_cols_map[tmp_cols[i]] == nil then + -- not in excluded cols map + if input_col_types[tmp_cols[i]] ~= nil and all_cols_map[tmp_cols[i]] == nil then + table.insert(all_cols, tmp_cols[i]) + table.insert(all_col_types, input_col_types[tmp_cols[i]]) + table.insert(selected_cols, tmp_cols[i]) + all_cols_map[tmp_cols[i]] = 1 + end + end + end + + if cmd == 'train' or cmd == 'evaluate' then + return join(all_cols, ","), join(all_col_types, ","), + join(selected_cols, ","), '', + "select 1;", "select 1;", tables + end + + -- analyze reserved_cols for predict + -- columns to be copied to output_table, may not be in selected columns + -- could have overlapped columns with selected_cols and excluded_cols + reserved_cols = {} + reserved_col_types = {} + if reservedCols ~= nil and reservedCols ~= '' then + if reservedCols == 'ALL_COLUMNS' then + tmp_cols = input_cols + else + tmp_cols = split(reservedCols, ',') + end + for i=1, table.getn(tmp_cols) do + if input_col_types[tmp_cols[i]] ~= nil then + table.insert(reserved_cols, tmp_cols[i]) + table.insert(reserved_col_types, input_col_types[tmp_cols[i]]) + if all_cols_map[tmp_cols[i]] == nil then 
+ table.insert(all_cols, tmp_cols[i]) + table.insert(all_col_types, input_col_types[tmp_cols[i]]) + all_cols_map[tmp_cols[i]] = 1 + end + else + error("invalid reserved_col: " .. tmp_cols[i] .. " available: " .. join(input_cols, ",")) + end + end + else + table.insert(reserved_cols, selected_cols[0]) + end + + -- build create output table sql and add partition sql for predict + sql_col_desc = {} + for i=1, table.getn(reserved_cols) do + table.insert(sql_col_desc, reserved_cols[i] .. " " .. reserved_col_types[i]) + end + table.insert(sql_col_desc, outputCol) + sql_col_desc = join(sql_col_desc, ",") + + proj1, table1, partition1 = splitTableParam(outputTable) + out_table_name = proj1 .. "." .. table1 + create_sql = '' + add_partition_sql = '' + if partition1 ~= nil and string.len(partition1) ~= 0 then + local partition_names, parition_values = parseParitionSpec(partition1) + create_partition_str = genCreatePartitionStr(partition_names) + create_sql = string.format("create table if not exists %s (%s) partitioned by %s lifecycle %s;", out_table_name, sql_col_desc, create_partition_str, lifecycle) + add_partition_sql = genAddPartitionStr(partition_names, parition_values) + add_partition_sql = string.format("alter table %s add if not exists partition %s;", out_table_name, add_partition_sql) + else + create_sql = string.format("create table %s (%s) lifecycle %s;", out_table_name, sql_col_desc, lifecycle) + add_partition_sql = string.format("desc %s;", out_table_name) + end + + return join(all_cols, ","), join(all_col_types, ","), + join(selected_cols, ","), join(reserved_cols, ","), + create_sql, add_partition_sql, "" +end + +function test_create_table() + input_table = "odps://pai_rec_dev/tables/test_longonehot_4deepfm_20/part=1" + output_table = "odps://pai_rec_dev/tables/test_longonehot_4deepfm_20_out/part=1" + selectedCols = "a,b,c" + excludedCols = "" + reservedCols = "a" + lifecycle=1 + outputCol = "score double" + all_cols, all_cols_types, selected_cols, reserved_cols, create_sql, add_partition_sql = createTable(input_table, output_table, selectedCols, excludedCols, reservedCols, lifecycle, outputCol) + print(create_sql) + print(add_partition_sql) + print(string.format('all_cols = %s', all_cols)) + print(string.format('selected_cols = %s', selected_cols)) + print(string.format('reserved_cols = %s', reserved_cols)) +end +--test_create_table() diff --git a/pai_jobs/easy_rec_flow_ex/easy_rec_ext.xml b/pai_jobs/easy_rec_flow_ex/easy_rec_ext.xml new file mode 100644 index 000000000..b786710fd --- /dev/null +++ b/pai_jobs/easy_rec_flow_ex/easy_rec_ext.xml @@ -0,0 +1,218 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${workflow.parseTable.create_table_sql} + + + + + + + ${workflow.parseTable.add_partition_sql} + + + + + + + + + + + + + + +


+ + tensorflow1120_ext + algo_public + + + + + + 1 + job failed + + + + + diff --git a/pai_jobs/run.py b/pai_jobs/run.py new file mode 100644 index 000000000..27d4e5c89 --- /dev/null +++ b/pai_jobs/run.py @@ -0,0 +1,512 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. +from __future__ import print_function + +import json +import logging +import os + +import tensorflow as tf + +import easy_rec +from easy_rec.python.inference.predictor import Predictor +from easy_rec.python.protos.train_pb2 import DistributionStrategy +from easy_rec.python.utils import config_util +from easy_rec.python.utils import estimator_utils +from easy_rec.python.utils import hpo_util +from easy_rec.python.utils import pai_util +from easy_rec.python.utils.estimator_utils import chief_to_master +from easy_rec.python.utils.estimator_utils import master_to_chief + +if not tf.__version__.startswith('1.12'): + tf = tf.compat.v1 + try: + import tensorflow_io as tfio # noqa: F401 + except Exception as ex: + logging.error('failed to import tfio: %s' % str(ex)) + tf.disable_eager_execution() + +from easy_rec.python.main import _train_and_evaluate_impl as train_and_evaluate_impl # NOQA + +logging.basicConfig( + level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s') + +tf.app.flags.DEFINE_string('worker_hosts', '', + 'Comma-separated list of hostname:port pairs') +tf.app.flags.DEFINE_string('ps_hosts', '', + 'Comma-separated list of hostname:port pairs') +tf.app.flags.DEFINE_string('job_name', '', 'task type, ps/worker') +tf.app.flags.DEFINE_integer('task_index', 0, 'Index of task within the job') +tf.app.flags.DEFINE_string('config', '', 'EasyRec config file path') +tf.app.flags.DEFINE_string('cmd', 'train', + 'command type, train/evaluate/export') +tf.app.flags.DEFINE_string('tables', '', 'tables passed by pai command') + +# flags for train +tf.app.flags.DEFINE_integer('num_gpus_per_worker', 1, + 'number of gpu to use in training') +tf.app.flags.DEFINE_boolean('with_evaluator', False, + 'whether a evaluator is necessary') +tf.app.flags.DEFINE_string( + 'eval_method', 'none', 'default to none, choices are [none: not evaluate,' + + 'master: evaluate on master, separate: evaluate on a separate task]') + +tf.app.flags.DEFINE_string('distribute_strategy', '', + 'training distribute strategy') +tf.app.flags.DEFINE_string('edit_config_json', '', 'edit config json string') +tf.app.flags.DEFINE_string('train_tables', '', 'tables used for train') +tf.app.flags.DEFINE_string('eval_tables', '', 'tables used for evaluation') +tf.app.flags.DEFINE_string('boundary_table', '', 'tables used for boundary') +tf.app.flags.DEFINE_string('sampler_table', '', 'tables used for sampler') + +# flags used for evaluate & export +tf.app.flags.DEFINE_string( + 'checkpoint_path', '', 'checkpoint to be evaluated or exported ' + 'if not specified, use the latest checkpoint ' + 'in train_config.model_dir') +# flags used for evaluate +tf.app.flags.DEFINE_string('eval_result_path', 'eval_result.txt', + 'eval result metric file') +# flags used for export +tf.app.flags.DEFINE_string('export_dir', '', + 'directory where model should be exported to') +tf.app.flags.DEFINE_boolean('continue_train', True, + 'use the same model to continue train or not') + +# flags used for predict +tf.app.flags.DEFINE_string('saved_model_dir', '', + 'directory where saved_model.pb exists') +tf.app.flags.DEFINE_string('outputs', '', 'output tables') +tf.app.flags.DEFINE_string( + 'all_cols', '', + 'union of (selected_cols, reserved_cols), 
separated with , ') +tf.app.flags.DEFINE_string( + 'all_col_types', '', + 'column data types, for build record defaults, separated with ,') +tf.app.flags.DEFINE_string( + 'selected_cols', '', + 'columns to keep from input table, they are separated with ,') +tf.app.flags.DEFINE_string( + 'reserved_cols', '', + 'columns to keep from input table, they are separated with ,') +tf.app.flags.DEFINE_string( + 'output_cols', None, + 'output columns, such as: score float. multiple columns are separated by ,') +tf.app.flags.DEFINE_integer('batch_size', 1024, 'predict batch size') +tf.app.flags.DEFINE_string( + 'profiling_file', None, + 'time stat file which can be viewed using chrome tracing') +tf.app.flags.DEFINE_string('redis_url', None, 'export to redis url, host:port') +tf.app.flags.DEFINE_string('redis_passwd', None, 'export to redis passwd') +tf.app.flags.DEFINE_integer('redis_threads', 5, 'export to redis threads') +tf.app.flags.DEFINE_integer('redis_batch_size', 1024, + 'export to redis batch_size') +tf.app.flags.DEFINE_integer('redis_timeout', 600, + 'export to redis time_out in seconds') +tf.app.flags.DEFINE_integer('redis_expire', 24, + 'export to redis expire time in hour') +tf.app.flags.DEFINE_string('redis_embedding_version', '', + 'redis embedding version') +tf.app.flags.DEFINE_integer('redis_write_kv', 1, 'whether write kv ') +tf.app.flags.DEFINE_bool('verbose', False, 'print more debug information') + +# for automl hyper parameter tuning +tf.app.flags.DEFINE_string('model_dir', None, 'model directory') +tf.app.flags.DEFINE_string('hpo_param_path', None, + 'hyperparameter tuning param path') +tf.app.flags.DEFINE_string('hpo_metric_save_path', None, + 'hyperparameter save metric path') +tf.app.flags.DEFINE_string('asset_files', None, 'extra files to add to export') + +FLAGS = tf.app.flags.FLAGS + + +def check_param(name): + assert getattr(FLAGS, name) != '', '%s should not be empty' % name + + +DistributionStrategyMap = { + '': DistributionStrategy.NoStrategy, + 'ps': DistributionStrategy.PSStrategy, + 'ess': DistributionStrategy.ExascaleStrategy, + 'mirrored': DistributionStrategy.MirroredStrategy, + 'collective': DistributionStrategy.CollectiveAllReduceStrategy +} + + +def set_tf_config_and_get_train_worker_num( + distribute_strategy=DistributionStrategy.NoStrategy, eval_method='none'): + logging.info( + 'set_tf_config_and_get_train_worker_num: distribute_strategy = %d' % + distribute_strategy) + worker_hosts = FLAGS.worker_hosts.split(',') + ps_hosts = FLAGS.ps_hosts.split(',') + + total_worker_num = len(worker_hosts) + train_worker_num = total_worker_num + + print('Original TF_CONFIG=%s' % os.environ.get('TF_CONFIG', '')) + print('worker_hosts=%s ps_hosts=%s task_index=%d job_name=%s' % + (FLAGS.worker_hosts, FLAGS.ps_hosts, FLAGS.task_index, FLAGS.job_name)) + print('eval_method=%s' % eval_method) + if distribute_strategy == DistributionStrategy.MirroredStrategy: + assert total_worker_num == 1, 'mirrored distribute strategy only need 1 worker' + elif distribute_strategy in [ + DistributionStrategy.NoStrategy, DistributionStrategy.PSStrategy, + DistributionStrategy.CollectiveAllReduceStrategy, + DistributionStrategy.ExascaleStrategy + ]: + cluster, task_type, task_index = estimator_utils.parse_tf_config() + train_worker_num = 0 + if eval_method == 'separate': + if 'evaluator' in cluster: + # 'evaluator' in cluster indicates user use new-style cluster content + if 'chief' in cluster: + train_worker_num += len(cluster['chief']) + elif 'master' in cluster: + train_worker_num += 
len(cluster['master']) + if 'worker' in cluster: + train_worker_num += len(cluster['worker']) + # drop evaluator to avoid hang + if distribute_strategy == DistributionStrategy.NoStrategy: + del cluster['evaluator'] + tf_config = { + 'cluster': cluster, + 'task': { + 'type': task_type, + 'index': task_index + } + } + os.environ['TF_CONFIG'] = json.dumps(tf_config) + else: + # backward compatibility, if user does not assign one evaluator in + # -Dcluster, we use first worker for chief, second for evaluation + train_worker_num = total_worker_num - 1 + assert train_worker_num > 0, 'in distribution mode worker num must be greater than 1, ' \ + 'the second worker will be used as evaluator' + if len(worker_hosts) > 1: + cluster = {'chief': [worker_hosts[0]], 'worker': worker_hosts[2:]} + if distribute_strategy != DistributionStrategy.NoStrategy: + cluster['evaluator'] = [worker_hosts[1]] + if FLAGS.ps_hosts != '': + cluster['ps'] = ps_hosts + if FLAGS.job_name == 'ps': + os.environ['TF_CONFIG'] = json.dumps({ + 'cluster': cluster, + 'task': { + 'type': FLAGS.job_name, + 'index': FLAGS.task_index + } + }) + elif FLAGS.job_name == 'worker': + if FLAGS.task_index == 0: + os.environ['TF_CONFIG'] = json.dumps({ + 'cluster': cluster, + 'task': { + 'type': 'chief', + 'index': 0 + } + }) + elif FLAGS.task_index == 1: + os.environ['TF_CONFIG'] = json.dumps({ + 'cluster': cluster, + 'task': { + 'type': 'evaluator', + 'index': 0 + } + }) + else: + os.environ['TF_CONFIG'] = json.dumps({ + 'cluster': cluster, + 'task': { + 'type': FLAGS.job_name, + 'index': FLAGS.task_index - 2 + } + }) + else: + if 'evaluator' in cluster: + evaluator = cluster['evaluator'] + del cluster['evaluator'] + # 'evaluator' in cluster indicates user use new-style cluster content + train_worker_num += 1 + if 'chief' in cluster: + train_worker_num += len(cluster['chief']) + elif 'master' in cluster: + train_worker_num += len(cluster['master']) + if 'worker' in cluster: + train_worker_num += len(cluster['worker']) + cluster['worker'].append(evaluator[0]) + else: + cluster['worker'] = [evaluator[0]] + if task_type == 'evaluator': + tf_config = { + 'cluster': cluster, + 'task': { + 'type': 'worker', + 'index': train_worker_num - 2 + } + } + else: + tf_config = { + 'cluster': cluster, + 'task': { + 'type': task_type, + 'index': task_index + } + } + os.environ['TF_CONFIG'] = json.dumps(tf_config) + else: + cluster = {'chief': [worker_hosts[0]], 'worker': worker_hosts[1:]} + train_worker_num = len(worker_hosts) + if FLAGS.ps_hosts != '': + cluster['ps'] = ps_hosts + if FLAGS.job_name == 'ps': + os.environ['TF_CONFIG'] = json.dumps({ + 'cluster': cluster, + 'task': { + 'type': FLAGS.job_name, + 'index': FLAGS.task_index + } + }) + else: + if FLAGS.task_index == 0: + os.environ['TF_CONFIG'] = json.dumps({ + 'cluster': cluster, + 'task': { + 'type': 'chief', + 'index': 0 + } + }) + else: + os.environ['TF_CONFIG'] = json.dumps({ + 'cluster': cluster, + 'task': { + 'type': 'worker', + 'index': FLAGS.task_index - 1 + } + }) + if eval_method == 'none': + # change master to chief, will not evaluate + master_to_chief() + elif eval_method == 'master': + # change chief to master, will evaluate on master + chief_to_master() + else: + assert distribute_strategy == '', 'invalid distribute_strategy %s'\ + % distribute_strategy + cluster, task_type, task_index = estimator_utils.parse_tf_config() + print('Final TF_CONFIG = %s' % os.environ.get('TF_CONFIG', '')) + tf.logging.info('TF_CONFIG %s' % os.environ.get('TF_CONFIG', '')) + 
tf.logging.info('distribute_stategy %s, train_worker_num: %d' % + (distribute_strategy, train_worker_num)) + + # remove pai chief-worker waiting strategy + # which is conflicted with worker waiting strategy in easyrec + if 'TF_WRITE_WORKER_STATUS_FILE' in os.environ: + del os.environ['TF_WRITE_WORKER_STATUS_FILE'] + return train_worker_num + + +def set_distribution_config(pipeline_config, num_worker, num_gpus_per_worker, + distribute_strategy): + if distribute_strategy in [ + DistributionStrategy.PSStrategy, DistributionStrategy.MirroredStrategy, + DistributionStrategy.CollectiveAllReduceStrategy, + DistributionStrategy.ExascaleStrategy + ]: + pipeline_config.train_config.sync_replicas = False + pipeline_config.train_config.train_distribute = distribute_strategy + pipeline_config.train_config.num_gpus_per_worker = num_gpus_per_worker + print('Dump pipeline_config.train_config:') + print(pipeline_config.train_config) + + +def set_selected_cols(pipeline_config, selected_cols, all_cols, all_col_types): + if selected_cols: + pipeline_config.data_config.selected_cols = selected_cols + # add column types which will be used by OdpsInput, OdpsInputV2 + # to check consistency with input_fields.input_type + if all_cols: + all_cols_arr = all_cols.split(',') + all_col_types_arr = all_col_types.split(',') + all_col_types_map = { + x.strip(): y.strip() for x, y in zip(all_cols_arr, all_col_types_arr) + } + selected_cols_arr = [x.strip() for x in selected_cols.split(',')] + selected_col_types = [all_col_types_map[x] for x in selected_cols_arr] + selected_col_types = ','.join(selected_col_types) + pipeline_config.data_config.selected_col_types = selected_col_types + + print('[run.py] data_config.selected_cols = "%s"' % + pipeline_config.data_config.selected_cols) + print('[run.py] data_config.selected_col_types = "%s"' % + pipeline_config.data_config.selected_col_types) + + +def main(argv): + pai_util.set_on_pai() + num_gpus_per_worker = FLAGS.num_gpus_per_worker + worker_hosts = FLAGS.worker_hosts.split(',') + num_worker = len(worker_hosts) + assert FLAGS.distribute_strategy in DistributionStrategyMap, \ + 'invalid distribute_strategy [%s], available ones are %s' % ( + FLAGS.distribute_strategy, ','.join(DistributionStrategyMap.keys())) + + if FLAGS.config: + config = pai_util.process_config(FLAGS.config, FLAGS.task_index, + len(FLAGS.worker_hosts.split(','))) + pipeline_config = config_util.get_configs_from_pipeline_file(config, False) + + if FLAGS.edit_config_json: + print('[run.py] edit_config_json = %s' % FLAGS.edit_config_json) + config_json = json.loads(FLAGS.edit_config_json) + config_util.edit_config(pipeline_config, config_json) + + if FLAGS.model_dir: + pipeline_config.model_dir = FLAGS.model_dir + pipeline_config.model_dir = pipeline_config.model_dir.strip() + print('[run.py] update model_dir to %s' % pipeline_config.model_dir) + assert pipeline_config.model_dir.startswith( + 'oss://'), 'invalid model_dir format: %s' % pipeline_config.model_dir + + if FLAGS.cmd == 'train': + assert FLAGS.config, 'config should not be empty when training!' 
+ + if not FLAGS.train_tables: + tables = FLAGS.tables.split(',') + assert len( + tables + ) >= 2, 'at least 2 tables must be specified, but only[%d]: %s' % ( + len(tables), FLAGS.tables) + + if FLAGS.train_tables: + pipeline_config.train_input_path = FLAGS.train_tables + else: + pipeline_config.train_input_path = FLAGS.tables.split(',')[0] + + if FLAGS.eval_tables: + pipeline_config.eval_input_path = FLAGS.eval_tables + else: + pipeline_config.eval_input_path = FLAGS.tables.split(',')[1] + + print('[run.py] train_tables: %s' % pipeline_config.train_input_path) + print('[run.py] eval_tables: %s' % pipeline_config.eval_input_path) + + if FLAGS.boundary_table: + logging.info('Load boundary_table: %s' % FLAGS.boundary_table) + config_util.add_boundaries_to_config(pipeline_config, + FLAGS.boundary_table) + + if FLAGS.sampler_table: + pipeline_config.data_config.negative_sampler.input_path = FLAGS.sampler_table + + # parse selected_cols + set_selected_cols(pipeline_config, FLAGS.selected_cols, FLAGS.all_cols, + FLAGS.all_col_types) + + distribute_strategy = DistributionStrategyMap[FLAGS.distribute_strategy] + + # update params specified by automl if hpo_param_path is specified + if FLAGS.hpo_param_path: + logging.info('hpo_param_path = %s' % FLAGS.hpo_param_path) + with tf.gfile.GFile(FLAGS.hpo_param_path, 'r') as fin: + hpo_config = json.load(fin) + hpo_params = hpo_config['param'] + config_util.edit_config(pipeline_config, hpo_params) + config_util.auto_expand_share_feature_configs(pipeline_config) + + print('[run.py] with_evaluator %s' % str(FLAGS.with_evaluator)) + print('[run.py] eval_method %s' % FLAGS.eval_method) + assert FLAGS.eval_method in [ + 'none', 'master', 'separate' + ], 'invalid evalaute_method: %s' % FLAGS.eval_method + if FLAGS.with_evaluator: + FLAGS.eval_method = 'separate' + num_worker = set_tf_config_and_get_train_worker_num( + distribute_strategy=distribute_strategy, eval_method=FLAGS.eval_method) + set_distribution_config(pipeline_config, num_worker, num_gpus_per_worker, + distribute_strategy) + train_and_evaluate_impl( + pipeline_config, continue_train=FLAGS.continue_train) + + if FLAGS.hpo_metric_save_path: + hpo_util.save_eval_metrics( + pipeline_config.model_dir, + metric_save_path=FLAGS.hpo_metric_save_path, + has_evaluator=FLAGS.with_evaluator) + elif FLAGS.cmd == 'evaluate': + check_param('config') + # TODO: support multi-worker evaluation + assert len(FLAGS.worker_hosts.split(',')) == 1, 'evaluate only need 1 woker' + config_util.auto_expand_share_feature_configs(pipeline_config) + pipeline_config.eval_input_path = FLAGS.tables + + distribute_strategy = DistributionStrategyMap[FLAGS.distribute_strategy] + set_tf_config_and_get_train_worker_num(eval_method='none') + set_distribution_config(pipeline_config, num_worker, num_gpus_per_worker, + distribute_strategy) + + # parse selected_cols + set_selected_cols(pipeline_config, FLAGS.selected_cols, FLAGS.all_cols, + FLAGS.all_col_types) + + easy_rec.evaluate(pipeline_config, FLAGS.checkpoint_path, None, + FLAGS.eval_result_path) + elif FLAGS.cmd == 'export': + check_param('export_dir') + check_param('config') + + redis_params = {} + if FLAGS.redis_url: + redis_params['redis_url'] = FLAGS.redis_url + if FLAGS.redis_passwd: + redis_params['redis_passwd'] = FLAGS.redis_passwd + if FLAGS.redis_threads > 0: + redis_params['redis_threads'] = FLAGS.redis_threads + if FLAGS.redis_batch_size > 0: + redis_params['redis_batch_size'] = FLAGS.redis_batch_size + if FLAGS.redis_expire > 0: + redis_params['redis_expire'] = 
FLAGS.redis_expire + if FLAGS.redis_embedding_version: + redis_params['redis_embedding_version'] = FLAGS.redis_embedding_version + if FLAGS.redis_write_kv: + redis_params['redis_write_kv'] = FLAGS.redis_write_kv + + set_tf_config_and_get_train_worker_num(eval_method='none') + assert len(FLAGS.worker_hosts.split(',')) == 1, 'export only need 1 woker' + config_util.auto_expand_share_feature_configs(pipeline_config) + easy_rec.export(FLAGS.export_dir, pipeline_config, FLAGS.checkpoint_path, + FLAGS.asset_files, FLAGS.verbose, **redis_params) + elif FLAGS.cmd == 'predict': + check_param('tables') + check_param('saved_model_dir') + logging.info('will use the following columns as model input: %s' % + FLAGS.selected_cols) + logging.info('will copy the following columns to output: %s' % + FLAGS.reserved_cols) + + profiling_file = FLAGS.profiling_file if FLAGS.task_index == 0 else None + if profiling_file is not None: + print('profiling_file = %s ' % profiling_file) + predictor = Predictor(FLAGS.saved_model_dir, profiling_file=profiling_file) + input_table, output_table = FLAGS.tables, FLAGS.outputs + logging.info('input_table = %s, output_table = %s' % + (input_table, output_table)) + worker_num = len(FLAGS.worker_hosts.split(',')) + predictor.predict_table( + input_table, + output_table, + all_cols=FLAGS.all_cols, + all_col_types=FLAGS.all_col_types, + selected_cols=FLAGS.selected_cols, + reserved_cols=FLAGS.reserved_cols, + output_cols=FLAGS.output_cols, + batch_size=FLAGS.batch_size, + slice_id=FLAGS.task_index, + slice_num=worker_num) + else: + raise ValueError('cmd should be one of train/evaluate/export/predict') + + +if __name__ == '__main__': + tf.app.run() diff --git a/requirements/docs.txt b/requirements/docs.txt index 89fbf86c0..cddfc09c6 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,4 +1,4 @@ -recommonmark -sphinx -sphinx_markdown_tables -sphinx_rtd_theme +recommonmark==0.6.0 +sphinx==3.3.1 +sphinx_markdown_tables==0.0.15 +sphinx_rtd_theme==0.5.0 diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 5fe8201c7..6e54423e1 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,3 +1,6 @@ future pandas +psutil +pydatahub +scikit-learn xlrd >= 0.9.0 diff --git a/samples/dh_script/configs/deepfm.config b/samples/dh_script/configs/deepfm.config new file mode 100644 index 000000000..d4d8f3af5 --- /dev/null +++ b/samples/dh_script/configs/deepfm.config @@ -0,0 +1,370 @@ +model_dir: "{TEST_DIR}" + +train_config { + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + log_step_count_steps: 200L + sync_replicas: true +} + +eval_config { + metrics_set: { + auc {} + } +} + +datahub_train_input{ + akId:"{DH_ID}" + akSecret:"{DH_KEY}" + region:"{DH_REG}" + project:"{DH_PRO}" + topic:"{DH_TOPIC}" + shard_num:3 + life_cycle:7 +} + +datahub_eval_input{ + akId:"{DH_ID}" + akSecret:"{DH_KEY}" + region:"{DH_REG}" + project:"{DH_PRO}" + topic:"{DH_TOPIC}" + shard_num:3 + life_cycle:7 +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name: 'hour' + input_type: STRING + } + input_fields { + input_name: 'c1' + input_type: STRING + } + + input_fields { + input_name: 'banner_pos' + input_type: STRING + } + input_fields { + input_name: 'site_id' + input_type: STRING + } + input_fields { + input_name: 
'site_domain' + input_type: STRING + } + input_fields { + input_name: 'site_category' + input_type: STRING + } + input_fields { + input_name: 'app_id' + input_type: STRING + } + input_fields { + input_name: 'app_domain' + input_type: STRING + } + input_fields { + input_name: 'app_category' + input_type: STRING + } + input_fields { + input_name: 'device_id' + input_type: STRING + } + input_fields { + input_name: 'device_ip' + input_type: STRING + } + input_fields { + input_name: 'device_model' + input_type: STRING + } + input_fields { + input_name: 'device_type' + input_type: STRING + } + input_fields { + input_name: 'device_conn_type' + input_type: STRING + } + input_fields { + input_name: 'c14' + input_type: STRING + } + input_fields { + input_name: 'c15' + input_type: STRING + } + input_fields { + input_name: 'c16' + input_type: STRING + } + input_fields { + input_name: 'c17' + input_type: STRING + } + input_fields { + input_name: 'c18' + input_type: STRING + } + input_fields { + input_name: 'c19' + input_type: STRING + } + input_fields { + input_name: 'c20' + input_type: STRING + } + input_fields { + input_name: 'c21' + input_type: STRING + } + + auto_expand_input_fields: true + + label_fields: 'label' + batch_size: 1024 + num_epochs: 10000 + prefetch_size: 32 + input_type: DataHubInput +} + +feature_configs : { + input_names: 'hour' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c1' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'banner_pos' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'site_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'site_domain' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'site_category' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'app_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'app_domain' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'app_category' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_ip' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_model' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_type' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_conn_type' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c14' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c15' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c16' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c17' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + 
input_names: 'c18' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c19' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c20' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c21' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +model_config: { + model_class: 'DeepFM' + feature_groups: { + group_name: 'wide' + feature_names: 'hour' + feature_names: 'c1' + feature_names: 'banner_pos' + feature_names: 'site_id' + feature_names: 'site_domain' + feature_names: 'site_category' + feature_names: 'app_id' + feature_names: 'app_domain' + feature_names: 'app_category' + feature_names: 'device_id' + feature_names: 'device_ip' + feature_names: 'device_model' + feature_names: 'device_type' + feature_names: 'device_conn_type' + feature_names: 'c14' + feature_names: 'c15' + feature_names: 'c16' + feature_names: 'c17' + feature_names: 'c18' + feature_names: 'c19' + feature_names: 'c20' + feature_names: 'c21' + wide_deep: WIDE + } + feature_groups: { + group_name: 'deep' + feature_names: 'hour' + feature_names: 'c1' + feature_names: 'banner_pos' + feature_names: 'site_id' + feature_names: 'site_domain' + feature_names: 'site_category' + feature_names: 'app_id' + feature_names: 'app_domain' + feature_names: 'app_category' + feature_names: 'device_id' + feature_names: 'device_ip' + feature_names: 'device_model' + feature_names: 'device_type' + feature_names: 'device_conn_type' + feature_names: 'c14' + feature_names: 'c15' + feature_names: 'c16' + feature_names: 'c17' + feature_names: 'c18' + feature_names: 'c19' + feature_names: 'c20' + feature_names: 'c21' + wide_deep: DEEP + } + deepfm { + dnn { + hidden_units: [64, 32, 16] + } + final_dnn { + hidden_units: [128, 64] + } + wide_output_dim: 16 + wide_regularization: 1e-4 + } + embedding_regularization: 1e-5 +} diff --git a/samples/dh_script/configs/dh.config b/samples/dh_script/configs/dh.config new file mode 100644 index 000000000..1aa936bab --- /dev/null +++ b/samples/dh_script/configs/dh.config @@ -0,0 +1,6 @@ +[datahub] +access_id= +access_key= +endpoint=https://dh-cn-beijing.aliyuncs.com +topic_name=pf_test +project=tmf_easy diff --git a/samples/dh_script/deep_fm/create_external_deepfm_table.sql b/samples/dh_script/deep_fm/create_external_deepfm_table.sql new file mode 100644 index 000000000..031af15dd --- /dev/null +++ b/samples/dh_script/deep_fm/create_external_deepfm_table.sql @@ -0,0 +1,65 @@ +drop TABLE IF EXISTS external_deepfm_train_{TIME_STAMP} ; +create EXTERNAL table external_deepfm_train_{TIME_STAMP}( + label BIGINT + ,`hour` string + ,c1 STRING + ,banner_pos STRING + ,site_id STRING + ,site_domain STRING + ,site_category STRING + ,app_id STRING + ,app_domain STRING + ,app_category STRING + ,device_id STRING + ,device_ip STRING + ,device_model STRING + ,device_type STRING + ,device_conn_type STRING + ,c14 STRING + ,c15 STRING + ,c16 STRING + ,c17 STRING + ,c18 STRING + ,c19 STRING + ,c20 STRING + ,c21 STRING +) +STORED BY 'com.aliyun.odps.CsvStorageHandler' +WITH SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_ENDPOINT_INTERNAL}/{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/train/' +; + +drop TABLE IF EXISTS external_deepfm_test_{TIME_STAMP}; +create EXTERNAL table external_deepfm_test_{TIME_STAMP}( + label BIGINT + ,`hour` string + ,c1 STRING + ,banner_pos STRING + ,site_id STRING + ,site_domain 
STRING + ,site_category STRING + ,app_id STRING + ,app_domain STRING + ,app_category STRING + ,device_id STRING + ,device_ip STRING + ,device_model STRING + ,device_type STRING + ,device_conn_type STRING + ,c14 STRING + ,c15 STRING + ,c16 STRING + ,c17 STRING + ,c18 STRING + ,c19 STRING + ,c20 STRING + ,c21 STRING +) +STORED BY 'com.aliyun.odps.CsvStorageHandler' +WITH SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_ENDPOINT_INTERNAL}/{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/test/' +; diff --git a/samples/dh_script/deep_fm/create_inner_deepfm_table.sql b/samples/dh_script/deep_fm/create_inner_deepfm_table.sql new file mode 100644 index 000000000..bf70f0ac1 --- /dev/null +++ b/samples/dh_script/deep_fm/create_inner_deepfm_table.sql @@ -0,0 +1,66 @@ +drop TABLE IF EXISTS deepfm_train_{TIME_STAMP}; +create table deepfm_train_{TIME_STAMP}( + label BIGINT + ,`hour` string + ,c1 STRING + ,banner_pos STRING + ,site_id STRING + ,site_domain STRING + ,site_category STRING + ,app_id STRING + ,app_domain STRING + ,app_category STRING + ,device_id STRING + ,device_ip STRING + ,device_model STRING + ,device_type STRING + ,device_conn_type STRING + ,c14 STRING + ,c15 STRING + ,c16 STRING + ,c17 STRING + ,c18 STRING + ,c19 STRING + ,c20 STRING + ,c21 STRING +) +; + +INSERT OVERWRITE TABLE deepfm_train_{TIME_STAMP} +select * from external_deepfm_train_{TIME_STAMP} ; + +desc deepfm_train_{TIME_STAMP}; +desc external_deepfm_train_{TIME_STAMP}; + +drop TABLE IF EXISTS deepfm_test_{TIME_STAMP}; +create table deepfm_test_{TIME_STAMP}( + label BIGINT + ,`hour` string + ,c1 STRING + ,banner_pos STRING + ,site_id STRING + ,site_domain STRING + ,site_category STRING + ,app_id STRING + ,app_domain STRING + ,app_category STRING + ,device_id STRING + ,device_ip STRING + ,device_model STRING + ,device_type STRING + ,device_conn_type STRING + ,c14 STRING + ,c15 STRING + ,c16 STRING + ,c17 STRING + ,c18 STRING + ,c19 STRING + ,c20 STRING + ,c21 STRING +) +; + +INSERT OVERWRITE TABLE deepfm_test_{TIME_STAMP} +select * from external_deepfm_test_{TIME_STAMP}; +desc deepfm_test_{TIME_STAMP}; +desc external_deepfm_test_{TIME_STAMP}; diff --git a/samples/dh_script/deep_fm/drop_table.sql b/samples/dh_script/deep_fm/drop_table.sql new file mode 100644 index 000000000..77e256560 --- /dev/null +++ b/samples/dh_script/deep_fm/drop_table.sql @@ -0,0 +1,5 @@ +drop TABLE IF EXISTS external_deepfm_train_{TIME_STAMP}; +drop TABLE IF EXISTS external_deepfm_test_{TIME_STAMP}; +drop TABLE IF EXISTS deepfm_train_{TIME_STAMP}; +drop TABLE IF EXISTS deepfm_test_{TIME_STAMP}; +drop TABLE IF EXISTS deepfm_output_v1_{TIME_STAMP}; diff --git a/samples/emr_script/configs/deepfm.config b/samples/emr_script/configs/deepfm.config new file mode 100644 index 000000000..f33a3ef25 --- /dev/null +++ b/samples/emr_script/configs/deepfm.config @@ -0,0 +1,353 @@ +train_input_path: "odps://{ODPS_PROJ_NAME}/tables/deepfm_train_{TIME_STAMP}" +eval_input_path: "odps://{ODPS_PROJ_NAME}/tables/deepfm_test_{TIME_STAMP}" +model_dir: "{TEST_DIR}" + +train_config { + log_step_count_steps: 200 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name: 'hour' + 
input_type: STRING + } + input_fields { + input_name: 'c1' + input_type: STRING + } + + input_fields { + input_name: 'banner_pos' + input_type: STRING + } + input_fields { + input_name: 'site_id' + input_type: STRING + } + input_fields { + input_name: 'site_domain' + input_type: STRING + } + input_fields { + input_name: 'site_category' + input_type: STRING + } + input_fields { + input_name: 'app_id' + input_type: STRING + } + input_fields { + input_name: 'app_domain' + input_type: STRING + } + input_fields { + input_name: 'app_category' + input_type: STRING + } + input_fields { + input_name: 'device_id' + input_type: STRING + } + input_fields { + input_name: 'device_ip' + input_type: STRING + } + input_fields { + input_name: 'device_model' + input_type: STRING + } + input_fields { + input_name: 'device_type' + input_type: STRING + } + input_fields { + input_name: 'device_conn_type' + input_type: STRING + } + input_fields { + input_name: 'c14' + input_type: STRING + } + input_fields { + input_name: 'c15' + input_type: STRING + } + input_fields { + input_name: 'c16' + input_type: STRING + } + input_fields { + input_name: 'c17' + input_type: STRING + } + input_fields { + input_name: 'c18' + input_type: STRING + } + input_fields { + input_name: 'c19' + input_type: STRING + } + input_fields { + input_name: 'c20' + input_type: STRING + } + input_fields { + input_name: 'c21' + input_type: STRING + } + + auto_expand_input_fields: true + + label_fields: 'label' + batch_size: 1024 + num_epochs: 10000 + prefetch_size: 32 + input_type: OdpsInputV3 +} + +feature_configs : { + input_names: 'hour' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c1' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'banner_pos' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'site_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'site_domain' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'site_category' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'app_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'app_domain' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'app_category' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_ip' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_model' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_type' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'device_conn_type' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c14' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c15' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + 
+feature_configs : { + input_names: 'c16' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c17' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c18' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c19' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c20' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'c21' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +model_config: { + model_class: 'DeepFM' + feature_groups: { + group_name: 'wide' + feature_names: 'hour' + feature_names: 'c1' + feature_names: 'banner_pos' + feature_names: 'site_id' + feature_names: 'site_domain' + feature_names: 'site_category' + feature_names: 'app_id' + feature_names: 'app_domain' + feature_names: 'app_category' + feature_names: 'device_id' + feature_names: 'device_ip' + feature_names: 'device_model' + feature_names: 'device_type' + feature_names: 'device_conn_type' + feature_names: 'c14' + feature_names: 'c15' + feature_names: 'c16' + feature_names: 'c17' + feature_names: 'c18' + feature_names: 'c19' + feature_names: 'c20' + feature_names: 'c21' + wide_deep: WIDE + } + feature_groups: { + group_name: 'deep' + feature_names: 'hour' + feature_names: 'c1' + feature_names: 'banner_pos' + feature_names: 'site_id' + feature_names: 'site_domain' + feature_names: 'site_category' + feature_names: 'app_id' + feature_names: 'app_domain' + feature_names: 'app_category' + feature_names: 'device_id' + feature_names: 'device_ip' + feature_names: 'device_model' + feature_names: 'device_type' + feature_names: 'device_conn_type' + feature_names: 'c14' + feature_names: 'c15' + feature_names: 'c16' + feature_names: 'c17' + feature_names: 'c18' + feature_names: 'c19' + feature_names: 'c20' + feature_names: 'c21' + wide_deep: DEEP + } + deepfm { + dnn { + hidden_units: [64, 32, 16] + } + final_dnn { + hidden_units: [128, 64] + } + wide_output_dim: 16 + wide_regularization: 1e-4 + } + embedding_regularization: 1e-5 +} diff --git a/samples/emr_script/configs/deepfm_eval_pipeline.config b/samples/emr_script/configs/deepfm_eval_pipeline.config new file mode 100644 index 000000000..b742549a2 --- /dev/null +++ b/samples/emr_script/configs/deepfm_eval_pipeline.config @@ -0,0 +1,331 @@ +train_input_path: "odps://test_03/tables/deepfm_train_1612255915" +eval_input_path: "odps://test_03/tables/deepfm_test_1612255915" +model_dir: "hdfs://emr-header-1:9000/user/easy_rec/emr_test/L4smcfr1/train" +train_config { + optimizer_config { + use_moving_average: false + adam_optimizer { + learning_rate { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 1e-07 + } + } + } + } + num_steps: 2000 + sync_replicas: false + log_step_count_steps: 200 + train_distribute: NoStrategy + num_gpus_per_worker: 1 +} +eval_config { + num_examples: 1024 + metrics_set { + auc { + } + } +} +data_config { + batch_size: 1024 + auto_expand_input_fields: true + label_fields: "label" + num_epochs: 0 + prefetch_size: 32 + input_type: OdpsInputV3 + input_fields { + input_name: "label" + input_type: INT32 + } + input_fields { + input_name: "hour" + input_type: STRING + } + input_fields { + input_name: "c1" + input_type: STRING + } + input_fields { + input_name: 
"banner_pos" + input_type: STRING + } + input_fields { + input_name: "site_id" + input_type: STRING + } + input_fields { + input_name: "site_domain" + input_type: STRING + } + input_fields { + input_name: "site_category" + input_type: STRING + } + input_fields { + input_name: "app_id" + input_type: STRING + } + input_fields { + input_name: "app_domain" + input_type: STRING + } + input_fields { + input_name: "app_category" + input_type: STRING + } + input_fields { + input_name: "device_id" + input_type: STRING + } + input_fields { + input_name: "device_ip" + input_type: STRING + } + input_fields { + input_name: "device_model" + input_type: STRING + } + input_fields { + input_name: "device_type" + input_type: STRING + } + input_fields { + input_name: "device_conn_type" + input_type: STRING + } + input_fields { + input_name: "c14" + input_type: STRING + } + input_fields { + input_name: "c15" + input_type: STRING + } + input_fields { + input_name: "c16" + input_type: STRING + } + input_fields { + input_name: "c17" + input_type: STRING + } + input_fields { + input_name: "c18" + input_type: STRING + } + input_fields { + input_name: "c19" + input_type: STRING + } + input_fields { + input_name: "c20" + input_type: STRING + } + input_fields { + input_name: "c21" + input_type: STRING + } +} +feature_configs { + input_names: "hour" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "c1" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "banner_pos" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 
+} +feature_configs { + input_names: "c19" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "c20" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +feature_configs { + input_names: "c21" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} +model_config { + model_class: "DeepFM" + feature_groups { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep: WIDE + } + feature_groups { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep: DEEP + } + embedding_regularization: 1e-05 + deepfm { + dnn { + hidden_units: 64 + hidden_units: 32 + hidden_units: 16 + } + final_dnn { + hidden_units: 128 + hidden_units: 64 + } + wide_output_dim: 16 + wide_regularization: 0.0001 + } +} diff --git a/samples/emr_script/deep_fm/create_external_deepfm_table.sql b/samples/emr_script/deep_fm/create_external_deepfm_table.sql new file mode 100644 index 000000000..031af15dd --- /dev/null +++ b/samples/emr_script/deep_fm/create_external_deepfm_table.sql @@ -0,0 +1,65 @@ +drop TABLE IF EXISTS external_deepfm_train_{TIME_STAMP} ; +create EXTERNAL table external_deepfm_train_{TIME_STAMP}( + label BIGINT + ,`hour` string + ,c1 STRING + ,banner_pos STRING + ,site_id STRING + ,site_domain STRING + ,site_category STRING + ,app_id STRING + ,app_domain STRING + ,app_category STRING + ,device_id STRING + ,device_ip STRING + ,device_model STRING + ,device_type STRING + ,device_conn_type STRING + ,c14 STRING + ,c15 STRING + ,c16 STRING + ,c17 STRING + ,c18 STRING + ,c19 STRING + ,c20 STRING + ,c21 STRING +) +STORED BY 'com.aliyun.odps.CsvStorageHandler' +WITH SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_ENDPOINT_INTERNAL}/{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/train/' +; + +drop TABLE IF EXISTS external_deepfm_test_{TIME_STAMP}; +create EXTERNAL table external_deepfm_test_{TIME_STAMP}( + label BIGINT + ,`hour` string + ,c1 STRING + ,banner_pos STRING + ,site_id STRING + ,site_domain STRING + ,site_category STRING + ,app_id STRING + ,app_domain STRING + ,app_category STRING + ,device_id STRING + ,device_ip STRING + ,device_model STRING + ,device_type STRING + ,device_conn_type STRING + ,c14 STRING + ,c15 STRING + ,c16 STRING + ,c17 STRING + ,c18 STRING + ,c19 STRING + ,c20 STRING + ,c21 STRING +) +STORED BY 'com.aliyun.odps.CsvStorageHandler' +WITH 
SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_ENDPOINT_INTERNAL}/{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/test/' +; diff --git a/samples/emr_script/deep_fm/create_inner_deepfm_table.sql b/samples/emr_script/deep_fm/create_inner_deepfm_table.sql new file mode 100644 index 000000000..bf70f0ac1 --- /dev/null +++ b/samples/emr_script/deep_fm/create_inner_deepfm_table.sql @@ -0,0 +1,66 @@ +drop TABLE IF EXISTS deepfm_train_{TIME_STAMP}; +create table deepfm_train_{TIME_STAMP}( + label BIGINT + ,`hour` string + ,c1 STRING + ,banner_pos STRING + ,site_id STRING + ,site_domain STRING + ,site_category STRING + ,app_id STRING + ,app_domain STRING + ,app_category STRING + ,device_id STRING + ,device_ip STRING + ,device_model STRING + ,device_type STRING + ,device_conn_type STRING + ,c14 STRING + ,c15 STRING + ,c16 STRING + ,c17 STRING + ,c18 STRING + ,c19 STRING + ,c20 STRING + ,c21 STRING +) +; + +INSERT OVERWRITE TABLE deepfm_train_{TIME_STAMP} +select * from external_deepfm_train_{TIME_STAMP} ; + +desc deepfm_train_{TIME_STAMP}; +desc external_deepfm_train_{TIME_STAMP}; + +drop TABLE IF EXISTS deepfm_test_{TIME_STAMP}; +create table deepfm_test_{TIME_STAMP}( + label BIGINT + ,`hour` string + ,c1 STRING + ,banner_pos STRING + ,site_id STRING + ,site_domain STRING + ,site_category STRING + ,app_id STRING + ,app_domain STRING + ,app_category STRING + ,device_id STRING + ,device_ip STRING + ,device_model STRING + ,device_type STRING + ,device_conn_type STRING + ,c14 STRING + ,c15 STRING + ,c16 STRING + ,c17 STRING + ,c18 STRING + ,c19 STRING + ,c20 STRING + ,c21 STRING +) +; + +INSERT OVERWRITE TABLE deepfm_test_{TIME_STAMP} +select * from external_deepfm_test_{TIME_STAMP}; +desc deepfm_test_{TIME_STAMP}; +desc external_deepfm_test_{TIME_STAMP}; diff --git a/samples/emr_script/deep_fm/drop_table.sql b/samples/emr_script/deep_fm/drop_table.sql new file mode 100644 index 000000000..77e256560 --- /dev/null +++ b/samples/emr_script/deep_fm/drop_table.sql @@ -0,0 +1,5 @@ +drop TABLE IF EXISTS external_deepfm_train_{TIME_STAMP}; +drop TABLE IF EXISTS external_deepfm_test_{TIME_STAMP}; +drop TABLE IF EXISTS deepfm_train_{TIME_STAMP}; +drop TABLE IF EXISTS deepfm_test_{TIME_STAMP}; +drop TABLE IF EXISTS deepfm_output_v1_{TIME_STAMP}; diff --git a/samples/emr_script/yaml_config/eval.tf.yaml.template b/samples/emr_script/yaml_config/eval.tf.yaml.template new file mode 100644 index 000000000..d2d404094 --- /dev/null +++ b/samples/emr_script/yaml_config/eval.tf.yaml.template @@ -0,0 +1,17 @@ +app: + app_type: standalone + app_name: easyrec_tf_eval + mode: local + exit_mode: true + verbose: true + files: {TEMP_DIR}/configs/deepfm_eval_pipeline.config,./.odps_config.ini + command: python -m easy_rec.python.eval --pipeline_config_path {TEMP_DIR}/configs/deepfm_eval_pipeline.config --odps_config ./.odps_config.ini + wait_time: 8 + hook: /usr/local/dstools/bin/hooks.sh + +resource: + worker_num: 1 + worker_cpu: 6 + worker_gpu: + worker_memory: 10g + worker_mode_arg: diff --git a/samples/emr_script/yaml_config/export.tf.yaml.template b/samples/emr_script/yaml_config/export.tf.yaml.template new file mode 100644 index 000000000..a64971fbf --- /dev/null +++ b/samples/emr_script/yaml_config/export.tf.yaml.template @@ -0,0 +1,17 @@ +app: + app_type: standalone + app_name: easyrec_tf_export + mode: local + exit_mode: true + verbose: true + files: {TEMP_DIR}/configs/deepfm_eval_pipeline.config + command: python -m easy_rec.python.export --pipeline_config_path 
{TEMP_DIR}/configs/deepfm_eval_pipeline.config --export_dir {EXPOERT_DIR}
+  wait_time: 8
+  hook: /usr/local/dstools/bin/hooks.sh
+
+resource:
+  worker_num: 1
+  worker_cpu: 6
+  worker_gpu:
+  worker_memory: 10g
+  worker_mode_arg:
diff --git a/samples/emr_script/yaml_config/train.paitf.yaml.template b/samples/emr_script/yaml_config/train.paitf.yaml.template
new file mode 100644
index 000000000..8da169569
--- /dev/null
+++ b/samples/emr_script/yaml_config/train.paitf.yaml.template
@@ -0,0 +1,19 @@
+app:
+  app_name: easyrec_tf_train
+  app_type: tensorflow-ps
+  command: python -m easy_rec.python.train_eval --pipeline_config_path {TEMP_DIR}/configs/deepfm.config --odps_config ./.odps_config.ini --continue_train
+  exit_mode: true
+  files: {TEMP_DIR}/configs/deepfm.config,./.odps_config.ini
+  hook: /usr/local/dstools/bin/hooks.sh
+  mode: local
+  verbose: true
+  wait_time: 8
+resource:
+  ps_cpu: 1
+  ps_memory: 10g
+  ps_mode_arg: null
+  ps_num: 1
+  worker_cpu: 1
+  worker_memory: 10g
+  worker_mode_arg: null
+  worker_num: 2
diff --git a/samples/hpo/hpo_param_v10.json b/samples/hpo/hpo_param_v10.json
new file mode 100755
index 000000000..6d798fe8c
--- /dev/null
+++ b/samples/hpo/hpo_param_v10.json
@@ -0,0 +1,5 @@
+{
+  "param": {
+    "feature_configs[input_names[0]=c21]": "{input_names:'c21' feature_type:RawFeature boundaries:[4,5,6,7] embedding_dim:32}"
+  }
+}
diff --git a/samples/hpo/hpo_param_v11.json b/samples/hpo/hpo_param_v11.json
new file mode 100755
index 000000000..cfc9a0c52
--- /dev/null
+++ b/samples/hpo/hpo_param_v11.json
@@ -0,0 +1,5 @@
+{
+  "param": {
+    "feature_configs[input_names[0]=c21].boundaries": "[10.0, 20.0, 30.0, 40.0]"
+  }
+}
diff --git a/samples/hpo/hpo_param_v12.json b/samples/hpo/hpo_param_v12.json
new file mode 100755
index 000000000..7a7a0a427
--- /dev/null
+++ b/samples/hpo/hpo_param_v12.json
@@ -0,0 +1,5 @@
+{
+  "param": {
+    "feature_configs[input_names[0]=c21].boundaries[1]": "21.0"
+  }
+}
diff --git a/samples/hpo/hpo_param_v51.json b/samples/hpo/hpo_param_v51.json
new file mode 100644
index 000000000..8b400769a
--- /dev/null
+++ b/samples/hpo/hpo_param_v51.json
@@ -0,0 +1,5 @@
+{
+  "param": {
+    "feature_configs[5].embedding_dim": 37
+  }
+}
diff --git a/samples/hpo/hpo_param_v81.json b/samples/hpo/hpo_param_v81.json
new file mode 100644
index 000000000..fbd98614d
--- /dev/null
+++ b/samples/hpo/hpo_param_v81.json
@@ -0,0 +1,5 @@
+{
+  "param": {
+    "feature_configs[feature_type=RawFeature].embedding_dim":24
+  }
+}
diff --git a/samples/hpo/hyperparams.json b/samples/hpo/hyperparams.json
index 0fe076d09..f6e338e39 100644
--- a/samples/hpo/hyperparams.json
+++ b/samples/hpo/hyperparams.json
@@ -1,7 +1,7 @@
 [
   {
     "type": "Categorical",
-    "name": "feature_configs[input_names[0]=field1].embedding_dim;feature_configs[input_names[0]=field20].embedding_dim",
-    "candidates": ["16;16", "32;32", "48;48", "64;64", "80;80"]
+    "name": "feature_configs[:].embedding_dim",
+    "candidates": ["16", "32", "48", "64", "80"]
   }
 ]
diff --git a/samples/model_config/autoint_on_taobao.config b/samples/model_config/autoint_on_taobao.config
new file mode 100644
index 000000000..a21df6dfa
--- /dev/null
+++ b/samples/model_config/autoint_on_taobao.config
@@ -0,0 +1,261 @@
+train_input_path: "data/test/tb_data/taobao_train_data"
+eval_input_path: "data/test/tb_data/taobao_test_data"
+model_dir: "experiments/autoint_taobao_ckpt"
+
+train_config {
+  log_step_count_steps: 100
+  optimizer_config: {
+    adam_optimizer: {
+      learning_rate: {
+        exponential_decay_learning_rate {
+          initial_learning_rate: 0.001
+
decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + 
hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'AutoInt' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'final_gender_code' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + autoint { + multi_head_num: 2 + multi_head_size: 32 + interacting_layer_num: 3 + l2_regularization: 1e-6 + } + embedding_regularization: 1e-6 +} diff --git a/samples/model_config/dbmtl_variational_dropout.config b/samples/model_config/dbmtl_variational_dropout.config new file mode 100644 index 000000000..f6495c664 --- /dev/null +++ b/samples/model_config/dbmtl_variational_dropout.config @@ -0,0 +1,292 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dbmtl_taobao_ckpt" + +train_config { + optimizer_config { + adam_optimizer { + learning_rate { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 1e-07 + } + } + } + use_moving_average: false + } + num_steps: 5000 + sync_replicas: true + save_checkpoints_steps: 100 + log_step_count_steps: 100 +} +eval_config { + metrics_set { + auc { + } + } +} +data_config { + batch_size: 4096 + label_fields: "clk" + label_fields: "buy" + prefetch_size: 32 + input_type: CSVInput + input_fields { + input_name: "clk" + input_type: INT32 + } + input_fields { + input_name: "buy" + input_type: INT32 + } + input_fields { + input_name: "pid" + input_type: STRING + } + input_fields { + input_name: "adgroup_id" + input_type: STRING + } + input_fields { + input_name: "cate_id" + input_type: STRING + } + input_fields { + input_name: "campaign_id" + input_type: STRING + } + input_fields { + input_name: "customer" + input_type: STRING + } + input_fields { + input_name: "brand" + input_type: STRING + } + input_fields { + input_name: "user_id" + input_type: STRING + } + input_fields { + input_name: "cms_segid" + input_type: STRING + } + input_fields { + input_name: "cms_group_id" + input_type: STRING + } + input_fields { + input_name: "final_gender_code" + input_type: STRING + } + input_fields { + input_name: "age_level" + input_type: STRING + } + input_fields { + input_name: "pvalue_level" + input_type: STRING + } + input_fields { + input_name: "shopping_level" + input_type: STRING + } + input_fields { + input_name: "occupation" + input_type: STRING + } + input_fields { + input_name: "new_user_class_level" + input_type: STRING + } + input_fields { + input_name: "tag_category_list" + input_type: STRING + } + input_fields { + input_name: "tag_brand_list" + input_type: STRING + } + input_fields { + input_name: "price" + input_type: INT32 + } +} 
+feature_configs { + input_names: "pid" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "adgroup_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "cate_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs { + input_names: "campaign_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "customer" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "brand" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "user_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "cms_segid" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs { + input_names: "cms_group_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs { + input_names: "final_gender_code" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "age_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "pvalue_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "shopping_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "occupation" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "new_user_class_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "tag_category_list" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "|" +} +feature_configs { + input_names: "tag_brand_list" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "|" +} +feature_configs { + input_names: "price" + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config { + model_class: "DBMTL" + feature_groups { + group_name: "all" + feature_names: "user_id" + feature_names: "cms_segid" + feature_names: "cms_group_id" + feature_names: "age_level" + feature_names: "pvalue_level" + feature_names: "shopping_level" + feature_names: "occupation" + feature_names: "new_user_class_level" + feature_names: "adgroup_id" + feature_names: "cate_id" + feature_names: "campaign_id" + feature_names: "customer" + feature_names: "brand" + feature_names: "price" + feature_names: "pid" + feature_names: "tag_category_list" + feature_names: "tag_brand_list" + wide_deep: DEEP + } + dbmtl { + bottom_dnn { + hidden_units: [1024, 512, 256] + } + task_towers { + tower_name: "ctr" + label_name: "clk" + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + dnn { + hidden_units: [256, 128, 64, 32] + } + relation_dnn { + hidden_units: [32] + } + weight: 1.0 + } + task_towers { + tower_name: "cvr" + label_name: "buy" + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + dnn { + hidden_units: [256, 128, 64, 32] + } + relation_tower_names: ["ctr"] + relation_dnn { + hidden_units: [32] + } + weight: 1.0 + } + l2_regularization: 1e-6 + } + variational_dropout{ + regularization_lambda:0.01 + embedding_wise_variational_dropout:true + } + embedding_regularization: 5e-6 +} diff --git 
a/samples/model_config/dbmtl_variational_dropout_feature_num.config b/samples/model_config/dbmtl_variational_dropout_feature_num.config new file mode 100644 index 000000000..3b0dec93a --- /dev/null +++ b/samples/model_config/dbmtl_variational_dropout_feature_num.config @@ -0,0 +1,292 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dbmtl_taobao_ckpt" + +train_config { + optimizer_config { + adam_optimizer { + learning_rate { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 1e-07 + } + } + } + use_moving_average: false + } + num_steps: 5000 + sync_replicas: true + save_checkpoints_steps: 100 + log_step_count_steps: 100 +} +eval_config { + metrics_set { + auc { + } + } +} +data_config { + batch_size: 4096 + label_fields: "clk" + label_fields: "buy" + prefetch_size: 32 + input_type: CSVInput + input_fields { + input_name: "clk" + input_type: INT32 + } + input_fields { + input_name: "buy" + input_type: INT32 + } + input_fields { + input_name: "pid" + input_type: STRING + } + input_fields { + input_name: "adgroup_id" + input_type: STRING + } + input_fields { + input_name: "cate_id" + input_type: STRING + } + input_fields { + input_name: "campaign_id" + input_type: STRING + } + input_fields { + input_name: "customer" + input_type: STRING + } + input_fields { + input_name: "brand" + input_type: STRING + } + input_fields { + input_name: "user_id" + input_type: STRING + } + input_fields { + input_name: "cms_segid" + input_type: STRING + } + input_fields { + input_name: "cms_group_id" + input_type: STRING + } + input_fields { + input_name: "final_gender_code" + input_type: STRING + } + input_fields { + input_name: "age_level" + input_type: STRING + } + input_fields { + input_name: "pvalue_level" + input_type: STRING + } + input_fields { + input_name: "shopping_level" + input_type: STRING + } + input_fields { + input_name: "occupation" + input_type: STRING + } + input_fields { + input_name: "new_user_class_level" + input_type: STRING + } + input_fields { + input_name: "tag_category_list" + input_type: STRING + } + input_fields { + input_name: "tag_brand_list" + input_type: STRING + } + input_fields { + input_name: "price" + input_type: INT32 + } +} +feature_configs { + input_names: "pid" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "adgroup_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "cate_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs { + input_names: "campaign_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "customer" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "brand" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "user_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs { + input_names: "cms_segid" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs { + input_names: "cms_group_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs { + input_names: "final_gender_code" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} 
+feature_configs { + input_names: "age_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "pvalue_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "shopping_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "occupation" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "new_user_class_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs { + input_names: "tag_category_list" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "|" +} +feature_configs { + input_names: "tag_brand_list" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "|" +} +feature_configs { + input_names: "price" + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config { + model_class: "DBMTL" + feature_groups { + group_name: "all" + feature_names: "user_id" + feature_names: "cms_segid" + feature_names: "cms_group_id" + feature_names: "age_level" + feature_names: "pvalue_level" + feature_names: "shopping_level" + feature_names: "occupation" + feature_names: "new_user_class_level" + feature_names: "adgroup_id" + feature_names: "cate_id" + feature_names: "campaign_id" + feature_names: "customer" + feature_names: "brand" + feature_names: "price" + feature_names: "pid" + feature_names: "tag_category_list" + feature_names: "tag_brand_list" + wide_deep: DEEP + } + dbmtl { + bottom_dnn { + hidden_units: [1024, 512, 256] + } + task_towers { + tower_name: "ctr" + label_name: "clk" + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + dnn { + hidden_units: [256, 128, 64, 32] + } + relation_dnn { + hidden_units: [32] + } + weight: 1.0 + } + task_towers { + tower_name: "cvr" + label_name: "buy" + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + dnn { + hidden_units: [256, 128, 64, 32] + } + relation_tower_names: ["ctr"] + relation_dnn { + hidden_units: [32] + } + weight: 1.0 + } + l2_regularization: 1e-6 + } + variational_dropout{ + regularization_lambda:0.01 + embedding_wise_variational_dropout:false + } + embedding_regularization: 5e-6 +} diff --git a/samples/model_config/dcn_f1_on_taobao.config b/samples/model_config/dcn_f1_on_taobao.config new file mode 100644 index 000000000..a4bb9439f --- /dev/null +++ b/samples/model_config/dcn_f1_on_taobao.config @@ -0,0 +1,273 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dcn_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: 
STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'DCN' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + 
feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + dcn { + deep_tower { + input: "all" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + cross_tower { + input: "all" + cross_num: 5 + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} diff --git a/samples/model_config/dcn_on_taobao.config b/samples/model_config/dcn_on_taobao.config new file mode 100644 index 000000000..e3f9012a0 --- /dev/null +++ b/samples/model_config/dcn_on_taobao.config @@ -0,0 +1,270 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dcn_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + 
hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'DCN' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + dcn { + deep_tower { + input: "all" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + cross_tower { + input: "all" + cross_num: 5 + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} diff --git a/samples/model_config/deepfm_combo_on_avazu_adamw_ctr.config b/samples/model_config/deepfm_combo_on_avazu_adamw_ctr.config new file mode 100644 index 000000000..7347b3151 --- /dev/null +++ b/samples/model_config/deepfm_combo_on_avazu_adamw_ctr.config @@ -0,0 +1,364 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/dwd_avazu_out_test_combo_adamw" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + adamw_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + weight_decay: 1e-5 + } + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + 
default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c21" + input_type: INT64 + default_val:"0" + } + label_fields: "label" + + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + num_buckets: 24 + embedding_dim: 16 +} +feature_configs: { + input_names: "c1" + feature_type: RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 16 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + 
input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 16 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] + embedding_dim: 16 +} +feature_configs: { + input_names: ["site_id", "app_id"] + feature_name: "site_id_app_id" + feature_type: ComboFeature + hash_bucket_size: 1000, + embedding_dim: 16 +} + +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + feature_names: "site_id_app_id" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + deepfm { + wide_output_dim: 16 + + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + } +} diff --git a/samples/model_config/deepfm_combo_on_avazu_ctr.config b/samples/model_config/deepfm_combo_on_avazu_ctr.config index 53808d47e..a68ef0ba8 100644 --- a/samples/model_config/deepfm_combo_on_avazu_ctr.config +++ b/samples/model_config/deepfm_combo_on_avazu_ctr.config @@ -364,3 +364,7 @@ model_config:{ } embedding_regularization: 1e-7 } + +export_config { + multi_placeholder: false +} diff --git a/samples/model_config/deepfm_combo_on_avazu_emblr_ctr.config 
b/samples/model_config/deepfm_combo_on_avazu_emblr_ctr.config new file mode 100644 index 000000000..a188f3118 --- /dev/null +++ b/samples/model_config/deepfm_combo_on_avazu_emblr_ctr.config @@ -0,0 +1,371 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/dwd_avazu_out_test_combo_emblr" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + embedding_learning_rate_multiplier: 100 + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c21" + input_type: INT64 + default_val:"0" + } + label_fields: "label" + + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + num_buckets: 24 + embedding_dim: 16 +} +feature_configs: { + input_names: "c1" + feature_type: RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 16 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + 
input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 16 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] + embedding_dim: 16 +} +feature_configs: { + input_names: ["site_id", "app_id"] + feature_name: "site_id_app_id" + feature_type: ComboFeature + hash_bucket_size: 1000, + embedding_dim: 16 +} + +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + feature_names: "site_id_app_id" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + 
feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + deepfm { + wide_output_dim: 16 + + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + l2_regularization: 1e-5 + } + embedding_regularization: 1e-7 +} + +export_config { + multi_placeholder: false +} diff --git a/samples/model_config/deepfm_combo_on_avazu_eval_online_ctr.config b/samples/model_config/deepfm_combo_on_avazu_eval_online_ctr.config new file mode 100644 index 000000000..e1492315f --- /dev/null +++ b/samples/model_config/deepfm_combo_on_avazu_eval_online_ctr.config @@ -0,0 +1,371 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/dwd_avazu_out_test_combo_eval_online" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + auc {} + } + eval_online: true +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c21" + input_type: INT64 + default_val:"0" + } + label_fields: "label" + + batch_size: 1024 + 
prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + num_buckets: 24 + embedding_dim: 16 +} +feature_configs: { + input_names: "c1" + feature_type: RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 16 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 16 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] + embedding_dim: 16 +} +feature_configs: { + input_names: ["site_id", "app_id"] + feature_name: "site_id_app_id" + feature_type: ComboFeature + hash_bucket_size: 1000, + embedding_dim: 16 +} + +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: 
"device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + feature_names: "site_id_app_id" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + deepfm { + wide_output_dim: 16 + + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + l2_regularization: 1e-5 + } + embedding_regularization: 1e-7 +} + +export_config { + multi_placeholder: false +} diff --git a/samples/model_config/deepfm_combo_on_avazu_momentumw_ctr.config b/samples/model_config/deepfm_combo_on_avazu_momentumw_ctr.config new file mode 100644 index 000000000..ba6aace4a --- /dev/null +++ b/samples/model_config/deepfm_combo_on_avazu_momentumw_ctr.config @@ -0,0 +1,364 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/dwd_avazu_out_test_combo_momentumw" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + momentumw_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + weight_decay: 1e-5 + } + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" 
+ } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c21" + input_type: INT64 + default_val:"0" + } + label_fields: "label" + + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + num_buckets: 24 + embedding_dim: 16 +} +feature_configs: { + input_names: "c1" + feature_type: RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 16 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 16 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] 
+ embedding_dim: 16 +} +feature_configs: { + input_names: ["site_id", "app_id"] + feature_name: "site_id_app_id" + feature_type: ComboFeature + hash_bucket_size: 1000, + embedding_dim: 16 +} + +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + feature_names: "site_id_app_id" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + deepfm { + wide_output_dim: 16 + + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + } +} diff --git a/samples/model_config/deepfm_combo_on_avazu_sigmoid_l2.config b/samples/model_config/deepfm_combo_on_avazu_sigmoid_l2.config new file mode 100644 index 000000000..3054b1282 --- /dev/null +++ b/samples/model_config/deepfm_combo_on_avazu_sigmoid_l2.config @@ -0,0 +1,367 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/dwd_avazu_out_test_combo_sigmoid_l2" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + mean_absolute_error {} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { 
+ input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c21" + input_type: INT64 + default_val:"0" + } + label_fields: "label" + + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + num_buckets: 24 + embedding_dim: 16 +} +feature_configs: { + input_names: "c1" + feature_type: RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 16 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} 
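The RawFeature columns in these Avazu configs (c1, banner_pos, c19, c20, c21) are discretized with explicit `boundaries` before embedding lookup, so each such column typically gets `len(boundaries) + 1` buckets. A minimal sketch of that bucketization, assuming numpy-style right-side insertion; EasyRec's exact edge handling is not verified here:

```python
import numpy as np

# Boundaries copied from the "c19" feature_configs entry in these configs.
boundaries = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100,
              110, 120, 130, 140, 150, 160, 170, 180, 190]

def bucketize(value, boundaries):
    """Return a bucket id in [0, len(boundaries)] for a raw value."""
    return int(np.searchsorted(boundaries, value, side='right'))

print(bucketize(5, boundaries))    # 0  -> below the first boundary
print(bucketize(95, boundaries))   # 9  -> between 90 and 100
print(bucketize(500, boundaries))  # 19 -> above the last boundary
```

Each bucket id then indexes a 16-dimensional embedding row (embedding_dim: 16), which is what lets a continuous column participate in both the wide and deep feature groups.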
+feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 16 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] + embedding_dim: 16 +} +feature_configs: { + input_names: ["site_id", "app_id"] + feature_name: "site_id_app_id" + feature_type: ComboFeature + hash_bucket_size: 1000, + embedding_dim: 16 +} + +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + feature_names: "site_id_app_id" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + deepfm { + wide_output_dim: 16 + + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + l2_regularization: 1e-5 + } + embedding_regularization: 1e-7 + loss_type: L2_LOSS +} diff --git a/samples/model_config/deepfm_lookup.config b/samples/model_config/deepfm_lookup.config index 1e3652da5..576ab0a73 100644 --- a/samples/model_config/deepfm_lookup.config +++ b/samples/model_config/deepfm_lookup.config @@ -58,6 +58,7 @@ feature_configs : { embedding_dim: 16 hash_bucket_size: 2000 feature_name: "field1" + kv_separator: ':' } diff --git a/samples/model_config/deepfm_multi_cls_small.config b/samples/model_config/deepfm_multi_cls_small.config new file mode 100644 index 000000000..b7cd753d6 --- /dev/null +++ b/samples/model_config/deepfm_multi_cls_small.config @@ -0,0 +1,375 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/dwd_avazu_out_multi_cls_small" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + accuracy {} + 
recall_at_topk { topk: 2} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c21" + input_type: INT64 + default_val:"0" + } + input_fields { + input_name: 'kd_soft' + input_type: STRING + } + + label_fields: ["label", "kd_soft"] + label_dim: [1, 5] + label_sep: ['', ''] + + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + hash_bucket_size: 12 + embedding_dim: 8 +} +feature_configs: { + input_names: "c1" + feature_type: RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 8 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 8 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 100000 +} +feature_configs: { + 
input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 8 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 8 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] + embedding_dim: 8 +} +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + deepfm { + wide_output_dim: 4 + + dnn { + hidden_units: [64, 32] + } + + final_dnn { + hidden_units: [64, 32] + } + l2_regularization: 1e-5 + } + + kd { + soft_label_name: 'kd_soft' + pred_name: 'logits' + loss_type: CROSS_ENTROPY_LOSS + loss_weight: 1.0 + temperature: 2.0 + } + + num_class: 5 + embedding_regularization: 1e-7 +} diff --git a/samples/model_config/deepfm_on_criteo_batch_tfrecord.config b/samples/model_config/deepfm_on_criteo_batch_tfrecord.config new file mode 100755 index 000000000..d8aae83dd --- /dev/null +++ b/samples/model_config/deepfm_on_criteo_batch_tfrecord.config @@ -0,0 +1,617 @@ 
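The next two configs read the Criteo sample from TFRecord instead of CSV (input_type: BatchTFRecordInput and TFRecordInput). A rough sketch of producing a record that matches the declared schema, with label and F1..F13 as float features and C1..C26 as int64 features; whether BatchTFRecordInput expects single rows or n_data_batch_tfrecord rows packed per record is an assumption left unverified:

```python
import tensorflow as tf

def make_example(label, dense, categorical):
    """Build a tf.train.Example using the field names declared in the config below."""
    feature = {
        'label': tf.train.Feature(float_list=tf.train.FloatList(value=[label])),
    }
    for i, v in enumerate(dense, start=1):        # F1..F13, FLOAT
        feature['F%d' % i] = tf.train.Feature(
            float_list=tf.train.FloatList(value=[v]))
    for i, v in enumerate(categorical, start=1):  # C1..C26, INT64
        feature['C%d' % i] = tf.train.Feature(
            int64_list=tf.train.Int64List(value=[v]))
    return tf.train.Example(features=tf.train.Features(feature=feature))

# Hypothetical output path; the shipped samples use
# data/test/batch_criteo_sample.tfrecord and data/test/criteo_sample.tfrecord.
with tf.io.TFRecordWriter('criteo_sample_demo.tfrecord') as writer:
    ex = make_example(0.0, [0.5] * 13, [123] * 26)
    writer.write(ex.SerializeToString())
```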
+train_input_path: "data/test/batch_criteo_sample.tfrecord" +eval_input_path: "data/test/batch_criteo_sample.tfrecord" + +model_dir: "experiments/dac_deepfm" + +train_config { + log_step_count_steps: 20 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + num_steps: 100 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C24" + 
input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: INT64 + default_val:"" + } + label_fields: "label" + + batch_size: 20 + num_epochs: 10000 + n_data_batch_tfrecord: 10 + prefetch_size: 32 + input_type: BatchTFRecordInput +} + +feature_configs: { + input_names: "F1" + embedding_dim:10 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 +} +feature_configs: { + input_names: "F2" + embedding_dim:10 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 +} +feature_configs: { + input_names: "F3" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 +} +feature_configs: { + input_names: "F4" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 +} +feature_configs: { + input_names: "F5" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 +} +feature_configs: { + input_names: "F6" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 +} +feature_configs: { + input_names: "F7" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 +} +feature_configs: { + input_names: "F8" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 +} +feature_configs: { + input_names: "F9" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 +} +feature_configs: { + input_names: "F10" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 +} +feature_configs: { + input_names: "F11" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 +} +feature_configs: { + input_names: "F12" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 +} +feature_configs: { + input_names: "F13" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 +} +feature_configs: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + 
embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + 
feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:WIDE + } + + deepfm { + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + wide_regularization: 1e-4 + dense_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/samples/model_config/deepfm_on_criteo_tfrecord.config b/samples/model_config/deepfm_on_criteo_tfrecord.config new file mode 100755 index 000000000..e5880e604 --- /dev/null +++ b/samples/model_config/deepfm_on_criteo_tfrecord.config @@ -0,0 +1,616 @@ +train_input_path: "data/test/criteo_sample.tfrecord" +eval_input_path: "data/test/criteo_sample.tfrecord" + +model_dir: "experiments/dac_deepfm" + +train_config { + log_step_count_steps: 20 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + num_steps: 100 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: 
INT64 + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: INT64 + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: INT64 + default_val:"" + } + label_fields: "label" + + batch_size: 8096 + num_epochs: 10000 + prefetch_size: 32 + input_type: TFRecordInput +} + +feature_configs: { + input_names: "F1" + embedding_dim:10 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 +} +feature_configs: { + input_names: "F2" + embedding_dim:10 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 +} +feature_configs: { + input_names: "F3" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 +} +feature_configs: { + input_names: "F4" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 +} +feature_configs: { + input_names: "F5" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 +} +feature_configs: { + input_names: "F6" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 +} +feature_configs: { + input_names: "F7" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 +} +feature_configs: { + input_names: "F8" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 +} +feature_configs: { + input_names: "F9" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 +} +feature_configs: { + input_names: "F10" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 +} +feature_configs: { + input_names: "F11" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 +} +feature_configs: { + input_names: "F12" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 +} +feature_configs: { + input_names: "F13" + embedding_dim:10 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 +} +feature_configs: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + 
embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +feature_configs: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 10 + embedding_name: "vocab_embed" +} +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + 
feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:WIDE + } + + deepfm { + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + wide_regularization: 1e-4 + dense_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/samples/model_config/deepfm_vocab_list_on_avazu_ctr.config b/samples/model_config/deepfm_vocab_list_on_avazu_ctr.config new file mode 100644 index 000000000..bd2ae8815 --- /dev/null +++ b/samples/model_config/deepfm_vocab_list_on_avazu_ctr.config @@ -0,0 +1,357 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/dwd_avazu_out_test_vocab" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: 
{ + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c21" + input_type: INT64 + default_val:"0" + } + label_fields: "label" + + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + vocab_list: ["0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24"] + embedding_dim: 16 +} +feature_configs: { + input_names: "c1" + feature_type: RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 16 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: 
IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 16 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] + embedding_dim: 16 +} +model_config:{ + model_class: "DeepFM" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + deepfm { + wide_output_dim: 16 + + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + l2_regularization: 1e-5 + } + embedding_regularization: 1e-7 +} diff --git a/samples/model_config/deepfm_with_embed.config b/samples/model_config/deepfm_with_embed.config new file mode 100644 index 000000000..ee813c365 --- /dev/null +++ b/samples/model_config/deepfm_with_embed.config @@ -0,0 +1,103 @@ +train_input_path: "data/test/embed_data.csv" +eval_input_path: "data/test/embed_data.csv" +model_dir: "experiment/deepfm_with_embed/" + +train_config { + log_step_count_steps: 200 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + num_steps: 1000 + sync_replicas: true +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name: 'clk' + input_type: INT32 + } + input_fields { + input_name: 'field1' + input_type: STRING + } + input_fields { + input_name: 'field2' + input_type: INT32 + } + input_fields { + input_name: "field3" + input_type: STRING + } + label_fields: 'clk' + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'field1' + + 
feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'field2' + + feature_type: RawFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'field3' + + feature_type: RawFeature + embedding_dim: 16 + raw_input_dim: 15 +} + +model_config: { + model_class: 'DeepFM' + feature_groups: { + group_name: 'deep' + feature_names: 'field[1-3]' + wide_deep: DEEP + } + feature_groups: { + group_name: 'wide' + feature_names: 'field[1-3]' + wide_deep: WIDE + } + deepfm { + dnn { + hidden_units: [64, 32, 16] + } + final_dnn { + hidden_units: [64, 32] + } + wide_output_dim: 8 + } + embedding_regularization: 1e-5 +} + +export_config{ +} diff --git a/samples/model_config/deepfm_with_sample_weight.config b/samples/model_config/deepfm_with_sample_weight.config new file mode 100755 index 000000000..bcf6a1837 --- /dev/null +++ b/samples/model_config/deepfm_with_sample_weight.config @@ -0,0 +1,90 @@ +train_input_path: "data/test/test_sample_weight.txt" +eval_input_path: "data/test/test_sample_weight.txt" +model_dir: 'experiments/deepfm_with_sample_weight/' + +train_config { + log_step_count_steps: 200 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + save_checkpoints_steps: 500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name: 'label' + input_type: INT32 + } + input_fields { + input_name: 'field[1-2]' + input_type: STRING + } + input_fields { + input_name: 'sample_weight' + input_type: FLOAT + } + + sample_weight: 'sample_weight' + auto_expand_input_fields: true + + label_fields: 'label' + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput + shuffle_buffer_size: 128 + num_epochs: 1 +} + +feature_configs : { + input_names: 'field1' + shared_names: 'field2' + + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 100 +} + +model_config: { + model_class: 'DeepFM' + feature_groups: { + group_name: 'wide' + feature_names: 'field[1-2]' + wide_deep: WIDE + } + feature_groups: { + group_name: 'deep' + feature_names: 'field[1-2]' + wide_deep: DEEP + } + deepfm { + dnn { + hidden_units: [32, 16] + } + final_dnn { + hidden_units: [32, 16] + } + wide_output_dim: 4 + l2_regularization: 1.0 + } + embedding_regularization: 5e-3 +} + +export_config { +} diff --git a/samples/model_config/deppfm_seq_attn_on_taobao.config b/samples/model_config/deppfm_seq_attn_on_taobao.config new file mode 100644 index 000000000..bf2838169 --- /dev/null +++ b/samples/model_config/deppfm_seq_attn_on_taobao.config @@ -0,0 +1,289 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/deepfm_seq_attn_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } 
+ input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 10000 + embedding_dim: 16 + sequence_combiner { + attention {} + } +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + sequence_combiner { + attention {} + } +} +feature_configs : { + 
input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} + +model_config: { + model_class: 'DeepFM' + feature_groups: { + group_name: 'wide' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + wide_deep: WIDE + } + feature_groups: { + group_name: 'deep' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + deepfm { + dnn { + hidden_units: [256, 256, 256] + } + l2_regularization: 1e-4 + } + embedding_regularization: 1e-5 +} + +export_config { +} diff --git a/samples/model_config/din_on_taobao.config b/samples/model_config/din_on_taobao.config index b2645c93a..1992c5ab0 100644 --- a/samples/model_config/din_on_taobao.config +++ b/samples/model_config/din_on_taobao.config @@ -290,3 +290,7 @@ model_config: { } embedding_regularization: 5e-5 } + +export_config { + multi_placeholder: false +} diff --git a/samples/model_config/dssm_hard_neg_sampler_on_taobao.config b/samples/model_config/dssm_hard_neg_sampler_on_taobao.config new file mode 100644 index 000000000..cf1ddfbd7 --- /dev/null +++ b/samples/model_config/dssm_hard_neg_sampler_on_taobao.config @@ -0,0 +1,306 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dssm_hard_neg_sampler_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: false + num_steps: 2500 +} + +eval_config { + metrics_set: { + recall_at_topk { + topk: 10 + } + } + metrics_set: { + recall_at_topk { + topk: 5 + } + } + metrics_set: { + recall_at_topk { + topk: 1 + } + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 
'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput + + hard_negative_sampler { + user_input_path: 'data/test/tb_data/taobao_user_profile_gl' + item_input_path: 'data/test/tb_data/taobao_ad_feature_gl' + hard_neg_edge_input_path: 'data/test/tb_data/taobao_noclk_edge_gl' + num_sample: 1024 + num_hard_sample: 200 + num_eval_sample: 2048 + attr_fields: 'adgroup_id' + attr_fields: 'cate_id' + attr_fields: 'campaign_id' + attr_fields: 'customer' + attr_fields: 'brand' + item_id_field: 'adgroup_id' + user_id_field: 'user_id' + } +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config:{ + model_class: "DSSM" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + 
feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + wide_deep:DEEP + } + dssm { + user_tower { + id: "user_id" + dnn { + hidden_units: [256, 128, 64, 32] + # dropout_ratio : [0.1, 0.1, 0.1, 0.1] + } + } + item_tower { + id: "adgroup_id" + dnn { + hidden_units: [256, 128, 64, 32] + } + } + simi_func: INNER_PRODUCT + scale_simi: false + l2_regularization: 1e-6 + } + loss_type: SOFTMAX_CROSS_ENTROPY + embedding_regularization: 5e-5 +} + +export_config { +} diff --git a/samples/model_config/dssm_hard_neg_sampler_v2_on_taobao.config b/samples/model_config/dssm_hard_neg_sampler_v2_on_taobao.config new file mode 100644 index 000000000..74b7f0d1b --- /dev/null +++ b/samples/model_config/dssm_hard_neg_sampler_v2_on_taobao.config @@ -0,0 +1,307 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dssm_hard_neg_sampler_v2_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: false + num_steps: 2500 +} + +eval_config { + metrics_set: { + recall_at_topk { + topk: 10 + } + } + metrics_set: { + recall_at_topk { + topk: 5 + } + } + metrics_set: { + recall_at_topk { + topk: 1 + } + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput + + hard_negative_sampler_v2 { + user_input_path: 'data/test/tb_data/taobao_user_profile_gl' + item_input_path: 'data/test/tb_data/taobao_ad_feature_gl' + pos_edge_input_path: 
'data/test/tb_data/taobao_clk_edge_gl' + hard_neg_edge_input_path: 'data/test/tb_data/taobao_noclk_edge_gl' + num_sample: 1024 + num_hard_sample: 200 + num_eval_sample: 2048 + attr_fields: 'adgroup_id' + attr_fields: 'cate_id' + attr_fields: 'campaign_id' + attr_fields: 'customer' + attr_fields: 'brand' + item_id_field: 'adgroup_id' + user_id_field: 'user_id' + } +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config:{ + model_class: "DSSM" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + wide_deep:DEEP + } + dssm { + user_tower { + id: "user_id" + dnn { + hidden_units: [256, 128, 64, 32] + # dropout_ratio : [0.1, 0.1, 0.1, 0.1] + } + } + item_tower { + id: "adgroup_id" + dnn { + hidden_units: [256, 128, 64, 32] + } + } + simi_func: INNER_PRODUCT + scale_simi: false + l2_regularization: 1e-6 + } + 
loss_type: SOFTMAX_CROSS_ENTROPY + embedding_regularization: 5e-5 +} + +export_config { +} diff --git a/samples/model_config/dssm_inner_prod_on_taobao.config b/samples/model_config/dssm_inner_prod_on_taobao.config new file mode 100644 index 000000000..9eccfda72 --- /dev/null +++ b/samples/model_config/dssm_inner_prod_on_taobao.config @@ -0,0 +1,278 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dssm_innner_prod_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: false + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + 
input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config:{ + model_class: "DSSM" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + wide_deep:DEEP + } + dssm { + user_tower { + id: "user_id" + dnn { + hidden_units: [256, 128, 64, 32] + # dropout_ratio : [0.1, 0.1, 0.1, 0.1] + } + } + item_tower { + id: "adgroup_id" + dnn { + hidden_units: [256, 128, 64, 32] + } + } + simi_func: INNER_PRODUCT + l2_regularization: 1e-6 + } + embedding_regularization: 5e-5 +} + +export_config { +} diff --git a/samples/model_config/dssm_kd_on_taobao.config b/samples/model_config/dssm_kd_on_taobao.config new file mode 100644 index 000000000..2be5fa8ff --- /dev/null +++ b/samples/model_config/dssm_kd_on_taobao.config @@ -0,0 +1,288 @@ +train_input_path: "data/test/tb_data/taobao_train_data_kd" +eval_input_path: "data/test/tb_data/taobao_test_data_kd" +model_dir: "experiments/dssm_kd_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: false + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } 
+ input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + input_fields { + input_name: 'kd_soft' + input_type: DOUBLE + } + + label_fields: ['clk', 'kd_soft'] + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config:{ + model_class: "DSSM" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 
'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + wide_deep:DEEP + } + dssm { + user_tower { + id: "user_id" + dnn { + hidden_units: [256, 128, 64, 32] + # dropout_ratio : [0.1, 0.1, 0.1, 0.1] + } + } + item_tower { + id: "adgroup_id" + dnn { + hidden_units: [256, 128, 64, 32] + } + } + l2_regularization: 1e-6 + } + embedding_regularization: 5e-5 + kd { + soft_label_name: 'kd_soft' + pred_name: 'logits' + loss_type: CROSS_ENTROPY_LOSS + loss_weight: 1.0 + temperature: 2.0 + } +} + +export_config { +} diff --git a/samples/model_config/dssm_neg_sampler_on_taobao.config b/samples/model_config/dssm_neg_sampler_on_taobao.config new file mode 100644 index 000000000..b2ade0835 --- /dev/null +++ b/samples/model_config/dssm_neg_sampler_on_taobao.config @@ -0,0 +1,302 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dssm_neg_sampler_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: false + num_steps: 2500 +} + +eval_config { + metrics_set: { + recall_at_topk { + topk: 10 + } + } + metrics_set: { + recall_at_topk { + topk: 5 + } + } + metrics_set: { + recall_at_topk { + topk: 1 + } + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput + + negative_sampler { + input_path: 'data/test/tb_data/taobao_ad_feature_gl' + num_sample: 1024 + num_eval_sample: 2048 + attr_fields: 'adgroup_id' + attr_fields: 'cate_id' + attr_fields: 'campaign_id' + attr_fields: 'customer' + attr_fields: 'brand' + item_id_field: 
'adgroup_id' + } +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config:{ + model_class: "DSSM" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + wide_deep:DEEP + } + dssm { + user_tower { + id: "user_id" + dnn { + hidden_units: [256, 128, 64, 32] + # dropout_ratio : [0.1, 0.1, 0.1, 0.1] + } + } + item_tower { + id: "adgroup_id" + dnn { + hidden_units: [256, 128, 64, 32] + } + } + simi_func: INNER_PRODUCT + scale_simi: false + l2_regularization: 1e-6 + } + loss_type: SOFTMAX_CROSS_ENTROPY + embedding_regularization: 5e-5 +} + +export_config { +} diff --git a/samples/model_config/dssm_neg_sampler_v2_on_taobao.config b/samples/model_config/dssm_neg_sampler_v2_on_taobao.config new file mode 100644 index 000000000..4a342ca0f --- /dev/null +++ b/samples/model_config/dssm_neg_sampler_v2_on_taobao.config @@ -0,0 
+1,305 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dssm_neg_sampler_v2_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: false + num_steps: 2500 +} + +eval_config { + metrics_set: { + recall_at_topk { + topk: 10 + } + } + metrics_set: { + recall_at_topk { + topk: 5 + } + } + metrics_set: { + recall_at_topk { + topk: 1 + } + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput + + negative_sampler_v2 { + user_input_path: 'data/test/tb_data/taobao_user_profile_gl' + item_input_path: 'data/test/tb_data/taobao_ad_feature_gl' + pos_edge_input_path: 'data/test/tb_data/taobao_clk_edge_gl' + num_sample: 1024 + num_eval_sample: 2048 + attr_fields: 'adgroup_id' + attr_fields: 'cate_id' + attr_fields: 'campaign_id' + attr_fields: 'customer' + attr_fields: 'brand' + item_id_field: 'adgroup_id' + user_id_field: 'user_id' + } +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 
'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config:{ + model_class: "DSSM" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + wide_deep:DEEP + } + dssm { + user_tower { + id: "user_id" + dnn { + hidden_units: [256, 128, 64, 32] + # dropout_ratio : [0.1, 0.1, 0.1, 0.1] + } + } + item_tower { + id: "adgroup_id" + dnn { + hidden_units: [256, 128, 64, 32] + } + } + simi_func: INNER_PRODUCT + scale_simi: false + l2_regularization: 1e-6 + } + loss_type: SOFTMAX_CROSS_ENTROPY + embedding_regularization: 5e-5 +} + +export_config { +} diff --git a/samples/model_config/dssm_with_sample_weight.config b/samples/model_config/dssm_with_sample_weight.config new file mode 100755 index 000000000..7893fca1b --- /dev/null +++ b/samples/model_config/dssm_with_sample_weight.config @@ -0,0 +1,95 @@ +train_input_path: "data/test/test_sample_weight.txt" +eval_input_path: "data/test/test_sample_weight.txt" +model_dir: 'experiments/dssm_with_sample_weight/' + +train_config { + log_step_count_steps: 200 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + save_checkpoints_steps: 500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name: 'label' + input_type: INT32 + } + input_fields { + input_name: 'field[1-2]' + input_type: STRING + } + input_fields { + input_name: 'sample_weight' + input_type: FLOAT + } + + sample_weight: 'sample_weight' + auto_expand_input_fields: true + + label_fields: 'label' + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput + 
shuffle_buffer_size: 128 + num_epochs: 1 +} + +feature_configs : { + input_names: 'field1' + shared_names: 'field2' + + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 100 +} + +model_config: { + model_class: 'DSSM' + feature_groups: { + group_name: 'user' + feature_names: 'field1' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'field2' + wide_deep: DEEP + } + dssm { + user_tower { + id: "field1" + dnn { + hidden_units: [32, 16] + } + } + item_tower { + id: "field2" + dnn { + hidden_units: [32, 16] + } + } + l2_regularization: 1.0 + } + embedding_regularization: 5e-3 +} + +export_config { +} diff --git a/samples/model_config/kv_tag.config b/samples/model_config/kv_tag.config new file mode 100644 index 000000000..c71e6ffbe --- /dev/null +++ b/samples/model_config/kv_tag.config @@ -0,0 +1,87 @@ +train_input_path: "data/test/tag_kv_data.csv" +eval_input_path: "data/test/tag_kv_data.csv" +model_dir: 'experiment/tag_kv_test/' + +train_config { + log_step_count_steps: 5 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + num_steps: 100 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name: 'label' + input_type: INT32 + } + input_fields { + input_name: 'field1' + input_type: STRING + } + + auto_expand_input_fields: true + + + label_fields: 'label' + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'field1' + + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 2000 + feature_name: "field1" + separator: '' + kv_separator: '' +} + + +model_config: { + model_class: 'DeepFM' + feature_groups: { + group_name: 'wide' + feature_names: 'field1' + wide_deep: WIDE + } + feature_groups: { + group_name: 'deep' + feature_names: 'field1' + wide_deep: DEEP + } + deepfm { + dnn { + hidden_units: [64, 32, 16] + } + final_dnn { + hidden_units: [128, 64] + } + wide_output_dim: 16 + l2_regularization: 1e-4 + } + embedding_regularization: 1e-5 +} + +export_config { +} diff --git a/samples/model_config/mind_on_taobao.config b/samples/model_config/mind_on_taobao.config new file mode 100644 index 000000000..ee0493f28 --- /dev/null +++ b/samples/model_config/mind_on_taobao.config @@ -0,0 +1,276 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/mind_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + 
} + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 2 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config:{ + model_class: "MIND" + feature_groups: { + group_name: 'hist' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + } + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 
'occupation' + feature_names: 'new_user_class_level' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + wide_deep:DEEP + } + mind { + user_dnn { + hidden_units: [256, 128, 64, 32] + } + item_dnn { + hidden_units: [256, 128, 64, 32] + } + + capsule_config { + max_k: 5 + max_seq_len: 64 + high_dim: 64 + } + l2_regularization: 1e-6 + } + embedding_regularization: 5e-5 +} + +export_config { +} diff --git a/samples/model_config/mind_on_taobao_with_time.config b/samples/model_config/mind_on_taobao_with_time.config new file mode 100644 index 000000000..2b8208596 --- /dev/null +++ b/samples/model_config/mind_on_taobao_with_time.config @@ -0,0 +1,288 @@ +train_input_path: "data/test/tb_data_with_time/taobao_train_data_with_time" +eval_input_path: "data/test/tb_data_with_time/taobao_test_data_with_time" +model_dir: "experiments/mind_taobao_with_time_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + input_fields { + input_name: 'time_id' + input_type: STRING + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 2 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + 
hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +feature_configs : { + input_names: 'time_id' + feature_type: SequenceFeature + separator: '|' + num_buckets: 128 + embedding_dim: 1 +} +model_config:{ + model_class: "MIND" + feature_groups: { + group_name: 'hist' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + feature_names: 'time_id' + } + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + wide_deep:DEEP + } + feature_groups: { + group_name: "item" + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + wide_deep:DEEP + } + mind { + user_dnn { + hidden_units: [256, 128, 64, 32] + } + item_dnn { + hidden_units: [256, 128, 64, 32] + } + + capsule_config { + max_k: 5 + max_seq_len: 64 + high_dim: 64 + } + l2_regularization: 1e-6 + } + embedding_regularization: 5e-5 +} + +export_config { +} diff --git a/samples/model_config/multi_tower_best_export_on_taobao.config b/samples/model_config/multi_tower_best_export_on_taobao.config index 755553f29..f13c840b7 100644 --- a/samples/model_config/multi_tower_best_export_on_taobao.config +++ b/samples/model_config/multi_tower_best_export_on_taobao.config @@ -17,9 +17,9 @@ train_config { } use_moving_average: false } - save_checkpoints_steps: 100 + save_checkpoints_steps: 50 sync_replicas: True - num_steps: 100 + num_steps: 500 } eval_config { @@ -288,4 +288,5 @@ model_config: { export_config { exporter_type: "best" + exports_to_keep: 2 } diff --git a/samples/model_config/multi_tower_ckpt_keep_3_on_taobao.config 
b/samples/model_config/multi_tower_ckpt_keep_3_on_taobao.config new file mode 100644 index 000000000..8977d0cb8 --- /dev/null +++ b/samples/model_config/multi_tower_ckpt_keep_3_on_taobao.config @@ -0,0 +1,288 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/multi_tower_taobao_keep_3" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + keep_checkpoint_max: 3 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { 
+ input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'MultiTower' + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + wide_deep: DEEP + } + feature_groups: { + group_name: 'combo' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + + multi_tower { + towers { + input: "user" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "item" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "combo" + dnn { + hidden_units: [128, 96, 64, 32] + } + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} diff --git a/samples/model_config/multi_tower_multi_value_export_on_taobao.config b/samples/model_config/multi_tower_multi_value_export_on_taobao.config new file mode 100644 index 000000000..f178fa19d --- /dev/null +++ b/samples/model_config/multi_tower_multi_value_export_on_taobao.config @@ -0,0 +1,295 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/multi_tower_multi_value_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + 
} + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'MultiTower' + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 
'new_user_class_level' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + wide_deep: DEEP + } + feature_groups: { + group_name: 'combo' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + + multi_tower { + towers { + input: "user" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "item" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "combo" + dnn { + hidden_units: [128, 96, 64, 32] + } + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} + +export_config { + multi_value_fields { + input_name: 'tag_category_list' + input_name: 'tag_brand_list' + } + placeholder_named_by_input: true +} diff --git a/samples/model_config/multi_tower_on_taobao.config b/samples/model_config/multi_tower_on_taobao.config index b86a39aef..49cc2957f 100644 --- a/samples/model_config/multi_tower_on_taobao.config +++ b/samples/model_config/multi_tower_on_taobao.config @@ -19,7 +19,7 @@ train_config { } save_checkpoints_steps: 100 sync_replicas: True - num_steps: 2500 + num_steps: 200 } eval_config { @@ -156,7 +156,7 @@ feature_configs : { feature_configs : { input_names: 'user_id' feature_type: IdFeature - embedding_dim: 16 + embedding_dim: 32 hash_bucket_size: 100000 } feature_configs : { @@ -285,3 +285,7 @@ model_config: { } embedding_regularization: 1e-4 } + +export_config { + multi_placeholder: false +} diff --git a/samples/model_config/multi_tower_on_taobao_chief_redundant.config b/samples/model_config/multi_tower_on_taobao_chief_redundant.config new file mode 100644 index 000000000..38a677484 --- /dev/null +++ b/samples/model_config/multi_tower_on_taobao_chief_redundant.config @@ -0,0 +1,292 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/multi_tower_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + momentum_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: False + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + 
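+ # Export options introduced above: multi_tower_multi_value_export_on_taobao.config
+ # adds an export_config whose multi_value_fields lists tag_category_list and
+ # tag_brand_list with placeholder_named_by_input: true, and
+ # multi_tower_on_taobao.config now sets multi_placeholder: false. Both appear
+ # to control how serving placeholders are laid out in the exported SavedModel
+ # (separate, input-named placeholders for the listed multi-value features
+ # versus a single combined placeholder).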
input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput + chief_redundant: true +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'MultiTower' + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + wide_deep: DEEP + } + feature_groups: { + group_name: 'combo' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + + multi_tower { + towers { + 
input: "user" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "item" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "combo" + dnn { + hidden_units: [128, 96, 64, 32] + } + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} + +export_config { + multi_placeholder: false +} diff --git a/samples/model_config/multi_tower_on_taobao_gauc.config b/samples/model_config/multi_tower_on_taobao_gauc.config new file mode 100644 index 000000000..fa6f64673 --- /dev/null +++ b/samples/model_config/multi_tower_on_taobao_gauc.config @@ -0,0 +1,293 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/multi_tower_taobao_gauc_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + gauc { + uid_field: 'user_id' + } + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: 
IdFeature + embedding_dim: 32 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'MultiTower' + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + wide_deep: DEEP + } + feature_groups: { + group_name: 'combo' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + + multi_tower { + towers { + input: "user" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "item" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "combo" + dnn { + hidden_units: [128, 96, 64, 32] + } + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} + +export_config { + multi_placeholder: false +} diff --git a/samples/model_config/multi_tower_with_embed.config b/samples/model_config/multi_tower_with_embed.config new file mode 100644 index 000000000..366c1ab83 --- /dev/null +++ b/samples/model_config/multi_tower_with_embed.config @@ -0,0 +1,97 @@ +train_input_path: "data/test/embed_data.csv" +eval_input_path: "data/test/embed_data.csv" +model_dir: "experiments/multitower_with_embed/" + +train_config { + log_step_count_steps: 200 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + num_steps: 1000 + sync_replicas: true +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name: 'clk' + input_type: INT32 + } + input_fields { + input_name: 'field1' + input_type: STRING + } 
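+ # In this multi_tower_with_embed sample, field2 and field3 (declared just
+ # below) are fed as RawFeature columns rather than embedded ids; field3 sets
+ # raw_input_dim: 15, which appears to mean the string is parsed as a
+ # 15-dimensional dense vector (e.g. a pre-computed embedding) and used
+ # directly, and the feature group lists the three inputs as
+ # feature_names: 'field[1-3]'.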
+ input_fields { + input_name: 'field2' + input_type: INT32 + } + input_fields { + input_name: "field3" + input_type: STRING + } + label_fields: 'clk' + batch_size: 1024 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'field1' + + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'field2' + + feature_type: RawFeature + # embedding_dim: 16 + hash_bucket_size: 2000 +} + +feature_configs : { + input_names: 'field3' + + feature_type: RawFeature + # embedding_dim: 16 + raw_input_dim: 15 +} + +model_config: { + model_class: 'MultiTower' + feature_groups: { + group_name: 'user' + feature_names: 'field[1-3]' + wide_deep: DEEP + } + multi_tower { + towers { + input: "user" + dnn { + hidden_units: [64, 32, 16] + } + } + } + embedding_regularization: 1e-5 +} + +export_config{ +} diff --git a/samples/model_config/rocket_launching.config b/samples/model_config/rocket_launching.config new file mode 100644 index 000000000..1fb108dde --- /dev/null +++ b/samples/model_config/rocket_launching.config @@ -0,0 +1,272 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dcn_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { 
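+ # This file and rocket_launching_feature_based.config (added next) configure
+ # the RocketLaunching model: a shared bottom (share_dnn), a large booster
+ # network (booster_dnn) and a small light network (light_dnn) are trained
+ # jointly, with feature_based_distillation toggling whether intermediate
+ # features are also distilled (feature_distillation_function: COSINE);
+ # presumably only the light network is used at serving time, following the
+ # Rocket Launching training scheme.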
+ input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'RocketLaunching' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + rocket_launching { + share_dnn { + hidden_units: [128, 96, 64] + } + booster_dnn { + hidden_units: [256, 128, 96, 64] + } + light_dnn{ + hidden_units:[128, 64] + } + l2_regularization: 1e-6 + feature_based_distillation:false + feature_distillation_function:COSINE + } + embedding_regularization:5e-6 + num_class: 2 + +} +export_config { +} diff --git a/samples/model_config/rocket_launching_feature_based.config b/samples/model_config/rocket_launching_feature_based.config new file mode 100644 index 000000000..5976643ff --- /dev/null +++ b/samples/model_config/rocket_launching_feature_based.config @@ -0,0 +1,272 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dcn_taobao_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + 
} + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 2500 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + 
separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'RocketLaunching' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + rocket_launching { + share_dnn { + hidden_units: [128, 96, 64] + } + booster_dnn { + hidden_units: [256, 128, 96, 64] + } + light_dnn{ + hidden_units:[128, 64] + } + l2_regularization: 1e-6 + feature_based_distillation:true + feature_distillation_function:COSINE + } + embedding_regularization:5e-6 + num_class: 2 + +} +export_config { +} diff --git a/samples/model_config/taobao_fg.config b/samples/model_config/taobao_fg.config index 95ffd5967..f3b5a4b03 100644 --- a/samples/model_config/taobao_fg.config +++ b/samples/model_config/taobao_fg.config @@ -100,6 +100,7 @@ feature_configs { feature_type: TagFeature embedding_dim: 16 hash_bucket_size: 100000 + max_partitions: 4 separator: "" } feature_configs { diff --git a/samples/model_config/taobao_fg_ev.config b/samples/model_config/taobao_fg_ev.config new file mode 100644 index 000000000..d871a25cc --- /dev/null +++ b/samples/model_config/taobao_fg_ev.config @@ -0,0 +1,295 @@ +train_input_path: "data/test/rtp/taobao_train_feature.txt" +eval_input_path: "data/test/rtp/taobao_test_feature.txt" +model_dir: "experiments/taobao_fg_ev_demo" + +train_config { + optimizer_config { + use_moving_average: false + adam_async_optimizer { + learning_rate { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 1e-07 + } + } + } + } + num_steps: 400 + sync_replicas: true + log_step_count_steps: 200 +} +eval_config { + metrics_set { + auc { + } + } +} +data_config { + batch_size: 1024 + label_fields: "clk" + input_type: RTPInput + separator: "" + selected_cols: "0,3" + input_fields { + input_name: "clk" + input_type: INT32 + default_val: "0" + } + input_fields { + input_name: "user_id" + } + input_fields { + input_name: "cms_segid" + } + input_fields { + input_name: "cms_group_id" + } + input_fields { + input_name: "age_level" + } + input_fields { + input_name: "pvalue_level" + } + input_fields { + input_name: "shopping_level" + } + input_fields { + input_name: "occupation" + } + input_fields { + input_name: "new_user_class_level" + } + input_fields { + input_name: "adgroup_id" + } + input_fields { + input_name: "cate_id" + } + input_fields { + input_name: "campaign_id" + } + input_fields { + input_name: "customer" + } + input_fields { + input_name: "brand" + } + input_fields { + input_name: "price" + input_type: DOUBLE + default_val: "0.0" + } + input_fields { + input_name: "pid" + } + input_fields { + input_name: "user_tag_cate" + } + input_fields { + input_name: "combo_brand" + } + input_fields { + input_name: "combo_cate_id" + } + rtp_separator: ";" 
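+ # This taobao_fg_ev sample reads RTP-formatted data: input_type: RTPInput with
+ # rtp_separator ';' splits each line into columns, selected_cols: "0,3" keeps
+ # the label column and the packed feature column, and each feature_config
+ # carries its own value separator. Later in the file use_embedding_variable:
+ # true presumably switches embeddings to embedding-variable storage, and the
+ # related taobao_fg.config change above adds max_partitions: 4, which appears
+ # to shard large embedding tables across parameter servers.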
+} +feature_configs { + input_names: "user_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "cms_segid" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100 + separator: "" +} +feature_configs { + input_names: "cms_group_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100 + separator: "" +} +feature_configs { + input_names: "age_level" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "pvalue_level" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "shopping_level" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "occupation" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "new_user_class_level" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "adgroup_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "cate_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "campaign_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "customer" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "brand" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "price" + feature_type: RawFeature + separator: "" +} +feature_configs { + input_names: "pid" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "user_tag_cate" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "combo_brand" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "combo_cate_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10000 + separator: "" +} +model_config { + model_class: "MultiTower" + feature_groups { + group_name: "item" + feature_names: "adgroup_id" + feature_names: "cate_id" + feature_names: "campaign_id" + feature_names: "customer" + feature_names: "brand" + feature_names: "price" + feature_names: "pid" + wide_deep: DEEP + } + feature_groups { + group_name: "user" + feature_names: "user_id" + feature_names: "cms_segid" + feature_names: "cms_group_id" + feature_names: "age_level" + feature_names: "pvalue_level" + feature_names: "shopping_level" + feature_names: "occupation" + feature_names: "new_user_class_level" + feature_names: "user_tag_cate" + wide_deep: DEEP + } + feature_groups { + group_name: "combo" + feature_names: "combo_brand" + feature_names: "combo_cate_id" + wide_deep: DEEP + } + embedding_regularization: 1e-05 + multi_tower { + towers { + input: "item" + dnn { + hidden_units: 192 + hidden_units: 256 + hidden_units: 192 + hidden_units: 128 + } + } + towers { + input: "user" + dnn { + hidden_units: 192 + hidden_units: 256 + hidden_units: 192 + hidden_units: 128 + } + } + towers { + input: "combo" + dnn { + 
hidden_units: 192 + hidden_units: 256 + hidden_units: 192 + hidden_units: 128 + } + } + final_dnn { + hidden_units: 256 + hidden_units: 192 + hidden_units: 128 + hidden_units: 64 + } + l2_regularization: 0.0001 + } + use_embedding_variable: true +} +export_config { + multi_placeholder: false +} diff --git a/samples/model_config/wide_and_deep_no_final_on_avazau_ctr.config b/samples/model_config/wide_and_deep_no_final_on_avazau_ctr.config new file mode 100755 index 000000000..7f91a11b7 --- /dev/null +++ b/samples/model_config/wide_and_deep_no_final_on_avazau_ctr.config @@ -0,0 +1,357 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/wide_and_deep_no_final_on_avazau_ctr" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + accuracy {} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c21" + input_type: INT64 + default_val:"0" + } + label_fields: "label" + + batch_size: 1024 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] + embedding_dim: 16 +} +feature_configs: { + input_names: "c1" + feature_type: 
RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 16 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 64 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 64 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 16 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] + embedding_dim: 16 +} +model_config:{ + model_class: "WideAndDeep" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + 
feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + wide_and_deep { + wide_output_dim: 18 + + + dnn { + hidden_units: [128, 64, 32] + } + + l2_regularization: 1e-5 + } + num_class: 3 + embedding_regularization: 1e-7 +} diff --git a/samples/model_config/wide_and_deep_on_avazau_ctr.config b/samples/model_config/wide_and_deep_on_avazau_ctr.config new file mode 100755 index 000000000..239fe72e3 --- /dev/null +++ b/samples/model_config/wide_and_deep_on_avazau_ctr.config @@ -0,0 +1,360 @@ +train_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +eval_input_path: "data/test/dwd_avazu_ctr_deepmodel_10w.csv" +model_dir: "experiments/wide_and_deep_on_avazu_ctr" + +train_config { + log_step_count_steps: 200 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 10000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true + save_checkpoints_steps: 500 + num_steps: 1000 +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c1" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "banner_pos" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"0" + } + input_fields: { + input_name: "c19" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "c20" + input_type: INT64 + default_val:"0" + } + input_fields: { + 
input_name: "c21" + input_type: INT64 + default_val:"0" + } + label_fields: "label" + + batch_size: 1024 + num_epochs: 10000 + prefetch_size: 32 + input_type: CSVInput +} + +feature_configs: { + input_names: "hour" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] + embedding_dim: 16 +} +feature_configs: { + input_names: "c1" + feature_type: RawFeature + boundaries: [1000.0,1001.0,1002.0,1003.0,1004.0,1005.0,1006.0,1007.0,1008.0,1009.0,1010.0,1011.0,1012.0,1013.0,1014.0,1015.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "banner_pos" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6] + embedding_dim: 16 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 64 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 64 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: RawFeature + boundaries: [10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190] + embedding_dim: 16 +} +feature_configs: { + input_names: "c20" + feature_type: RawFeature + boundaries: [100.0,200.0,300.0,400.0,500.0,600.0,700.0,800.0, 900.0, 1000.0,1100.0,1200.0, 1300.0,1400.0] + embedding_dim: 16 +} +feature_configs: { + input_names: "c21" + feature_type: RawFeature + boundaries: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] + embedding_dim: 16 +} +model_config:{ + model_class: "WideAndDeep" + feature_groups: { + group_name: "deep" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + 
feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "hour" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:WIDE + } + + wide_and_deep { + wide_output_dim: 16 + + + dnn { + hidden_units: [128, 64, 32] + } + + final_dnn { + hidden_units: [128, 64] + } + + l2_regularization: 1e-5 + } + embedding_regularization: 1e-7 +} diff --git a/samples/odps_script/boundary/create_external_boundary_table.sql b/samples/odps_script/boundary/create_external_boundary_table.sql new file mode 100644 index 000000000..7c80741ea --- /dev/null +++ b/samples/odps_script/boundary/create_external_boundary_table.sql @@ -0,0 +1,73 @@ +drop TABLE IF EXISTS external_boundary_test_{TIME_STAMP} ; +create EXTERNAL table external_boundary_test_{TIME_STAMP}( + clk bigint + ,buy bigint + ,pid string + ,adgroup_id string + ,cate_id string + ,campaign_id string + ,customer string + ,brand string + ,user_id string + ,cms_segid string + ,cms_group_id string + ,final_gender_code string + ,age_level string + ,pvalue_level string + ,shopping_level string + ,occupation string + ,new_user_class_level string + ,tag_category_list string + ,tag_brand_list string + ,price double +) +STORED BY 'com.aliyun.odps.CsvStorageHandler' +WITH SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/tb_data/train/' +; + + +drop TABLE IF EXISTS external_boundary_train_{TIME_STAMP} ; +create EXTERNAL table external_boundary_train_{TIME_STAMP}( + clk bigint + ,buy bigint + ,pid string + ,adgroup_id string + ,cate_id string + ,campaign_id string + ,customer string + ,brand string + ,user_id string + ,cms_segid string + ,cms_group_id string + ,final_gender_code string + ,age_level string + ,pvalue_level string + ,shopping_level string + ,occupation string + ,new_user_class_level string + ,tag_category_list string + ,tag_brand_list string + ,price double +) +STORED BY 'com.aliyun.odps.CsvStorageHandler' +WITH SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/tb_data/test/' +; + + +drop TABLE IF EXISTS external_boundary_info_table_{TIME_STAMP} ; +create EXTERNAL table external_boundary_info_table_{TIME_STAMP}( + feature STRING + ,json STRING +) +STORED BY 'com.aliyun.odps.TsvStorageHandler' +WITH SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/tb_data/boundary/' +; diff --git a/samples/odps_script/boundary/create_inner_boundary_table.sql b/samples/odps_script/boundary/create_inner_boundary_table.sql new file mode 100644 index 000000000..c2b972ce8 --- /dev/null +++ b/samples/odps_script/boundary/create_inner_boundary_table.sql @@ -0,0 
+1,67 @@ +drop TABLE IF EXISTS boundary_test_{TIME_STAMP} ; +create table boundary_test_{TIME_STAMP}( + clk bigint + ,buy bigint + ,pid string + ,adgroup_id string + ,cate_id string + ,campaign_id string + ,customer string + ,brand string + ,user_id string + ,cms_segid string + ,cms_group_id string + ,final_gender_code string + ,age_level string + ,pvalue_level string + ,shopping_level string + ,occupation string + ,new_user_class_level string + ,tag_category_list string + ,tag_brand_list string + ,price double +) +; + +INSERT OVERWRITE TABLE boundary_test_{TIME_STAMP} +select * from external_boundary_test_{TIME_STAMP} ; + + +drop TABLE IF EXISTS boundary_train_{TIME_STAMP} ; +create table boundary_train_{TIME_STAMP}( + clk bigint + ,buy bigint + ,pid string + ,adgroup_id string + ,cate_id string + ,campaign_id string + ,customer string + ,brand string + ,user_id string + ,cms_segid string + ,cms_group_id string + ,final_gender_code string + ,age_level string + ,pvalue_level string + ,shopping_level string + ,occupation string + ,new_user_class_level string + ,tag_category_list string + ,tag_brand_list string + ,price double +) +; + +INSERT OVERWRITE TABLE boundary_train_{TIME_STAMP} +select * from external_boundary_train_{TIME_STAMP} ; + + +drop TABLE IF EXISTS boundary_info_table_{TIME_STAMP} ; +create table boundary_info_table_{TIME_STAMP}( + feature STRING + ,json STRING +) +; + +INSERT OVERWRITE TABLE boundary_info_table_{TIME_STAMP} +select * from external_boundary_info_table_{TIME_STAMP} ; diff --git a/samples/odps_script/boundary/drop_table.sql b/samples/odps_script/boundary/drop_table.sql new file mode 100644 index 000000000..c6fe9c362 --- /dev/null +++ b/samples/odps_script/boundary/drop_table.sql @@ -0,0 +1,8 @@ +drop TABLE if EXISTS boundary_test_{TIME_STAMP}; +drop TABLE if EXISTS external_boundary_test_{TIME_STAMP}; + +drop TABLE if EXISTS boundary_train_{TIME_STAMP}; +drop TABLE if EXISTS external_boundary_train_{TIME_STAMP}; + +drop TABLE if EXISTS boundary_info_table_{TIME_STAMP}; +drop TABLE if EXISTS external_boundary_info_table_{TIME_STAMP}; diff --git a/samples/odps_script/boundary/train_multi_tower_model.sql b/samples/odps_script/boundary/train_multi_tower_model.sql new file mode 100644 index 000000000..45fe45a03 --- /dev/null +++ b/samples/odps_script/boundary/train_multi_tower_model.sql @@ -0,0 +1,11 @@ +pai -name easy_rec_ext +-Dconfig=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/configs/taobao_multi_tower_boundary_test.config +-Dcmd=train +-Dboundary_table=odps://{ODPS_PROJ_NAME}/tables/boundary_info_table_{TIME_STAMP} +-Dtables=odps://{ODPS_PROJ_NAME}/tables/boundary_train_{TIME_STAMP},odps://{ODPS_PROJ_NAME}/tables/boundary_test_{TIME_STAMP} +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":2, "cpu":1000, "gpu":100, "memory":40000}}' +-Darn={ROLEARN} +-Dbuckets=oss://{OSS_BUCKET_NAME}/ +-DossHost={OSS_ENDPOINT} +-Dwith_evaluator=1 +; diff --git a/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_best_export.config b/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_best_export.config new file mode 100644 index 000000000..d0e36da0f --- /dev/null +++ b/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_best_export.config @@ -0,0 +1,362 @@ + +train_input_path: "" +eval_input_path: "" +model_dir: "oss://{OSS_BUCKET_NAME}/{EXP_NAME}/dwd_avazu_ctr2/best_export_test/" + +train_config { + save_summary_steps: 10 + log_step_count_steps: 10 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + 
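+ # This best_export_test config sets exporter_type: "best" with
+ # exports_to_keep: 2 in the export_config below, so checkpoints are exported
+ # according to the evaluation metric (AUC here) and at most two exported
+ # models are retained, rather than only exporting the final checkpoint. The
+ # odps_script/boundary files above stage the Taobao tables in MaxCompute plus
+ # a boundary_info_table(feature, json) that is passed to training via
+ # -Dboundary_table, presumably supplying pre-computed bin boundaries for
+ # feature discretization.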
exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true + num_steps: 600 + save_checkpoints_steps: 50 +} + +eval_config { + metrics_set: { + auc {} + } +} + +export_config { + exporter_type: "best" + exports_to_keep: 2 +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "banner_pos" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c21" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 1024 + prefetch_size: 32 + input_type: OdpsInputV2 +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 50 +} +feature_configs: { + input_names: "c1" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "banner_pos" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: 
"device_id" + feature_type: IdFeature + embedding_dim: 64 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 64 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c20" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c21" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +model_config:{ + model_class: "MultiTower" + feature_groups: { + group_name: "item" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + wide_deep:DEEP + } + feature_groups: { + group_name: "user" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + wide_deep:DEEP + } + feature_groups: { + group_name: "user_item" + feature_names: "hour" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:DEEP + } + + multi_tower { + + towers { + input: "item" + dnn { + hidden_units: [384, 320, 256, 192, 128] + } + } + + towers { + input: "user" + dnn { + hidden_units: [384, 320, 256, 192, 128] + } + } + + towers { + input: "user_item" + dnn { + hidden_units: [384, 320, 256, 192, 128] + } + } + + final_dnn { + hidden_units: [256, 192, 128, 64] + } + l2_regularization: 0.0 + } + embedding_regularization: 0.0 +} diff --git a/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_v4.config b/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_v4.config index fca45948d..03ef5f5fb 100644 --- a/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_v4.config +++ b/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_v4.config @@ -24,9 +24,6 @@ train_config { } sync_replicas: true - #train_distribute: MirroredStrategy - #num_gpus_per_worker: 2 - #is_profiling: true } eval_config { diff --git a/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_v5_export_test.config b/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_v5_export_test.config new file mode 100644 index 000000000..9e280b9db --- /dev/null +++ b/samples/odps_script/configs/dwd_avazu_ctr_deepmodel_ext_v5_export_test.config @@ -0,0 +1,361 @@ + 
+train_input_path: "" +eval_input_path: "" +model_dir: "oss://{OSS_BUCKET_NAME}/{EXP_NAME}/dwd_avazu_ctr2/no_such_model/" + +train_config { + num_steps:100 + save_checkpoints_steps: 100 + save_summary_steps: 100 + log_step_count_steps: 100 + # fine_tune_checkpoint: "" + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 0.0000001 + } + } + } + use_moving_average: false + } + + sync_replicas: true + #train_distribute: MirroredStrategy + #num_gpus_per_worker: 2 + #is_profiling: true +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "," + input_fields: { + input_name: "label" + input_type: INT64 + default_val:"0" + } + input_fields: { + input_name: "hour" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "banner_pos" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "site_id" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "site_domain" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "site_category" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "app_id" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "app_domain" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "app_category" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_id" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_ip" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_model" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_type" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "device_conn_type" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "c21" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 1024 + num_epochs: 10000 + prefetch_size: 32 + input_type: OdpsInputV2 +} + +feature_configs: { + input_names: "hour" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 50 +} +feature_configs: { + input_names: "c1" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "banner_pos" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "site_id" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "site_domain" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "site_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "app_id" + feature_type: IdFeature + 
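# Feature sizing in this file follows a simple convention: high-cardinality ids
# (site_id, app_id, device_id, device_ip) get larger hash buckets and embedding
# dims, while low-cardinality fields such as device_type and device_conn_type
# use hash_bucket_size: 10 and embedding_dim: 8.
# Note also that model_dir above points at ".../no_such_model/"; this appears
# intentional, so that test_export_update_model_dir.sql in
# samples/odps_script/other_test/ can check that -Dmodel_dir / -Dexport_dir
# passed on the pai command line override the values baked into the config.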
embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "app_domain" + feature_type: IdFeature + embedding_dim: 20 + hash_bucket_size: 1000 +} +feature_configs: { + input_names: "app_category" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs: { + input_names: "device_id" + feature_type: IdFeature + embedding_dim: 64 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_ip" + feature_type: IdFeature + embedding_dim: 64 + hash_bucket_size: 100000 +} +feature_configs: { + input_names: "device_model" + feature_type: IdFeature + embedding_dim: 32 + hash_bucket_size: 10000 +} +feature_configs: { + input_names: "device_type" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "device_conn_type" + feature_type: IdFeature + embedding_dim: 8 + hash_bucket_size: 10 +} +feature_configs: { + input_names: "c14" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c15" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c16" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c17" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c18" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c19" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c20" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +feature_configs: { + input_names: "c21" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 500 +} +model_config:{ + model_class: "MultiTower" + feature_groups: { + group_name: "item" + feature_names: "c1" + feature_names: "banner_pos" + feature_names: "site_id" + feature_names: "site_domain" + feature_names: "site_category" + feature_names: "app_id" + feature_names: "app_domain" + feature_names: "app_category" + wide_deep:DEEP + } + feature_groups: { + group_name: "user" + feature_names: "device_id" + feature_names: "device_ip" + feature_names: "device_model" + feature_names: "device_type" + feature_names: "device_conn_type" + wide_deep:DEEP + } + feature_groups: { + group_name: "user_item" + feature_names: "hour" + feature_names: "c14" + feature_names: "c15" + feature_names: "c16" + feature_names: "c17" + feature_names: "c18" + feature_names: "c19" + feature_names: "c20" + feature_names: "c21" + wide_deep:DEEP + } + + multi_tower { + + towers { + input: "item" + dnn { + hidden_units: [384, 320, 256, 192, 128] + } + } + + towers { + input: "user" + dnn { + hidden_units: [384, 320, 256, 192, 128] + } + } + + towers { + input: "user_item" + dnn { + hidden_units: [384, 320, 256, 192, 128] + } + } + + final_dnn { + hidden_units: [256, 192, 128, 64] + } + l2_regularization: 0.0 + } + embedding_regularization: 0.0 +} diff --git a/samples/odps_script/configs/taobao_fg_work_que.config b/samples/odps_script/configs/taobao_fg_work_que.config new file mode 100644 index 000000000..edc61e1b3 --- /dev/null +++ b/samples/odps_script/configs/taobao_fg_work_que.config @@ -0,0 +1,293 @@ +model_dir: "oss://{OSS_BUCKET_NAME}/{EXP_NAME}/work_que/" + +train_config { + optimizer_config { + use_moving_average: false + adam_optimizer { + learning_rate { + exponential_decay_learning_rate { + initial_learning_rate: 0.0001 + 
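# This variant of the taobao fg config is driven by
# samples/odps_script/embedding_variable/train_work_que.sql
# (-Dselected_cols=clk,features): its data_config uses OdpsRTPInput on an
# RTP-style "features" column and sets pai_worker_queue: true, which presumably
# lets workers pull table slices from a shared work queue instead of relying on
# static sharding.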
decay_steps: 100000 + decay_factor: 0.5 + min_learning_rate: 1e-07 + } + } + } + } + num_steps: 400 + sync_replicas: true + log_step_count_steps: 200 +} +eval_config { + metrics_set { + auc { + } + } +} +data_config { + batch_size: 1024 + label_fields: "clk" + input_type: OdpsRTPInput + selected_cols: "clk,features" + separator: "" + input_fields { + input_name: "clk" + input_type: INT32 + default_val: "0" + } + input_fields { + input_name: "user_id" + } + input_fields { + input_name: "cms_segid" + } + input_fields { + input_name: "cms_group_id" + } + input_fields { + input_name: "age_level" + } + input_fields { + input_name: "pvalue_level" + } + input_fields { + input_name: "shopping_level" + } + input_fields { + input_name: "occupation" + } + input_fields { + input_name: "new_user_class_level" + } + input_fields { + input_name: "adgroup_id" + } + input_fields { + input_name: "cate_id" + } + input_fields { + input_name: "campaign_id" + } + input_fields { + input_name: "customer" + } + input_fields { + input_name: "brand" + } + input_fields { + input_name: "price" + input_type: DOUBLE + default_val: "0.0" + } + input_fields { + input_name: "pid" + } + input_fields { + input_name: "user_tag_cate" + } + input_fields { + input_name: "combo_brand" + } + input_fields { + input_name: "combo_cate_id" + } + rtp_separator: ";" + pai_worker_queue: true +} +feature_configs { + input_names: "user_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "cms_segid" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100 + separator: "" +} +feature_configs { + input_names: "cms_group_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100 + separator: "" +} +feature_configs { + input_names: "age_level" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "pvalue_level" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "shopping_level" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "occupation" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "new_user_class_level" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10 + separator: "" +} +feature_configs { + input_names: "adgroup_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "cate_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "campaign_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "customer" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "brand" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "price" + feature_type: RawFeature + separator: "" +} +feature_configs { + input_names: "pid" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "user_tag_cate" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + 
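# combo_brand and combo_cate_id come from the fg (feature generation) side:
# lookup features that key the user's tag_brand_list / tag_category_list by the
# item's brand / cate_id. See the definitions in samples/rtp_fg/fg_bucketize.json
# later in this change.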
input_names: "combo_brand" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 100000 + separator: "" +} +feature_configs { + input_names: "combo_cate_id" + feature_type: TagFeature + embedding_dim: 16 + hash_bucket_size: 10000 + separator: "" +} +model_config { + model_class: "MultiTower" + feature_groups { + group_name: "item" + feature_names: "adgroup_id" + feature_names: "cate_id" + feature_names: "campaign_id" + feature_names: "customer" + feature_names: "brand" + feature_names: "price" + feature_names: "pid" + wide_deep: DEEP + } + feature_groups { + group_name: "user" + feature_names: "user_id" + feature_names: "cms_segid" + feature_names: "cms_group_id" + feature_names: "age_level" + feature_names: "pvalue_level" + feature_names: "shopping_level" + feature_names: "occupation" + feature_names: "new_user_class_level" + feature_names: "user_tag_cate" + wide_deep: DEEP + } + feature_groups { + group_name: "combo" + feature_names: "combo_brand" + feature_names: "combo_cate_id" + wide_deep: DEEP + } + embedding_regularization: 1e-05 + multi_tower { + towers { + input: "item" + dnn { + hidden_units: 192 + hidden_units: 256 + hidden_units: 192 + hidden_units: 128 + } + } + towers { + input: "user" + dnn { + hidden_units: 192 + hidden_units: 256 + hidden_units: 192 + hidden_units: 128 + } + } + towers { + input: "combo" + dnn { + hidden_units: 192 + hidden_units: 256 + hidden_units: 192 + hidden_units: 128 + } + } + final_dnn { + hidden_units: 256 + hidden_units: 192 + hidden_units: 128 + hidden_units: 64 + } + l2_regularization: 0.0001 + } +} +export_config { + multi_placeholder: false +} diff --git a/samples/odps_script/configs/taobao_multi_tower_boundary_test.config b/samples/odps_script/configs/taobao_multi_tower_boundary_test.config new file mode 100644 index 000000000..b190ec2c8 --- /dev/null +++ b/samples/odps_script/configs/taobao_multi_tower_boundary_test.config @@ -0,0 +1,282 @@ +train_input_path: "" +eval_input_path: "" +model_dir: "oss://{OSS_BUCKET_NAME}/easy_rec_odps_test/{EXP_NAME}/edit_boundary_test/checkpoints/" +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 100 +} +eval_config { + metrics_set: { + auc {} + } +} +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' 
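# The input_fields in this data_config are declared in exactly the column order
# of the boundary_train_ / boundary_test_ tables created by
# samples/odps_script/boundary/create_inner_boundary_table.sql
# (clk, buy, pid, ..., tag_brand_list, price), since OdpsInputV2 appears to
# consume table columns positionally.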
+ input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: DOUBLE + } + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: OdpsInputV2 +} +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: RawFeature + embedding_dim: 16 +} +model_config: { + model_class: 'MultiTower' + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + wide_deep: DEEP + } + feature_groups: { + group_name: 'combo' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + multi_tower { + towers { + input: "user" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "item" + dnn { + hidden_units: 
[256, 128, 96, 64] + } + } + towers { + input: "combo" + dnn { + hidden_units: [128, 96, 64, 32] + } + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} +export_config { +} diff --git a/samples/odps_script/configs/taobao_multi_tower_multi_value_test.config b/samples/odps_script/configs/taobao_multi_tower_multi_value_test.config new file mode 100644 index 000000000..77018edf5 --- /dev/null +++ b/samples/odps_script/configs/taobao_multi_tower_multi_value_test.config @@ -0,0 +1,288 @@ +train_input_path: "" +eval_input_path: "" +model_dir: "oss://{OSS_BUCKET_NAME}/easy_rec_odps_test/{EXP_NAME}/multi_value/checkpoints/" +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 100 + sync_replicas: True + num_steps: 100 +} +eval_config { + metrics_set: { + auc {} + } +} +data_config { + input_fields { + input_name:'clk' + input_type: INT32 + } + input_fields { + input_name:'buy' + input_type: INT32 + } + input_fields { + input_name: 'pid' + input_type: STRING + } + input_fields { + input_name: 'adgroup_id' + input_type: STRING + } + input_fields { + input_name: 'cate_id' + input_type: STRING + } + input_fields { + input_name: 'campaign_id' + input_type: STRING + } + input_fields { + input_name: 'customer' + input_type: STRING + } + input_fields { + input_name: 'brand' + input_type: STRING + } + input_fields { + input_name: 'user_id' + input_type: STRING + } + input_fields { + input_name: 'cms_segid' + input_type: STRING + } + input_fields { + input_name: 'cms_group_id' + input_type: STRING + } + input_fields { + input_name: 'final_gender_code' + input_type: STRING + } + input_fields { + input_name: 'age_level' + input_type: STRING + } + input_fields { + input_name: 'pvalue_level' + input_type: STRING + } + input_fields { + input_name: 'shopping_level' + input_type: STRING + } + input_fields { + input_name: 'occupation' + input_type: STRING + } + input_fields { + input_name: 'new_user_class_level' + input_type: STRING + } + input_fields { + input_name: 'tag_category_list' + input_type: STRING + } + input_fields { + input_name: 'tag_brand_list' + input_type: STRING + } + input_fields { + input_name: 'price' + input_type: INT32 + } + label_fields: 'clk' + batch_size: 4096 + num_epochs: 10000 + prefetch_size: 32 + input_type: OdpsInputV2 +} +feature_configs : { + input_names: 'pid' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'adgroup_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cate_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 +} +feature_configs : { + input_names: 'campaign_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'customer' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'brand' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 +} +feature_configs : { + input_names: 'cms_segid' + feature_type: IdFeature + embedding_dim: 16 + 
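# Relative to the boundary-test config above, this multi-value variant reads
# price as INT32 and bucketizes it with an IdFeature (num_buckets: 50, see the
# price feature_config below), and its export_config lists tag_category_list /
# tag_brand_list as multi_value_fields with placeholder_named_by_input: true.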
hash_bucket_size: 100 +} +feature_configs : { + input_names: 'cms_group_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 +} +feature_configs : { + input_names: 'final_gender_code' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'age_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'pvalue_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'shopping_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'occupation' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'new_user_class_level' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 +} +feature_configs : { + input_names: 'tag_category_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'tag_brand_list' + feature_type: TagFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 +} +feature_configs : { + input_names: 'price' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 +} +model_config: { + model_class: 'MultiTower' + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'cms_segid' + feature_names: 'cms_group_id' + feature_names: 'age_level' + feature_names: 'pvalue_level' + feature_names: 'shopping_level' + feature_names: 'occupation' + feature_names: 'new_user_class_level' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'adgroup_id' + feature_names: 'cate_id' + feature_names: 'campaign_id' + feature_names: 'customer' + feature_names: 'brand' + feature_names: 'price' + wide_deep: DEEP + } + feature_groups: { + group_name: 'combo' + feature_names: 'pid' + feature_names: 'tag_category_list' + feature_names: 'tag_brand_list' + wide_deep: DEEP + } + multi_tower { + towers { + input: "user" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "item" + dnn { + hidden_units: [256, 128, 96, 64] + } + } + towers { + input: "combo" + dnn { + hidden_units: [128, 96, 64, 32] + } + } + final_dnn { + hidden_units: [128, 96, 64, 32, 16] + } + l2_regularization: 1e-6 + } + embedding_regularization: 1e-4 +} +export_config { + multi_value_fields { + input_name: 'tag_category_list' + input_name: 'tag_brand_list' + } + placeholder_named_by_input: true +} diff --git a/samples/odps_script/embedding_variable/train.sql b/samples/odps_script/embedding_variable/train.sql index 89589a1e4..930878346 100644 --- a/samples/odps_script/embedding_variable/train.sql +++ b/samples/odps_script/embedding_variable/train.sql @@ -1,7 +1,8 @@ pai -name easy_rec_ext -Dconfig=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/configs/taobao_fg.config -Dcmd=train --Dtables=odps://{ODPS_PROJ_NAME}/tables/inner_ev_train_{TIME_STAMP},odps://{ODPS_PROJ_NAME}/tables/inner_ev_test_{TIME_STAMP} +-Dtrain_tables=odps://{ODPS_PROJ_NAME}/tables/inner_ev_train_{TIME_STAMP},odps://{ODPS_PROJ_NAME}/tables/inner_ev_train_{TIME_STAMP} +-Deval_tables=odps://{ODPS_PROJ_NAME}/tables/inner_ev_test_{TIME_STAMP} -Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000,"gpu":100, "memory":40000}}' -Darn={ROLEARN} -Dbuckets=oss://{OSS_BUCKET_NAME}/ diff --git a/samples/odps_script/embedding_variable/train_work_que.sql 
b/samples/odps_script/embedding_variable/train_work_que.sql new file mode 100644 index 000000000..3b912d887 --- /dev/null +++ b/samples/odps_script/embedding_variable/train_work_que.sql @@ -0,0 +1,12 @@ +pai -name easy_rec_ext +-Dconfig=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/configs/taobao_fg_work_que.config +-Dcmd=train +-Dtrain_tables=odps://{ODPS_PROJ_NAME}/tables/inner_ev_train_{TIME_STAMP},odps://{ODPS_PROJ_NAME}/tables/inner_ev_train_{TIME_STAMP} +-Deval_tables=odps://{ODPS_PROJ_NAME}/tables/inner_ev_test_{TIME_STAMP} +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000,"gpu":100, "memory":40000}}' +-Darn={ROLEARN} +-Dbuckets=oss://{OSS_BUCKET_NAME}/ +-DossHost={OSS_ENDPOINT} +-Dselected_cols=clk,features +-Dwith_evaluator=1 +; diff --git a/samples/odps_script/multi_value/create_external_multi_value_table.sql b/samples/odps_script/multi_value/create_external_multi_value_table.sql new file mode 100644 index 000000000..bf46174df --- /dev/null +++ b/samples/odps_script/multi_value/create_external_multi_value_table.sql @@ -0,0 +1,60 @@ +drop TABLE IF EXISTS external_multi_value_test_{TIME_STAMP} ; +create EXTERNAL table external_multi_value_test_{TIME_STAMP}( + clk bigint + ,buy bigint + ,pid string + ,adgroup_id string + ,cate_id string + ,campaign_id string + ,customer string + ,brand string + ,user_id string + ,cms_segid string + ,cms_group_id string + ,final_gender_code string + ,age_level string + ,pvalue_level string + ,shopping_level string + ,occupation string + ,new_user_class_level string + ,tag_category_list string + ,tag_brand_list string + ,price bigint +) +STORED BY 'com.aliyun.odps.CsvStorageHandler' +WITH SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/tb_data/train/' +; + + +drop TABLE IF EXISTS external_multi_value_train_{TIME_STAMP} ; +create EXTERNAL table external_multi_value_train_{TIME_STAMP}( + clk bigint + ,buy bigint + ,pid string + ,adgroup_id string + ,cate_id string + ,campaign_id string + ,customer string + ,brand string + ,user_id string + ,cms_segid string + ,cms_group_id string + ,final_gender_code string + ,age_level string + ,pvalue_level string + ,shopping_level string + ,occupation string + ,new_user_class_level string + ,tag_category_list string + ,tag_brand_list string + ,price bigint +) +STORED BY 'com.aliyun.odps.CsvStorageHandler' +WITH SERDEPROPERTIES ( + 'odps.properties.rolearn'='{ROLEARN}' +) +LOCATION 'oss://{OSS_BUCKET_NAME}/{EXP_NAME}/test_data/tb_data/test/' +; diff --git a/samples/odps_script/multi_value/create_inner_multi_value_table.sql b/samples/odps_script/multi_value/create_inner_multi_value_table.sql new file mode 100644 index 000000000..defd9bdc5 --- /dev/null +++ b/samples/odps_script/multi_value/create_inner_multi_value_table.sql @@ -0,0 +1,56 @@ +drop TABLE IF EXISTS multi_value_test_{TIME_STAMP} ; +create table multi_value_test_{TIME_STAMP}( + clk bigint + ,buy bigint + ,pid string + ,adgroup_id string + ,cate_id string + ,campaign_id string + ,customer string + ,brand string + ,user_id string + ,cms_segid string + ,cms_group_id string + ,final_gender_code string + ,age_level string + ,pvalue_level string + ,shopping_level string + ,occupation string + ,new_user_class_level string + ,tag_category_list string + ,tag_brand_list string + ,price bigint +) +; + +INSERT OVERWRITE TABLE multi_value_test_{TIME_STAMP} +select * from external_multi_value_test_{TIME_STAMP} ; + + +drop TABLE IF EXISTS multi_value_train_{TIME_STAMP} ; +create table 
multi_value_train_{TIME_STAMP}( + clk bigint + ,buy bigint + ,pid string + ,adgroup_id string + ,cate_id string + ,campaign_id string + ,customer string + ,brand string + ,user_id string + ,cms_segid string + ,cms_group_id string + ,final_gender_code string + ,age_level string + ,pvalue_level string + ,shopping_level string + ,occupation string + ,new_user_class_level string + ,tag_category_list string + ,tag_brand_list string + ,price bigint +) +; + +INSERT OVERWRITE TABLE multi_value_train_{TIME_STAMP} +select * from external_multi_value_train_{TIME_STAMP} ; diff --git a/samples/odps_script/multi_value/drop_table.sql b/samples/odps_script/multi_value/drop_table.sql new file mode 100644 index 000000000..ab5faa821 --- /dev/null +++ b/samples/odps_script/multi_value/drop_table.sql @@ -0,0 +1,4 @@ +drop TABLE if EXISTS multi_value_test_{TIME_STAMP}; +drop TABLE if EXISTS external_multi_value_test_{TIME_STAMP}; +drop TABLE if EXISTS multi_value_train_{TIME_STAMP}; +drop TABLE if EXISTS external_multi_value_train_{TIME_STAMP}; diff --git a/samples/odps_script/multi_value/train_multi_tower_model.sql b/samples/odps_script/multi_value/train_multi_tower_model.sql new file mode 100644 index 000000000..e75cd55a3 --- /dev/null +++ b/samples/odps_script/multi_value/train_multi_tower_model.sql @@ -0,0 +1,10 @@ +pai -name easy_rec_ext +-Dconfig=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/configs/taobao_multi_tower_multi_value_test.config +-Dcmd=train +-Dtables=odps://{ODPS_PROJ_NAME}/tables/multi_value_train_{TIME_STAMP},odps://{ODPS_PROJ_NAME}/tables/multi_value_test_{TIME_STAMP} +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":2, "cpu":1000, "memory":40000}}' +-Darn={ROLEARN} +-Dbuckets=oss://{OSS_BUCKET_NAME}/ +-DossHost={OSS_ENDPOINT} +-Dwith_evaluator=1 +; diff --git a/samples/odps_script/other_test/test_export_update_model_dir.sql b/samples/odps_script/other_test/test_export_update_model_dir.sql new file mode 100644 index 000000000..677c5c64d --- /dev/null +++ b/samples/odps_script/other_test/test_export_update_model_dir.sql @@ -0,0 +1,9 @@ +pai -name easy_rec_ext +-Dconfig=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/configs/dwd_avazu_ctr_deepmodel_ext_v5_export_test.config +-Dcmd=export +-Dmodel_dir=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/dwd_avazu_ctr2/checkpoints5/ +-Dexport_dir=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/dwd_avazu_ctr2/checkpoints5/savemodel_v1/ +-Darn={ROLEARN} +-Dbuckets=oss://{OSS_BUCKET_NAME}/ +-DossHost={OSS_ENDPOINT} +; diff --git a/samples/odps_script/other_test/test_train_before_export.sql b/samples/odps_script/other_test/test_train_before_export.sql new file mode 100644 index 000000000..e4dc0b37c --- /dev/null +++ b/samples/odps_script/other_test/test_train_before_export.sql @@ -0,0 +1,10 @@ +pai -name easy_rec_ext +-Dconfig=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/configs/dwd_avazu_ctr_deepmodel_ext_v5.config +-Dcmd=train +-Dtables=odps://{ODPS_PROJ_NAME}/tables/deepfm_train_{TIME_STAMP},odps://{ODPS_PROJ_NAME}/tables/deepfm_test_{TIME_STAMP} +-Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":2, "cpu":1000,"gpu":100, "memory":40000}}' +-Darn={ROLEARN} +-Dbuckets=oss://{OSS_BUCKET_NAME}/ +-DossHost={OSS_ENDPOINT} +-Dversion=20201029 +; diff --git a/samples/odps_script/other_test/test_train_distribute_strategy_ess.sql b/samples/odps_script/other_test/test_train_distribute_strategy_ess.sql index f302d299a..286eec1de 100644 --- a/samples/odps_script/other_test/test_train_distribute_strategy_ess.sql +++ b/samples/odps_script/other_test/test_train_distribute_strategy_ess.sql @@ -7,4 
+7,5 @@ pai -name easy_rec_ext -Darn={ROLEARN} -Dbuckets=oss://{OSS_BUCKET_NAME}/ -DossHost={OSS_ENDPOINT} +-Deval_method=none ; diff --git a/samples/odps_script/other_test/test_train_version.sql b/samples/odps_script/other_test/test_train_version.sql index fe206e009..91526dc35 100644 --- a/samples/odps_script/other_test/test_train_version.sql +++ b/samples/odps_script/other_test/test_train_version.sql @@ -2,9 +2,10 @@ pai -name easy_rec_ext -Dconfig=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/configs/dwd_avazu_ctr_deepmodel_ext_v5.config -Dcmd=train -Dtables=odps://{ODPS_PROJ_NAME}/tables/deepfm_train_{TIME_STAMP},odps://{ODPS_PROJ_NAME}/tables/deepfm_test_{TIME_STAMP} +-Dmodel_dir=oss://{OSS_BUCKET_NAME}/{EXP_NAME}/dwd_avazu_ctr2/checkpoints_version/ -Dcluster='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":2, "cpu":1000,"gpu":100, "memory":40000}}' -Darn={ROLEARN} -Dbuckets=oss://{OSS_BUCKET_NAME}/ -DossHost={OSS_ENDPOINT} --Dversion=20201019 +-Dversion=20201029 ; diff --git a/samples/rtp_fg/fg_bucketize.json b/samples/rtp_fg/fg_bucketize.json new file mode 100644 index 000000000..34d6de26c --- /dev/null +++ b/samples/rtp_fg/fg_bucketize.json @@ -0,0 +1,26 @@ +{ + "features": [ + {"expression": "user:user_id", "feature_name": "user_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"user"}, + {"expression": "user:cms_segid", "feature_name": "cms_segid", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100, "embedding_dim": 16, "group":"user"}, + {"expression": "user:cms_group_id", "feature_name": "cms_group_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100, "embedding_dim": 16, "group":"user"}, + {"expression": "user:age_level", "feature_name": "age_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:pvalue_level", "feature_name": "pvalue_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:shopping_level", "feature_name": "shopping_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:occupation", "feature_name": "occupation", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:new_user_class_level", "feature_name": "new_user_class_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "item:adgroup_id", "feature_name": "adgroup_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:cate_id", "feature_name": "cate_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:campaign_id", "feature_name": "campaign_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:customer", "feature_name": "customer", "feature_type":"id_feature", "value_type":"String", 
"combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:brand", "feature_name": "brand", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:price", "feature_name": "price", "feature_type":"raw_feature", "value_type":"Integer", "combiner":"mean", "group":"item", "bucketize_boundaries": "10,20,30,40,50,60"}, + {"expression": "item:pid", "feature_name": "pid", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "user:tag_category_list", "feature_name": "user_tag_cate", "feature_type":"id_feature", "hash_bucket_size":100000, "group":"user"}, + {"map": "user:tag_brand_list", "key":"item:brand", "feature_name": "combo_brand", "feature_type":"lookup_feature", "needDiscrete":true, "hash_bucket_size":100000, "group":"combo"}, + {"map": "user:tag_category_list", "key":"item:cate_id", "feature_name": "combo_cate_id", "feature_type":"lookup_feature", "needDiscrete":true, "hash_bucket_size":10000, "group":"combo"} + ], + "reserves": [ + "user_id", "campaign_id", "clk" + ], + "multi_val_sep": "|" +} diff --git a/samples/rtp_fg/fg_bucketize_model_config.json b/samples/rtp_fg/fg_bucketize_model_config.json new file mode 100644 index 000000000..d6f51b436 --- /dev/null +++ b/samples/rtp_fg/fg_bucketize_model_config.json @@ -0,0 +1,29 @@ +{ + "features": [ + {"expression": "user:user_id", "feature_name": "user_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"user"}, + {"expression": "user:cms_segid", "feature_name": "cms_segid", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100, "embedding_dim": 16, "group":"user"}, + {"expression": "user:cms_group_id", "feature_name": "cms_group_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100, "embedding_dim": 16, "group":"user"}, + {"expression": "user:age_level", "feature_name": "age_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:pvalue_level", "feature_name": "pvalue_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:shopping_level", "feature_name": "shopping_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:occupation", "feature_name": "occupation", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:new_user_class_level", "feature_name": "new_user_class_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "item:adgroup_id", "feature_name": "adgroup_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:cate_id", "feature_name": "cate_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + 
{"expression": "item:campaign_id", "feature_name": "campaign_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:customer", "feature_name": "customer", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:brand", "feature_name": "brand", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:price", "feature_name": "price", "feature_type":"raw_feature", "value_type":"Integer", "combiner":"mean", "group":"item", "boundaries":[0,10,20,30,40,50]}, + {"expression": "item:pid", "feature_name": "pid", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "user:tag_category_list", "feature_name": "user_tag_cate", "feature_type":"id_feature", "hash_bucket_size":100000, "group":"user"}, + {"map": "user:tag_brand_list", "key":"item:brand", "feature_name": "combo_brand", "feature_type":"lookup_feature", "needDiscrete":true, "hash_bucket_size":100000, "group":"combo"}, + {"map": "user:tag_category_list", "key":"item:cate_id", "feature_name": "combo_cate_id", "feature_type":"lookup_feature", "needDiscrete":true, "hash_bucket_size":10000, "group":"combo"} + ], + "reserves": [ + "user_id", "campaign_id", "clk" + ], + "multi_val_sep": "|", + "model_path": "samples/rtp_fg/wide_and_deep.config", + "model_dir": "experiments/rtp_fg/wide_and_deep_update_model", + "edit_config_json": [{"model_config.wide_and_deep.dnn.hidden_units": [48, 24]}] +} diff --git a/samples/rtp_fg/fg_bucketize_v2.json b/samples/rtp_fg/fg_bucketize_v2.json new file mode 100644 index 000000000..526d4bd66 --- /dev/null +++ b/samples/rtp_fg/fg_bucketize_v2.json @@ -0,0 +1,26 @@ +{ + "features": [ + {"expression": "user:user_id", "feature_name": "user_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"user"}, + {"expression": "user:cms_segid", "feature_name": "cms_segid", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100, "embedding_dim": 16, "group":"user"}, + {"expression": "user:cms_group_id", "feature_name": "cms_group_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100, "embedding_dim": 16, "group":"user"}, + {"expression": "user:age_level", "feature_name": "age_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:pvalue_level", "feature_name": "pvalue_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:shopping_level", "feature_name": "shopping_level", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:occupation", "feature_name": "occupation", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "user:new_user_class_level", "feature_name": "new_user_class_level", "feature_type":"id_feature", "value_type":"String", 
"combiner":"mean", "hash_bucket_size": 10, "embedding_dim": 16, "group":"user"}, + {"expression": "item:adgroup_id", "feature_name": "adgroup_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:cate_id", "feature_name": "cate_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:campaign_id", "feature_name": "campaign_id", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:customer", "feature_name": "customer", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:brand", "feature_name": "brand", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "item:price", "feature_name": "price", "feature_type":"raw_feature", "value_type":"Integer", "combiner":"mean", "group":"item", "boundaries":[0,10,20,30,40,50]}, + {"expression": "item:pid", "feature_name": "pid", "feature_type":"id_feature", "value_type":"String", "combiner":"mean", "hash_bucket_size": 100000, "embedding_dim": 16, "group":"item"}, + {"expression": "user:tag_category_list", "feature_name": "user_tag_cate", "feature_type":"id_feature", "hash_bucket_size":100000, "group":"user"}, + {"map": "user:tag_brand_list", "key":"item:brand", "feature_name": "combo_brand", "feature_type":"lookup_feature", "needDiscrete":true, "hash_bucket_size":100000, "group":"combo"}, + {"map": "user:tag_category_list", "key":"item:cate_id", "feature_name": "combo_cate_id", "feature_type":"lookup_feature", "needDiscrete":true, "hash_bucket_size":10000, "group":"combo"} + ], + "reserves": [ + "user_id", "campaign_id", "clk" + ], + "multi_val_sep": "|" +} diff --git a/samples/rtp_fg/wide_and_deep.config b/samples/rtp_fg/wide_and_deep.config new file mode 100644 index 000000000..c5e5d577b --- /dev/null +++ b/samples/rtp_fg/wide_and_deep.config @@ -0,0 +1,54 @@ +model_config:{ + model_class: "WideAndDeep" + feature_groups: { + group_name: "deep" + feature_names: "adgroup_id" + feature_names: "cate_id" + feature_names: "campaign_id" + feature_names: "customer" + feature_names: "brand" + feature_names: "price" + feature_names: "pid" + + feature_names: "user_id" + feature_names: "cms_segid" + feature_names: "cms_group_id" + feature_names: "age_level" + feature_names: "pvalue_level" + feature_names: "shopping_level" + feature_names: "occupation" + feature_names: "new_user_class_level" + feature_names: "user_tag_cate" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide" + feature_names: "adgroup_id" + feature_names: "cate_id" + feature_names: "campaign_id" + feature_names: "customer" + feature_names: "brand" + feature_names: "price" + feature_names: "pid" + + feature_names: "user_id" + feature_names: "cms_segid" + feature_names: "cms_group_id" + feature_names: "age_level" + feature_names: "pvalue_level" + feature_names: "shopping_level" + feature_names: "occupation" + feature_names: "new_user_class_level" + feature_names: "user_tag_cate" + wide_deep:WIDE + } + + wide_and_deep { + dnn { + hidden_units: [128, 64, 32] + } + + l2_regularization: 1e-5 + } + embedding_regularization: 1e-7 +} diff --git 
a/scripts/build_read_the_docs.sh b/scripts/build_read_the_docs.sh new file mode 100644 index 000000000..650f4e519 --- /dev/null +++ b/scripts/build_read_the_docs.sh @@ -0,0 +1,13 @@ +# Make sure in python3 environment + +# remove old protos +find easy_rec/python/protos/ -name \"*_pb2.py\" | xargs rm -rf +# make proto +bash scripts/gen_proto.sh +PATH=./protoc/bin/ protoc/bin/protoc --doc_out=html,proto.html:docs/source easy_rec/python/protos/*.proto +sed -i 's#

#  #g;s#  #
#g' docs/source/proto.html + +pip3 install -r requirements/docs.txt +pip install -r requirements/docs.txt +pip3 install tensorflow==2.3 +pip install tensorflow==2.3 diff --git a/scripts/merge_code.sh b/scripts/merge_code.sh new file mode 100644 index 000000000..ddf207f73 --- /dev/null +++ b/scripts/merge_code.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +target_dir=$1 + +if [ ! -d "$target_dir" ] +then + echo "$target_dir does not exist" + exit 1 +fi + +oss_config=$2 +if [ ! -e "$oss_config" ] +then + echo "ossutil config[$oss_config] does not exist" + exit 1 +fi + +CP=/usr/bin/cp +if [ ! -e "$CP" ] +then + echo "$CP does not exist" + exit 1 +fi + +OSSUTIL=`which ossutil` +if [ $? -ne 0 ] +then + echo "ossutil is not find in path" + exit 1 +fi + +$CP -rf $target_dir/data ./ +$CP -rf $target_dir/docs ./ +$CP -rf $target_dir/samples ./ +$CP -rf $target_dir/easy_rec ./ +$CP -rf $target_dir/pai_jobs ./ +$CP -rf $target_dir/requirements ./ +$CP -rf $target_dir/requirements.txt ./ +$CP -rf $target_dir/setup.cfg ./ +$CP -rf $target_dir/setup.py ./ + +git add easy_rec +git add samples +git add docs +git add pai_jobs +git add requirements +git add requirements.txt +git add setup.cfg setup.py + + +find easy_rec -name "*.pyc" | xargs rm -rf +find easy_rec -name "*_pb2.py" | xargs rm -rf +find . -name "*.swp" | xargs rm -rf +find . -name "*.swo" | xargs rm -rf + +version=`date +%Y%m%d` +data_name=easy_rec_data_${version}.tar.gz +tar -cvzf $data_name data +$OSSUTIL --config=$oss_config cp $data_name oss://easyrec/data/ +sed -i -e "s/data\/easyrec_data\(_[0-9]\+\)\?.tar.gz/data\/easyrec_data_${version}.tar.gz/g" README.md + +echo "merge is done, please commit and push your changes." diff --git a/setup.cfg b/setup.cfg index 1fdfc53a5..765b254cf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,7 @@ multi_line_output = 7 force_single_line = true known_standard_library = setuptools known_first_party = easy_rec -known_third_party = common_io,future,google,numpy,oss2,pai,pandas,six,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow +known_third_party = absl,datahub,future,google,numpy,odps,oss2,pai,pandas,psutil,six,sklearn,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,yaml no_lines_before = LOCALFOLDER default_section = THIRDPARTY skip = easy_rec/python/protos diff --git a/setup.py b/setup.py index 50014c5ff..64ae2d416 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,14 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. +import codecs +import os from setuptools import find_packages from setuptools import setup def readme(): - with open('README.md') as f: + with codecs.open('README.md', encoding='utf-8') as f: content = f.read() return content @@ -15,7 +17,9 @@ def readme(): def get_version(): - with open(version_file, 'r') as f: + if 'BUILD_EASYREC_DOC' in os.environ: + os.system("bash -x scripts/build_read_the_docs.sh") + with codecs.open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) return locals()['__version__'] @@ -34,7 +38,7 @@ def parse_line(line): yield line def parse_require_file(fpath): - with open(fpath, 'r') as f: + with codecs.open(fpath, 'r') as f: for line in f.readlines(): line = line.strip() if line and not line.startswith('#'):