From b07501adbd8b793fc3130278c7dd2ab754b71b62 Mon Sep 17 00:00:00 2001 From: Pengzhi Gao Date: Sat, 12 Oct 2019 17:44:32 -0400 Subject: [PATCH] Polish BERT example (#229) --- examples/bert/data/README.md | 4 ++-- examples/bert/prepare_data.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/examples/bert/data/README.md b/examples/bert/data/README.md index e5887c9ea..38fe60614 100644 --- a/examples/bert/data/README.md +++ b/examples/bert/data/README.md @@ -1,6 +1,6 @@ This gives the explanation on data preparation. -When you run `data/download_glue_data.py` in the parent directory, by default, all datasets in GLEU will be stored here. For more information on GLUE, please refer to +When you run `data/download_glue_data.py` in the parent directory, by default, all datasets in the General Language Understanding Evaluation (GLUE) will be stored here. For more information on GLUE, please refer to [gluebenchmark](https://gluebenchmark.com/tasks) Here we show the data format of the SSN-2 dataset. @@ -26,4 +26,4 @@ index sentence * The test data is in a different format: the first column is a unique index for each test example, the second column is the space-seperated string. -In [`bert/utils/data_utils.py`](https://github.com/asyml/texar/blob/master/examples/bert/utils/data_utils.py), there are 5 types of `Data Processor` Implemented. You can run `python bert_classifier_main.py` and specify `--task` to run on different datasets. +In [`bert/utils/data_utils.py`](https://github.com/asyml/texar-pytorch/blob/master/examples/bert/utils/data_utils.py), there are 5 types of `Data Processor` implemented. You can run `python bert_classifier_main.py` and specify `--task` to run on different datasets. diff --git a/examples/bert/prepare_data.py b/examples/bert/prepare_data.py index e62f294f1..8026b0853 100644 --- a/examples/bert/prepare_data.py +++ b/examples/bert/prepare_data.py @@ -44,10 +44,6 @@ help="The output directory where the pickled files will be generated. " "By default it will be set to 'data/{task}'. E.g.: if " "task is 'MRPC', it will be set as 'data/MRPC'") -parser.add_argument( - "--lower-case", type=bool, default=True, - help="Whether to lower case the input text. Should be True for uncased " - "models and False for cased models.") parser.add_argument( "--config-data", default="config_data", help="The dataset config.") args = parser.parse_args()