From 4c2cc49fdb1239b1970c60016c77768c140f9b70 Mon Sep 17 00:00:00 2001
From: Kenneth Heafield
Date: Tue, 7 Jul 2020 10:49:21 +0100
Subject: [PATCH] Deprecation notice for get_ende_bleu.sh

This script is harmful because it propagates a non-standard way to
compute BLEU that is not reflective of the WMT 2014 task. Entirely too
many papers are submitted with BLEU scores computed in undocumented
ways. It's not even reasonable to allow people to run this script to
compare against prior work, because most prior work does not document
which script it used. And there are multiple of these running around.

https://www.aclweb.org/anthology/W18-6319/
---
 tensor2tensor/utils/get_ende_bleu.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensor2tensor/utils/get_ende_bleu.sh b/tensor2tensor/utils/get_ende_bleu.sh
index e48fad36d..431626444 100755
--- a/tensor2tensor/utils/get_ende_bleu.sh
+++ b/tensor2tensor/utils/get_ende_bleu.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+echo Do not use this script because the tokenization and treatment of the reference is not standard. Use sacrebleu instead. When comparing to a paper that used this script, explain how their BLEU was non-standard. 1>&2
+
 mosesdecoder=~/mosesdecoder
 tok_gold_targets=newstest2013.tok.de
 