From 691f2c73ff70387b1075b710387031b6aeb64969 Mon Sep 17 00:00:00 2001
From: Sheng Zha <szha@users.noreply.github.com>
Date: Wed, 17 Jul 2019 13:31:59 -0700
Subject: [PATCH] [DOC] add missing dataset document (#832)

* add missing dataset document

* adjust squad
---
 docs/api/modules/data.rst     | 33 ++++++++++++++++++++++++++++++++-
 src/gluonnlp/data/__init__.py | 11 ++++++-----
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/docs/api/modules/data.rst b/docs/api/modules/data.rst
index 2067cbbe23..8c2924bc01 100644
--- a/docs/api/modules/data.rst
+++ b/docs/api/modules/data.rst
@@ -97,6 +97,7 @@ Analogy-based evaluation datasets include:
     GoogleAnalogyTestSet
     BiggerAnalogyTestSet
 
+
 CoNLL Datasets
 ~~~~~~~~~~~~~~
 The `CoNLL <http://www.conll.org/previous-tasks>`_ datasets are from a series of annual
@@ -129,7 +130,7 @@ Machine Translation Datasets
     WMT2014BPE
     WMT2016
     WMT2016BPE
-    SQuAD
+
 
 Intent Classification and Slot Labeling
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -141,6 +142,36 @@ Intent Classification and Slot Labeling
     SNIPSDataset
 
 
+Question Answering
+~~~~~~~~~~~~~~~~~~
+
+The `Stanford Question Answering Dataset (SQuAD) <https://rajpurkar.github.io/SQuAD-explorer/>`_ is a reading comprehension dataset consisting of questions posed by crowdworkers on a set of Wikipedia articles. The answer to every question is either a segment of text, or span, from the corresponding reading passage, or the question may be unanswerable.
+
+.. autosummary::
+    :nosignatures:
+
+    SQuAD
+
+
+GLUE Benchmark
+~~~~~~~~~~~~~~
+
+The `General Language Understanding Evaluation (GLUE) benchmark <https://gluebenchmark.com/>`_ is a collection of resources for training, evaluating, and analyzing natural language understanding systems.
+
+.. autosummary::
+    :nosignatures:
+
+    GlueCoLA
+    GlueSST2
+    GlueSTSB
+    GlueQQP
+    GlueRTE
+    GlueMNLI
+    GlueQNLI
+    GlueWNLI
+    GlueMRPC
+
+
 Datasets
 --------
 
diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py
index a1b2b943ba..7f2d7025d5 100644
--- a/src/gluonnlp/data/__init__.py
+++ b/src/gluonnlp/data/__init__.py
@@ -23,7 +23,7 @@
 from . import (batchify, candidate_sampler, conll, corpora, dataloader,
                dataset, question_answering, registry, sampler, sentiment,
                stream, transforms, translation, utils,
-               word_embedding_evaluation, intent_slot)
+               word_embedding_evaluation, intent_slot, glue)
 from .candidate_sampler import *
 from .conll import *
 from .glue import *
@@ -42,7 +42,8 @@
 from .intent_slot import *
 
 __all__ = (['batchify'] + utils.__all__ + transforms.__all__ + sampler.__all__
-           + dataset.__all__ + corpora.__all__ + sentiment.__all__ +
-           word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ +
-           translation.__all__ + registry.__all__ + question_answering.__all__
-           + dataloader.__all__ + candidate_sampler.__all__)
+           + dataset.__all__ + corpora.__all__ + sentiment.__all__
+           + word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__
+           + translation.__all__ + registry.__all__ + question_answering.__all__
+           + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__
+           + glue.__all__)