From 691f2c73ff70387b1075b710387031b6aeb64969 Mon Sep 17 00:00:00 2001 From: Sheng Zha Date: Wed, 17 Jul 2019 13:31:59 -0700 Subject: [PATCH] [DOC] add missing dataset document (#832) * add missing dataset document * adjust squad --- docs/api/modules/data.rst | 33 ++++++++++++++++++++++++++++++++- src/gluonnlp/data/__init__.py | 11 ++++++----- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/docs/api/modules/data.rst b/docs/api/modules/data.rst index 2067cbbe23..8c2924bc01 100644 --- a/docs/api/modules/data.rst +++ b/docs/api/modules/data.rst @@ -97,6 +97,7 @@ Analogy-based evaluation datasets include: GoogleAnalogyTestSet BiggerAnalogyTestSet + CoNLL Datasets ~~~~~~~~~~~~~~ The `CoNLL <http://www.conll.org/previous-tasks>`_ datasets are from a series of annual @@ -129,7 +130,7 @@ Machine Translation Datasets WMT2014BPE WMT2016 WMT2016BPE - SQuAD + Intent Classification and Slot Labeling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -141,6 +142,36 @@ Intent Classification and Slot Labeling SNIPSDataset +Question Answering +~~~~~~~~~~~~~~~~~~ + +`Stanford Question Answering Dataset (SQuAD) <https://rajpurkar.github.io/SQuAD-explorer/>`_ is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable. + +.. autosummary:: + :nosignatures: + + SQuAD + + +GLUE Benchmark +~~~~~~~~~~~~~~ + +The `General Language Understanding Evaluation (GLUE) benchmark <https://gluebenchmark.com/>`_ is a collection of resources for training, evaluating, and analyzing natural language understanding systems. + +.. autosummary:: + :nosignatures: + + GlueCoLA + GlueSST2 + GlueSTSB + GlueQQP + GlueRTE + GlueMNLI + GlueQNLI + GlueWNLI + GlueMRPC + + Datasets -------- diff --git a/src/gluonnlp/data/__init__.py b/src/gluonnlp/data/__init__.py index a1b2b943ba..7f2d7025d5 100644 --- a/src/gluonnlp/data/__init__.py +++ b/src/gluonnlp/data/__init__.py @@ -23,7 +23,7 @@ from . 
import (batchify, candidate_sampler, conll, corpora, dataloader, dataset, question_answering, registry, sampler, sentiment, stream, transforms, translation, utils, - word_embedding_evaluation, intent_slot) + word_embedding_evaluation, intent_slot, glue) from .candidate_sampler import * from .conll import * from .glue import * @@ -42,7 +42,8 @@ from .intent_slot import * __all__ = (['batchify'] + utils.__all__ + transforms.__all__ + sampler.__all__ - + dataset.__all__ + corpora.__all__ + sentiment.__all__ + - word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + - translation.__all__ + registry.__all__ + question_answering.__all__ - + dataloader.__all__ + candidate_sampler.__all__) + + dataset.__all__ + corpora.__all__ + sentiment.__all__ + + word_embedding_evaluation.__all__ + stream.__all__ + conll.__all__ + + translation.__all__ + registry.__all__ + question_answering.__all__ + + dataloader.__all__ + candidate_sampler.__all__ + intent_slot.__all__ + + glue.__all__)