From aea7b84c2bdf91978fee54b45b717a550b0c107c Mon Sep 17 00:00:00 2001
From: Guthrie Armstrong
Date: Wed, 12 Apr 2017 22:29:08 -0400
Subject: [PATCH] Fix typo in `word_tokenize` docstring

Unless I'm misunderstanding something, `nltk.tokenize.word_tokenize()` splits
text into words, not into sentences (which `nltk.tokenize.sent_tokenize()`
does).
---
 nltk/tokenize/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nltk/tokenize/__init__.py b/nltk/tokenize/__init__.py
index 77ed32f8f9..3eed916179 100644
--- a/nltk/tokenize/__init__.py
+++ b/nltk/tokenize/__init__.py
@@ -103,7 +103,7 @@ def word_tokenize(text, language='english'):
     along with :class:`.PunktSentenceTokenizer`
     for the specified language).
 
-    :param text: text to split into sentences
+    :param text: text to split into words
    :param language: the model name in the Punkt corpus
     """
     return [token for sent in sent_tokenize(text, language)
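
For context, a minimal usage sketch of the two tokenizers the commit message
contrasts (assumes NLTK is installed and the Punkt models are available; the
sample text and printed outputs are illustrative only, not part of the patch):

import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# One-time download of the Punkt sentence tokenizer models, if not already present:
# nltk.download('punkt')

text = "NLTK is a leading platform. It works with human language data."

# sent_tokenize splits the text into sentences:
print(sent_tokenize(text))
# ['NLTK is a leading platform.', 'It works with human language data.']

# word_tokenize splits the text into words (it sentence-splits first, then
# word-tokenizes each sentence), which is what the corrected docstring says:
print(word_tokenize(text))
# ['NLTK', 'is', 'a', 'leading', 'platform', '.', 'It', 'works', 'with',
#  'human', 'language', 'data', '.']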