diff --git a/source/notebooks/Thai_Dependency_Parser.ipynb b/source/notebooks/Thai_Dependency_Parser.ipynb
index 38810c6..a2caa3b 100644
--- a/source/notebooks/Thai_Dependency_Parser.ipynb
+++ b/source/notebooks/Thai_Dependency_Parser.ipynb
@@ -1,167 +1,267 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "W-sH6hmWtM9v"
+ },
+ "source": [
+ "# Thai Dependency Parser\n",
+ "\n",
+ "PyThaiNLP does not come with a dependency parser. Instead, you can use the dependency parser from [spaCy-Thai](github.com/KoichiYasuoka/spaCy-Thai), which was trained on Universal Dependencies. This tutorial shows you how to get started."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Install spaCy-Thai."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
"colab": {
- "name": "Thai_Dependency_Parser.ipynb",
- "provenance": [],
- "collapsed_sections": []
+ "base_uri": "https://localhost:8080/"
},
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
+ "id": "fxkrKlM9s-tc",
+ "outputId": "6a9796bd-a818-4062-c8ae-bb3856b50515"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Collecting spacy_thai\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ca/2d/c2e71a4143d6d9cd9db6744e328dfb9f65b98ad7607644d0ad4369bce303/spacy_thai-0.6.2-py3-none-any.whl (5.1MB)\n",
+ "\u001b[K |████████████████████████████████| 5.1MB 11.2MB/s \n",
+ "\u001b[?25hCollecting ufal.udpipe>=1.2.0\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e5/72/2b8b9dc7c80017c790bb3308bbad34b57accfed2ac2f1f4ab252ff4e9cb2/ufal.udpipe-1.2.0.3.tar.gz (304kB)\n",
+ "\u001b[K |████████████████████████████████| 307kB 45.8MB/s \n",
+ "\u001b[?25hRequirement already satisfied: spacy>=2.2.2 in /usr/local/lib/python3.7/dist-packages (from spacy_thai) (2.2.4)\n",
+ "Collecting deplacy>=1.9.2\n",
+ " Downloading https://files.pythonhosted.org/packages/11/58/87b6286c9578fc456de1363f877228ee0d117b8de238e3e2cd49dbc06eaa/deplacy-1.9.3-py3-none-any.whl\n",
+ "Collecting pythainlp>=2.2.6\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c1/09/1215cb6f6ef0cfc9dbb427a961fda8a47c111955f782f659ca2d38c79adc/pythainlp-2.2.6-py3-none-any.whl (10.6MB)\n",
+ "\u001b[K |████████████████████████████████| 10.6MB 28.7MB/s \n",
+ "\u001b[?25hRequirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.0.5)\n",
+ "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (2.23.0)\n",
+ "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (7.4.0)\n",
+ "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (3.0.5)\n",
+ "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (0.8.2)\n",
+ "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.1.3)\n",
+ "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (2.0.5)\n",
+ "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (0.4.1)\n",
+ "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (4.41.1)\n",
+ "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.0.5)\n",
+ "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.19.5)\n",
+ "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.0.0)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (54.1.2)\n",
+ "Collecting tinydb>=3.0\n",
+ " Downloading https://files.pythonhosted.org/packages/af/cd/1ce3d93818cdeda0446b8033d21e5f32daeb3a866bbafd878a9a62058a9c/tinydb-4.4.0-py3-none-any.whl\n",
+ "Collecting python-crfsuite>=0.9.6\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/79/47/58f16c46506139f17de4630dbcfb877ce41a6355a1bbf3c443edb9708429/python_crfsuite-0.9.7-cp37-cp37m-manylinux1_x86_64.whl (743kB)\n",
+ "\u001b[K |████████████████████████████████| 747kB 68.5MB/s \n",
+ "\u001b[?25hRequirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->spacy_thai) (3.0.4)\n",
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->spacy_thai) (1.24.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->spacy_thai) (2020.12.5)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->spacy_thai) (2.10)\n",
+ "Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->spacy_thai) (3.7.2)\n",
+ "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->spacy_thai) (3.7.4.3)\n",
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->spacy_thai) (3.4.1)\n",
+ "Building wheels for collected packages: ufal.udpipe\n",
+ " Building wheel for ufal.udpipe (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for ufal.udpipe: filename=ufal.udpipe-1.2.0.3-cp37-cp37m-linux_x86_64.whl size=5626703 sha256=a58565fc21a1f9d3a7c51a3aea138cf612babbefb36ae05cbaccec852b55d967\n",
+ " Stored in directory: /root/.cache/pip/wheels/0c/9d/db/6d3404c33da5b7adb6c6972853efb6a27649d3ba15f7e9bebb\n",
+ "Successfully built ufal.udpipe\n",
+ "Installing collected packages: ufal.udpipe, deplacy, tinydb, python-crfsuite, pythainlp, spacy-thai\n",
+ "Successfully installed deplacy-1.9.3 pythainlp-2.2.6 python-crfsuite-0.9.7 spacy-thai-0.6.2 tinydb-4.4.0 ufal.udpipe-1.2.0.3\n"
+ ]
}
+ ],
+ "source": [
+ "!pip install spacy_thai"
+ ]
},
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "W-sH6hmWtM9v"
- },
- "source": [
- "# Thai Dependency Parser\n",
- "\n",
- "PyThaiNLP have not ```Thai Dependency Parser```. You can use Dependency Parser from spaCy-Thai.\n",
- "\n",
- "\n",
- "spaCy-Thai work with Universal Dependencies. [github.com/KoichiYasuoka/spaCy-Thai](https://github.com/KoichiYasuoka/spaCy-Thai)\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "fxkrKlM9s-tc",
- "outputId": "6a9796bd-a818-4062-c8ae-bb3856b50515"
- },
- "source": [
- "!pip install spacy_thai"
- ],
- "execution_count": 1,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Collecting spacy_thai\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ca/2d/c2e71a4143d6d9cd9db6744e328dfb9f65b98ad7607644d0ad4369bce303/spacy_thai-0.6.2-py3-none-any.whl (5.1MB)\n",
- "\u001b[K |████████████████████████████████| 5.1MB 11.2MB/s \n",
- "\u001b[?25hCollecting ufal.udpipe>=1.2.0\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e5/72/2b8b9dc7c80017c790bb3308bbad34b57accfed2ac2f1f4ab252ff4e9cb2/ufal.udpipe-1.2.0.3.tar.gz (304kB)\n",
- "\u001b[K |████████████████████████████████| 307kB 45.8MB/s \n",
- "\u001b[?25hRequirement already satisfied: spacy>=2.2.2 in /usr/local/lib/python3.7/dist-packages (from spacy_thai) (2.2.4)\n",
- "Collecting deplacy>=1.9.2\n",
- " Downloading https://files.pythonhosted.org/packages/11/58/87b6286c9578fc456de1363f877228ee0d117b8de238e3e2cd49dbc06eaa/deplacy-1.9.3-py3-none-any.whl\n",
- "Collecting pythainlp>=2.2.6\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c1/09/1215cb6f6ef0cfc9dbb427a961fda8a47c111955f782f659ca2d38c79adc/pythainlp-2.2.6-py3-none-any.whl (10.6MB)\n",
- "\u001b[K |████████████████████████████████| 10.6MB 28.7MB/s \n",
- "\u001b[?25hRequirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.0.5)\n",
- "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (2.23.0)\n",
- "Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (7.4.0)\n",
- "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (3.0.5)\n",
- "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (0.8.2)\n",
- "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.1.3)\n",
- "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (2.0.5)\n",
- "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (0.4.1)\n",
- "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (4.41.1)\n",
- "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.0.5)\n",
- "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.19.5)\n",
- "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (1.0.0)\n",
- "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy>=2.2.2->spacy_thai) (54.1.2)\n",
- "Collecting tinydb>=3.0\n",
- " Downloading https://files.pythonhosted.org/packages/af/cd/1ce3d93818cdeda0446b8033d21e5f32daeb3a866bbafd878a9a62058a9c/tinydb-4.4.0-py3-none-any.whl\n",
- "Collecting python-crfsuite>=0.9.6\n",
- "\u001b[?25l Downloading https://files.pythonhosted.org/packages/79/47/58f16c46506139f17de4630dbcfb877ce41a6355a1bbf3c443edb9708429/python_crfsuite-0.9.7-cp37-cp37m-manylinux1_x86_64.whl (743kB)\n",
- "\u001b[K |████████████████████████████████| 747kB 68.5MB/s \n",
- "\u001b[?25hRequirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->spacy_thai) (3.0.4)\n",
- "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->spacy_thai) (1.24.3)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->spacy_thai) (2020.12.5)\n",
- "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy>=2.2.2->spacy_thai) (2.10)\n",
- "Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->spacy_thai) (3.7.2)\n",
- "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->spacy_thai) (3.7.4.3)\n",
- "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy>=2.2.2->spacy_thai) (3.4.1)\n",
- "Building wheels for collected packages: ufal.udpipe\n",
- " Building wheel for ufal.udpipe (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
- " Created wheel for ufal.udpipe: filename=ufal.udpipe-1.2.0.3-cp37-cp37m-linux_x86_64.whl size=5626703 sha256=a58565fc21a1f9d3a7c51a3aea138cf612babbefb36ae05cbaccec852b55d967\n",
- " Stored in directory: /root/.cache/pip/wheels/0c/9d/db/6d3404c33da5b7adb6c6972853efb6a27649d3ba15f7e9bebb\n",
- "Successfully built ufal.udpipe\n",
- "Installing collected packages: ufal.udpipe, deplacy, tinydb, python-crfsuite, pythainlp, spacy-thai\n",
- "Successfully installed deplacy-1.9.3 pythainlp-2.2.6 python-crfsuite-0.9.7 spacy-thai-0.6.2 tinydb-4.4.0 ufal.udpipe-1.2.0.3\n"
- ],
- "name": "stdout"
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "dqhdtyuQs_Mi"
- },
- "source": [
- "import spacy_thai\n",
- "nlp=spacy_thai.load()"
- ],
- "execution_count": 2,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "37h9csjAtEZB"
- },
- "source": [
- "doc=nlp(\"พวกเราใช้ภาษาไทย\")"
- ],
- "execution_count": 6,
- "outputs": []
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Import spaCy-Thai."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "id": "dqhdtyuQs_Mi"
+ },
+ "outputs": [],
+ "source": [
+ "import spacy_thai\n",
+ "nlp=spacy_thai.load()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Do the dependency parse by calling `nlp` on a sentence. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "37h9csjAtEZB"
+ },
+ "outputs": [],
+ "source": [
+ "doc=nlp(\"พวกเราใช้ภาษาไทย\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can visualize the dependency parse with [deplacy](https://spacy.io/universe/project/deplacy), a tree visualizer for Universal Dependencies."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 365
},
+ "id": "IGyncBg6tI_D",
+ "outputId": "ee2f8ed3-7218-415d-d062-36b76a59fcf9"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 365
- },
- "id": "IGyncBg6tI_D",
- "outputId": "ee2f8ed3-7218-415d-d062-36b76a59fcf9"
- },
- "source": [
- "import graphviz\n",
- "import deplacy\n",
- "graphviz.Source(deplacy.dot(doc))"
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
],
- "execution_count": 7,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- ""
- ],
- "image/svg+xml": "\n\n\n\n\n"
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 7
- }
+ "text/plain": [
+ ""
]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "B5XXoxFttMO2"
- },
- "source": [
- ""
- ],
- "execution_count": null,
- "outputs": []
+ },
+ "execution_count": 4,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
}
- ]
-}
\ No newline at end of file
+ ],
+ "source": [
+ "import graphviz\n",
+ "import deplacy\n",
+ "graphviz.Source(deplacy.dot(doc))"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "Thai_Dependency_Parser.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}