Skip to content

Commit

Permalink
Merge pull request #50 from bact/main
Browse files Browse the repository at this point in the history
Fix load_dict() output type + add unittest
  • Loading branch information
bact authored Nov 9, 2021
2 parents 849fd0c + c968b6b commit db92776
Show file tree
Hide file tree
Showing 10 changed files with 253 additions and 9 deletions.
2 changes: 1 addition & 1 deletion nlpo3-python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion nlpo3-python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "nlpo3-python"
version = "1.2.0"
version = "1.2.1"
edition = "2018"
license = "Apache-2.0"
authors = ["Thanathip Suntorntip Gorlph"]
Expand Down
24 changes: 20 additions & 4 deletions nlpo3-python/nlpo3/__init__.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,37 @@
from typing import List
# Python binding for nlpO3, a natural language processing library.
#
# Provides a tokenizer.
#
# Authors:
# Thanathip Suntorntip
# Arthit Suriyawongkul

from pathlib import Path
from typing import List, Tuple

# import from .so (Rust)
from ._nlpo3_python_backend import load_dict as rust_load_dict
from ._nlpo3_python_backend import segment as rust_segment

# TODO: load_dict from in-memory list of words

def load_dict(file_path: str, dict_name: str) -> tuple[str, bool]:

def load_dict(file_path: str, dict_name: str) -> Tuple[str, bool]:
"""Load dictionary from a file.
Load a dictionary file into an in-memory dictionary collection,
and assign dict_name to it.
*** This function does not override an existing dict name. ***
:param file_path: Absolute path to a dictionary file
:param file_path: Path to a dictionary file
:type file_path: str
:param dict_name: A unique dictionary name, use for reference.
:type dict_name: str
:return tuple[human_readable_result_str, bool]
"""
return rust_load_dict(file_path, dict_name)
path = Path(file_path).resolve()

return rust_load_dict(str(path), dict_name)


def segment(
Expand All @@ -46,5 +58,9 @@ def segment(
:return: List of tokens
:rtype: List[str]
"""
if not text or not isinstance(text, str):
return []

result = rust_segment(text, dict_name, safe, parallel)

return result
2 changes: 1 addition & 1 deletion nlpo3-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "nlpo3"
version = "1.2.0"
version = "1.2.1"
description = "Python binding for nlpO3 Thai language processing library in Rust"
readme = "README.md"
requires-python = ">=3.6"
Expand Down
2 changes: 1 addition & 1 deletion nlpo3-python/setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = nlpo3
version = 1.2.0
version = 1.2.1
description = Python binding for nlpO3 Thai language processing library
long_description =
Python binding for nlpO3, a Thai natural language processing library in Rust.
Expand Down
3 changes: 2 additions & 1 deletion nlpo3-python/setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from setuptools import setup
from setuptools import find_packages, setup
from setuptools_rust import Binding, RustExtension, Strip

setup(
packages=find_packages(exclude=["notebooks", "tests"]),
rust_extensions=[
RustExtension(
"nlpo3._nlpo3_python_backend",
Expand Down
8 changes: 8 additions & 0 deletions nlpo3-python/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
//! Python binding for nlpO3, a natural language processing library.
//!
//! Provides a tokenizer.
//!
//! Authors:
//! Thanathip Suntorntip
//! Arthit Suriyawongkul

use ahash::AHashMap as HashMap;
use lazy_static::lazy_static;
use nlpo3::tokenizer;
Expand Down
12 changes: 12 additions & 0 deletions nlpo3-python/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""
Unit test package.

When this module is executed (i.e. when the ``tests`` package is imported),
it discovers test cases starting from the ``tests`` directory and runs them
immediately with a text test runner.
"""
import sys
import unittest

# Add "../nlpo3" to the module search path. The entry is a relative path, so
# which directory it points at depends on the current working directory.
sys.path.append("../nlpo3")

# Collect every test that unittest discovery finds starting from "tests".
loader = unittest.TestLoader()
testSuite = loader.discover("tests")
# Run the collected suite right away; verbosity=1 is unittest's default level.
testRunner = unittest.TextTestRunner(verbosity=1)
testRunner.run(testSuite)
7 changes: 7 additions & 0 deletions nlpo3-python/tests/data/test_dict.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
ค่า
ค่าจ้าง
ค่าจ้างเพื่อชีวิต
ค่าแรง
ค่ายทหาร
คน
ไข่
200 changes: 200 additions & 0 deletions nlpo3-python/tests/test_tokenize.py

Large diffs are not rendered by default.

0 comments on commit db92776

Please sign in to comment.