diff --git a/Makefile b/Makefile
index d771514..c81f5e0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: test clean
+.PHONY: test bench clean
 
 sentencepiece/sentencepiece_model.pb.go: sentencepiece/sentencepiece_model.proto
 	protoc --go_out=. $<
@@ -10,5 +10,9 @@ cmd/dumpspm/dumpspm: cmd/dumpspm/main.go
 test:
 	go test -cover -coverprofile=c.out ./sentencepiece && go tool cover -html=c.out -o coverage.html
 
+# Run the benchmarks in ./sentencepiece with allocation stats; the regexp is quoted so the shell cannot glob-expand it.
+bench:
+	go test -benchmem ./sentencepiece -bench 'Benchmark.*'
+
 clean:
 	rm -f *.out coverage.html cmd/dumpspm/dumpspm
\ No newline at end of file
diff --git a/README.md b/README.md
index 7f1bc77..1499df5 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,8 @@ Sentence Piece Encoder
 ======================
 
 This is pure go implementation of the sentencepiece encoder.
-It takes a sentencepiece model and tokenizes it.
+Create an encoder for the given sentencepiece model and then use
+the `Tokenize` function to split the input text into tokens.
 
 Example:
 