Skip to content

Commit

Permalink
No public description
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 584656350
  • Loading branch information
roark-google authored and copybara-github committed Nov 22, 2023
1 parent 2ac6dc4 commit af0b07c
Show file tree
Hide file tree
Showing 12 changed files with 771 additions and 63 deletions.
12 changes: 12 additions & 0 deletions WORKSPACE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ git_repository(
tag = "v4.24.3",
)

# -------------------------------------------------------------------------
# nlohmann json. See
# https://github.com/nlohmann/json
# -------------------------------------------------------------------------

# Fetches the nlohmann/json sources as the external repository
# `@com_github_nlohmann_json`.
# NOTE(review): the URL tracks the moving `develop` branch while `sha256` pins
# one snapshot of it, so the checksum will presumably stop matching once
# upstream advances -- consider pinning a tagged release or a commit archive.
http_archive(
    name = "com_github_nlohmann_json",
    sha256 = "5d9b3a054b4d0f51e47dc6fbb031c75166869a816cfaef08bb0f18be209dddb3",
    strip_prefix = "json-develop",
    urls = ["https://github.com/nlohmann/json/archive/refs/heads/develop.zip"],
)

# Import external protobuf dependencies into this workspace.
load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
protobuf_deps()
Expand Down
51 changes: 51 additions & 0 deletions nisaba/translit/tools/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

# Error rate calculation utilities for specific transliteration use cases.

load("@pip_deps//:requirements.bzl", "requirement")

package(
default_applicable_licenses = [
],
Expand Down Expand Up @@ -59,3 +61,52 @@ cc_test(
"@com_google_googletest//:gtest_main",
],
)

# C++ test built from emd_tsv_to_json_test.cc. It links the error-rate
# calculation library together with the nlohmann JSON library, the nisaba
# file/UTF-8/status-matcher helpers, Abseil and GoogleTest's gtest_main.
cc_test(
name = "emd_tsv_to_json_test",
size = "medium",
srcs = ["emd_tsv_to_json_test.cc"],
deps = [
":calculate_error_rate_lib",
"//nisaba/port:file_util",
"//nisaba/port:status-matchers",
"//nisaba/port:utf8_util",
"@com_github_nlohmann_json//:json",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
],
)

# Command-line entry point (calculate_emd_cer.py) built on the :emd_cer
# library. numpy and pyemd are resolved from pip through requirement().
py_binary(
name = "calculate_emd_cer",
srcs = ["calculate_emd_cer.py"],
python_version = "PY3",
deps = [
":emd_cer",
"@io_abseil_py//absl:app",
"@io_abseil_py//absl/flags",
requirement("numpy"),
requirement("pyemd"),
],
)

# Python library wrapping emd_cer.py; depends on the pip packages numpy and
# pyemd. Used by the :calculate_emd_cer binary and the :emd_cer_test test.
py_library(
name = "emd_cer",
srcs = ["emd_cer.py"],
deps = [
requirement("numpy"),
requirement("pyemd"),
],
)

# Test target for the :emd_cer library (emd_cer_test.py), using absltest.
# numpy and pyemd are not listed here; they arrive transitively via :emd_cer.
py_test(
name = "emd_cer_test",
srcs = ["emd_cer_test.py"],
python_version = "PY3",
srcs_version = "PY3",
deps = [
":emd_cer",
"@io_abseil_py//absl/testing:absltest",
],
)
51 changes: 51 additions & 0 deletions nisaba/translit/tools/calculate_emd_cer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2023 Nisaba Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# -*- coding: utf-8 -*-
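r"""Calculates an earth mover's distance (EMD) based character error rate.

Reads the file given by --json_path, treating each line as one JSON document,
and prints a tab-separated row per document followed by overall totals.
"""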

import io
import json
import re

from absl import app
from absl import flags

import numpy as np
import pyemd
from nisaba.translit.tools import emd_cer

FLAGS = flags.FLAGS

# Path of the input file read by main(); defaults to the empty string.
flags.DEFINE_string(name='json_path', default='', help='Input file')

def _error_rate(edits, ref_len):
  """Returns edits / ref_len, or NaN when the reference length is zero."""
  return edits / ref_len if ref_len else float('nan')


def main(unused_argv):
  """Prints per-document and overall EMD-based error rates.

  Reads the file named by --json_path, parses every non-blank line as one JSON
  document and passes it to emd_cer.emd_error_and_length(), which is unpacked
  as an (edits, reference length) pair. For each document one tab-separated
  row is printed: reference length, edits, 0, 0, edits / reference length.
  Totals over all documents follow.

  Args:
    unused_argv: Positional command-line arguments passed by absl; ignored.
  """
  with open(FLAGS.json_path, mode='r', encoding='UTF-8') as data_json:
    # Blank lines (e.g. a trailing newline at end of file) are skipped, since
    # json.loads() would reject them.
    parsed_json = [json.loads(line) for line in data_json if line.strip()]

  total_edits = 0
  total_len = 0
  for jline in parsed_json:
    edits, ref_len = emd_cer.emd_error_and_length(jline)
    # We don't separate subst/ins/del, hence the two zero columns in output.
    # The values are numeric, so they are formatted rather than concatenated
    # with '+', which would raise TypeError.
    print('{}\t{}\t0\t0\t{}'.format(ref_len, edits,
                                    _error_rate(edits, ref_len)))
    total_edits += edits
    total_len += ref_len

  print('Total edits:\t{}'.format(total_edits))
  print('Total reference length:\t{}'.format(total_len))
  # An empty input leaves total_len at zero; report NaN instead of crashing.
  print('Overall CER:\t{}'.format(_error_rate(total_edits, total_len)))

if __name__ == '__main__':
  # Executed as a script: hand main() to absl's application runner.
  app.run(main)
Loading

0 comments on commit af0b07c

Please sign in to comment.