Skip to content

Commit

Permalink
refactor: replace I18n with a custom transliterator
Browse files Browse the repository at this point in the history
  • Loading branch information
matteoredz committed Oct 23, 2023
1 parent 4e3eafb commit 2df1e72
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 6 deletions.
1 change: 1 addition & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Metrics/BlockLength:
Metrics/ClassLength:
Enabled: true
Exclude:
- 'lib/itax_code/transliterator.rb'
- 'lib/itax_code/utils.rb'
- 'test/**/*'

Expand Down
1 change: 0 additions & 1 deletion itax_code.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ Gem::Specification.new do |spec|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
spec.require_paths = ["lib"]
spec.metadata["rubygems_mfa_required"] = "false"
spec.add_dependency "i18n", "~> 1.14", ">= 1.14.1"
end
211 changes: 211 additions & 0 deletions lib/itax_code/transliterator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
# frozen_string_literal: true

module ItaxCode
  # Replaces non-ASCII characters with ASCII approximations.
  #
  # Adapted from the i18n gem transliterator:
  # https://github.com/ruby-i18n/i18n/blob/master/lib/i18n/backend/transliterator.rb
  #
  # @example
  #   ItaxCode::Transliterator.new.transliterate("Nicolò") # => "Nicolo"
  class Transliterator
    # Substituted for any non-ASCII character missing from APPROXIMATIONS
    # when the caller does not supply a replacement.
    DEFAULT_REPLACEMENT_CHAR = "?"

    # Matches exactly one character outside the 7-bit ASCII range.
    NON_ASCII = /[^\x00-\x7f]/u.freeze

    # Lookup table from a single non-ASCII character to its ASCII
    # approximation. Built once at load time and shared by every instance.
    #
    # Every key MUST be a single character, because NON_ASCII matches one
    # character at a time. Characters that have a Unicode compatibility
    # decomposition (U+0132, U+0133, U+013F, U+0140, U+0149) are written as
    # escapes so that text normalization cannot silently turn them into
    # multi-character keys that would never match.
    APPROXIMATIONS = {
      "À" => "A",
      "Á" => "A",
      "Â" => "A",
      "Ã" => "A",
      "Ä" => "A",
      "Å" => "A",
      "Æ" => "AE",
      "Ç" => "C",
      "È" => "E",
      "É" => "E",
      "Ê" => "E",
      "Ë" => "E",
      "Ì" => "I",
      "Í" => "I",
      "Î" => "I",
      "Ï" => "I",
      "Ð" => "D",
      "Ñ" => "N",
      "Ò" => "O",
      "Ó" => "O",
      "Ô" => "O",
      "Õ" => "O",
      "Ö" => "O",
      "×" => "x",
      "Ø" => "O",
      "Ù" => "U",
      "Ú" => "U",
      "Û" => "U",
      "Ü" => "U",
      "Ý" => "Y",
      "Þ" => "Th",
      "ß" => "ss",
      "ẞ" => "SS",
      "à" => "a",
      "á" => "a",
      "â" => "a",
      "ã" => "a",
      "ä" => "a",
      "å" => "a",
      "æ" => "ae",
      "ç" => "c",
      "è" => "e",
      "é" => "e",
      "ê" => "e",
      "ë" => "e",
      "ì" => "i",
      "í" => "i",
      "î" => "i",
      "ï" => "i",
      "ð" => "d",
      "ñ" => "n",
      "ò" => "o",
      "ó" => "o",
      "ô" => "o",
      "õ" => "o",
      "ö" => "o",
      "ø" => "o",
      "ù" => "u",
      "ú" => "u",
      "û" => "u",
      "ü" => "u",
      "ý" => "y",
      "þ" => "th",
      "ÿ" => "y",
      "Ā" => "A",
      "ā" => "a",
      "Ă" => "A",
      "ă" => "a",
      "Ą" => "A",
      "ą" => "a",
      "Ć" => "C",
      "ć" => "c",
      "Ĉ" => "C",
      "ĉ" => "c",
      "Ċ" => "C",
      "ċ" => "c",
      "Č" => "C",
      "č" => "c",
      "Ď" => "D",
      "ď" => "d",
      "Đ" => "D",
      "đ" => "d",
      "Ē" => "E",
      "ē" => "e",
      "Ĕ" => "E",
      "ĕ" => "e",
      "Ė" => "E",
      "ė" => "e",
      "Ę" => "E",
      "ę" => "e",
      "Ě" => "E",
      "ě" => "e",
      "Ĝ" => "G",
      "ĝ" => "g",
      "Ğ" => "G",
      "ğ" => "g",
      "Ġ" => "G",
      "ġ" => "g",
      "Ģ" => "G",
      "ģ" => "g",
      "Ĥ" => "H",
      "ĥ" => "h",
      "Ħ" => "H",
      "ħ" => "h",
      "Ĩ" => "I",
      "ĩ" => "i",
      "Ī" => "I",
      "ī" => "i",
      "Ĭ" => "I",
      "ĭ" => "i",
      "Į" => "I",
      "į" => "i",
      "İ" => "I",
      "ı" => "i",
      "\u0132" => "IJ", # LATIN CAPITAL LIGATURE IJ
      "\u0133" => "ij", # LATIN SMALL LIGATURE IJ
      "Ĵ" => "J",
      "ĵ" => "j",
      "Ķ" => "K",
      "ķ" => "k",
      "ĸ" => "k",
      "Ĺ" => "L",
      "ĺ" => "l",
      "Ļ" => "L",
      "ļ" => "l",
      "Ľ" => "L",
      "ľ" => "l",
      "\u013F" => "L", # LATIN CAPITAL LETTER L WITH MIDDLE DOT
      "\u0140" => "l", # LATIN SMALL LETTER L WITH MIDDLE DOT
      "Ł" => "L",
      "ł" => "l",
      "Ń" => "N",
      "ń" => "n",
      "Ņ" => "N",
      "ņ" => "n",
      "Ň" => "N",
      "ň" => "n",
      "\u0149" => "'n", # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
      "Ŋ" => "NG",
      "ŋ" => "ng",
      "Ō" => "O",
      "ō" => "o",
      "Ŏ" => "O",
      "ŏ" => "o",
      "Ő" => "O",
      "ő" => "o",
      "Œ" => "OE",
      "œ" => "oe",
      "Ŕ" => "R",
      "ŕ" => "r",
      "Ŗ" => "R",
      "ŗ" => "r",
      "Ř" => "R",
      "ř" => "r",
      "Ś" => "S",
      "ś" => "s",
      "Ŝ" => "S",
      "ŝ" => "s",
      "Ş" => "S",
      "ş" => "s",
      "Š" => "S",
      "š" => "s",
      "Ţ" => "T",
      "ţ" => "t",
      "Ť" => "T",
      "ť" => "t",
      "Ŧ" => "T",
      "ŧ" => "t",
      "Ũ" => "U",
      "ũ" => "u",
      "Ū" => "U",
      "ū" => "u",
      "Ŭ" => "U",
      "ŭ" => "u",
      "Ů" => "U",
      "ů" => "u",
      "Ű" => "U",
      "ű" => "u",
      "Ų" => "U",
      "ų" => "u",
      "Ŵ" => "W",
      "ŵ" => "w",
      "Ŷ" => "Y",
      "ŷ" => "y",
      "Ÿ" => "Y",
      "Ź" => "Z",
      "ź" => "z",
      "Ż" => "Z",
      "ż" => "z",
      "Ž" => "Z",
      "ž" => "z"
    }.freeze

    # Returns a copy of +string+ in which every non-ASCII character is
    # replaced by its entry in APPROXIMATIONS, or by +replacement+ when the
    # character has no entry. ASCII characters are left untouched.
    #
    # @param string [String] the text to transliterate
    # @param replacement [String, nil] substitute for unmapped characters;
    #   falls back to DEFAULT_REPLACEMENT_CHAR when nil
    # @return [String] a new string; the receiver is not modified
    def transliterate(string, replacement = nil)
      replacement ||= DEFAULT_REPLACEMENT_CHAR
      string.gsub(NON_ASCII) { |char| approximations[char] || replacement }
    end

    private

    # @return [Hash{String => String}] the shared, frozen lookup table
    def approximations
      APPROXIMATIONS
    end
  end
end
5 changes: 3 additions & 2 deletions lib/itax_code/utils.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true

require "csv"
require "i18n"
require "itax_code/transliterator"

module ItaxCode
class Utils
Expand All @@ -23,7 +23,8 @@ def slugged(string, separator = "-")
def transliterate(string)
return string if string.ascii_only?

I18n.transliterate(string.unicode_normalize(:nfc))
transliterator = Transliterator.new
transliterator.transliterate(string.unicode_normalize(:nfc))
end

def tax_code_sections_regex
Expand Down
3 changes: 0 additions & 3 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,3 @@
Minitest::Test.class_eval do
extend TestMacro
end

# Disable available locale checks to avoid warnings running the test suite.
I18n.enforce_available_locales = false

0 comments on commit 2df1e72

Please sign in to comment.