Skip to content

Commit

Permalink
Add cp866i encoding for Belarusian
Browse files Browse the repository at this point in the history
  • Loading branch information
insolor committed Dec 3, 2023
1 parent e065cb1 commit 06048ab
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 15 deletions.
51 changes: 51 additions & 0 deletions alternative_encodings/cp866i.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import codecs
import encodings.cp866 as cp866

from functools import lru_cache

from .common import get_codec, get_incremental_encoder, get_incremental_decoder, get_stream_writer, get_stream_reader

# Codec APIs
class Codec(cp866.Codec):
    """cp866 codec that accepts the Belarusian letter "і" when encoding.

    Before delegating to the stock cp866 encoder, CYRILLIC BYELORUSSIAN-
    UKRAINIAN I (U+0456 / U+0406) is swapped for the look-alike Latin
    "i" / "I". Decoding is inherited unchanged, so decoded text contains
    the Latin letters.
    """

    # The Cyrillic and Latin letters are visually identical; spelling the
    # Cyrillic ones as escapes keeps the direction of the mapping unambiguous.
    _HOMOGLYPHS = str.maketrans({"\u0456": "i", "\u0406": "I"})

    def encode(self, input, errors="strict"):
        """Encode *input* to cp866 after substituting Latin i/I for і/І.

        Args:
            input: Text to encode.
            errors: Error handling scheme forwarded to the cp866 encoder.

        Returns:
            Whatever ``cp866.Codec.encode`` returns for the substituted
            text. The substitution is one character for one character, so
            the reported length still matches the original input.
        """
        # A single translate() pass replaces both letters at once.
        return super().encode(input.translate(self._HOMOGLYPHS), errors)


class IncrementalEncoder(cp866.IncrementalEncoder):
    """Incremental cp866 encoder that accepts the Belarusian letter "і".

    Each chunk has CYRILLIC BYELORUSSIAN-UKRAINIAN I (U+0456 / U+0406)
    swapped for the look-alike Latin "i" / "I" before it is handed to the
    stock cp866 incremental encoder.
    """

    # The Cyrillic and Latin letters are visually identical; spelling the
    # Cyrillic ones as escapes keeps the direction of the mapping unambiguous.
    _HOMOGLYPHS = str.maketrans({"\u0456": "i", "\u0406": "I"})

    def encode(self, input, final=False):
        """Encode one chunk of text after substituting Latin i/I for і/І.

        Args:
            input: Text chunk to encode.
            final: Forwarded unchanged to the cp866 incremental encoder.

        Returns:
            Whatever ``cp866.IncrementalEncoder.encode`` returns for the
            substituted chunk.
        """
        # A single translate() pass replaces both letters at once.
        return super().encode(input.translate(self._HOMOGLYPHS), final)


# Decoding is not customised here: the stock cp866 incremental decoder is
# re-exported under the name the registry entry below expects.
IncrementalDecoder = cp866.IncrementalDecoder

# encodings module API

# One module-level codec instance; its bound methods serve as the
# encode/decode callables of the registry entry.
codec = Codec()

# Entry handed back by search_function() when "cp866i" is looked up.
regentry = codecs.CodecInfo(
    codec.encode,
    codec.decode,
    incrementalencoder=IncrementalEncoder,
    incrementaldecoder=IncrementalDecoder,
    name="cp866i",
)


def search_function(encoding):
    """Codec search hook for the ``codecs`` registry.

    Returns the module's ``regentry`` when *encoding* equals its name,
    otherwise ``None``.
    """
    return regentry if encoding == regentry.name else None


def register():
    """Add this module's ``search_function`` to the ``codecs`` registry."""
    hook = search_function
    codecs.register(hook)


def unregister():
    """Remove this module's ``search_function`` from the ``codecs`` registry.

    Best effort: an ``AttributeError`` is swallowed, so the call is a
    silent no-op on interpreters that lack ``codecs.unregister`` (it was
    added in Python 3.10).
    """
    try:
        codecs.unregister(search_function)
    except AttributeError:
        # Nothing can be undone without codecs.unregister; leave the hook in place.
        return None
26 changes: 11 additions & 15 deletions alternative_encodings/viscii.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,25 +40,21 @@

# encodings module API

# Module-level codec instance; its bound encode/decode methods are what the
# registry entry (regentry) exposes.
codec = Codec()

@lru_cache()
def getregentry():
    """Build the ``codecs.CodecInfo`` entry for "viscii".

    The result is memoised by ``lru_cache``, so repeated calls return the
    same object.
    """
    info = codecs.CodecInfo(
        name="viscii",
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return info

# Registry entry for "viscii", built once at import time from the shared
# codec instance; no stream reader/writer is supplied here.
regentry = codecs.CodecInfo(
name="viscii",
encode=codec.encode,
decode=codec.decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
)

def search_function(encoding):
    """Codec search hook: return the cached entry when its name matches.

    Yields ``None`` for any other *encoding*.
    """
    candidate = getregentry()
    return candidate if candidate.name == encoding else None
def search_function(encoding):
    """Codec search hook: return ``regentry`` when its name matches.

    Yields ``None`` for any other *encoding*.
    """
    return regentry if encoding == regentry.name else None

Expand Down
29 changes: 29 additions & 0 deletions tests/test_cp866i.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import codecs

import pytest
import alternative_encodings.cp866i as cp866i
from utils import codec


@pytest.fixture(scope="module", autouse=True)
def register_codec():
    """Hold the ``codec(cp866i)`` context open for every test in this module.

    NOTE(review): ``codec`` comes from the project's ``utils`` helper and
    presumably registers the codec on entry and unregisters it on exit --
    confirm against that helper.
    """
    registration = codec(cp866i)
    with registration:
        yield


# Belarusian sample sentence containing both the upper- and lower-case "і".
source_data = "У Іўі худы жвавы чорт у зялёнай камізэльцы пабег пад'есці фаршу з юшкай"

# Expected cp866i bytes for source_data: ordinary cp866 bytes, except that
# every "і"/"І" appears as ASCII "i"/"I".
encoded = (
    b"\x93 I\xf7i \xe5\xe3\xa4\xeb \xa6\xa2\xa0\xa2\xeb \xe7\xae\xe0\xe2 \xe3 \xa7\xef\xab\xf1\xad\xa0\xa9 "
    b"\xaa\xa0\xaci\xa7\xed\xab\xec\xe6\xeb \xaf\xa0\xa1\xa5\xa3 \xaf\xa0\xa4'\xa5\xe1\xe6i \xe4\xa0\xe0\xe8\xe3 "
    b"\xa7 \xee\xe8\xaa\xa0\xa9"
)


def test_viscii_encode():
    """Encoding the sample text with "cp866i" yields the expected bytes."""
    # NOTE(review): the "viscii" in the name looks carried over from the
    # viscii tests; this test exercises the cp866i codec.
    actual = codecs.encode(source_data, "cp866i")
    assert actual == encoded


def test_viscii_decode():
    """Decoding the expected bytes with "cp866i" recovers the sample text."""
    # NOTE(review): the "viscii" in the name looks carried over from the
    # viscii tests; this test exercises the cp866i codec.
    decoded = codecs.decode(encoded, "cp866i")
    # Decoding produces Latin i/I, so they are mapped back to Cyrillic і/І
    # before comparing with the original sentence.
    restored = decoded.replace("i", "і").replace("I", "І")
    assert restored == source_data

0 comments on commit 06048ab

Please sign in to comment.