From 06048ab8530ab6aafacc25fe692dca16b32cc6b4 Mon Sep 17 00:00:00 2001
From: insolor
Date: Sun, 3 Dec 2023 15:04:23 +0300
Subject: [PATCH] Add cp866i encoding for Belarusian

---
 alternative_encodings/cp866i.py | 60 +++++++++++++++++++++++++++++++++
 alternative_encodings/viscii.py | 26 ++++++--------
 tests/test_cp866i.py            | 29 ++++++++++++++++
 3 files changed, 100 insertions(+), 15 deletions(-)
 create mode 100644 alternative_encodings/cp866i.py
 create mode 100644 tests/test_cp866i.py

diff --git a/alternative_encodings/cp866i.py b/alternative_encodings/cp866i.py
new file mode 100644
index 0000000..7b82a12
--- /dev/null
+++ b/alternative_encodings/cp866i.py
@@ -0,0 +1,60 @@
+"""cp866 codec variant that encodes Cyrillic "і"/"І" as Latin "i"/"I"."""
+
+import codecs
+import encodings.cp866 as cp866
+
+# Cyrillic "і"/"І" (U+0456/U+0406) -> look-alike Latin "i"/"I", applied before
+# the text is handed to the stock cp866 encoder.
+_TO_LATIN = str.maketrans("іІ", "iI")
+
+
+# Codec APIs
+class Codec(cp866.Codec):
+    """cp866 codec whose encode() first substitutes Latin i/I for Cyrillic і/І."""
+
+    def encode(self, input, errors="strict"):
+        return super().encode(input.translate(_TO_LATIN), errors)
+
+
+class IncrementalEncoder(cp866.IncrementalEncoder):
+    """Incremental encoder applying the same substitution to each chunk."""
+
+    def encode(self, input, final=False):
+        return super().encode(input.translate(_TO_LATIN), final)
+
+
+# Decoding is plain cp866: Latin i/I are not mapped back to Cyrillic.
+IncrementalDecoder = cp866.IncrementalDecoder
+
+# encodings module API
+codec = Codec()
+
+
+regentry = codecs.CodecInfo(
+    name="cp866i",
+    encode=codec.encode,
+    decode=codec.decode,
+    incrementalencoder=IncrementalEncoder,
+    incrementaldecoder=IncrementalDecoder,
+)
+
+
+def search_function(encoding):
+    """Return the cp866i CodecInfo when *encoding* names it, else None."""
+    if regentry.name == encoding:
+        return regentry
+
+    return None
+
+
+def register():
+    """Register search_function with the codecs module."""
+    codecs.register(search_function)
+
+
+def unregister():
+    """Unregister search_function; a no-op if codecs.unregister is missing."""
+    try:
+        codecs.unregister(search_function)
+    except AttributeError:  # codecs.unregister exists only on Python 3.10+
+        pass
diff --git a/alternative_encodings/viscii.py b/alternative_encodings/viscii.py
index 1e2434e..3b99f5d 100644
--- a/alternative_encodings/viscii.py
+++ b/alternative_encodings/viscii.py
@@ -40,25 +40,21 @@
 
 
 # 
encodings module API
+codec = Codec()
 
 
-@lru_cache()
-def getregentry():
-    return codecs.CodecInfo(
-        name="viscii",
-        encode=Codec().encode,
-        decode=Codec().decode,
-        incrementalencoder=IncrementalEncoder,
-        incrementaldecoder=IncrementalDecoder,
-        streamreader=StreamReader,
-        streamwriter=StreamWriter,
-    )
+regentry = codecs.CodecInfo(
+    name="viscii",
+    encode=codec.encode,
+    decode=codec.decode,
+    incrementalencoder=IncrementalEncoder,
+    incrementaldecoder=IncrementalDecoder,
+)
 
 
-def search_function(encoding):
-    entry = getregentry()
-    if entry.name == encoding:
-        return entry
+def search_function(encoding):
+    if regentry.name == encoding:
+        return regentry
 
     return None
 
diff --git a/tests/test_cp866i.py b/tests/test_cp866i.py
new file mode 100644
index 0000000..32d65af
--- /dev/null
+++ b/tests/test_cp866i.py
@@ -0,0 +1,29 @@
+import codecs
+
+import pytest
+import alternative_encodings.cp866i as cp866i
+from utils import codec
+
+
+@pytest.fixture(scope="module", autouse=True)
+def register_codec():  # module-scoped + autouse: every test here runs inside codec(cp866i)
+    with codec(cp866i):
+        yield
+
+
+source_data = "У Іўі худы жвавы чорт у зялёнай камізэльцы пабег пад'есці фаршу з юшкай"
+encoded = (
+    b"\x93 I\xf7i \xe5\xe3\xa4\xeb \xa6\xa2\xa0\xa2\xeb \xe7\xae\xe0\xe2 \xe3 \xa7\xef\xab\xf1\xad\xa0\xa9 "
+    + b"\xaa\xa0\xaci\xa7\xed\xab\xec\xe6\xeb \xaf\xa0\xa1\xa5\xa3 \xaf\xa0\xa4'\xa5\xe1\xe6i \xe4\xa0\xe0\xe8\xe3 "
+    + b"\xa7 \xee\xe8\xaa\xa0\xa9"
+)
+
+
+def test_cp866i_encode():
+    assert codecs.encode(source_data, "cp866i") == encoded
+
+
+def test_cp866i_decode():
+    result = codecs.decode(encoded, "cp866i")
+    result = result.replace("i", "і").replace("I", "І")  # decoded text has Latin i/I; restore Cyrillic
+    assert result == source_data