Skip to content

Commit

Permalink
sync some calibre's updates
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Oct 15, 2024
1 parent 796d9bd commit 1ed3285
Show file tree
Hide file tree
Showing 19 changed files with 555 additions and 154 deletions.
5 changes: 3 additions & 2 deletions application/lib/calibre/ebooks/epub/pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@

import re
from itertools import count
from calibre.ebooks.oeb.base import XHTML_NS
from calibre.ebooks.oeb.base import OEBBook

from lxml.etree import XPath

from calibre.ebooks.oeb.base import XHTML_NS, OEBBook

NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS}
PAGE_RE = re.compile(r'page', re.IGNORECASE)
ROMAN_RE = re.compile(r'^[ivxlcdm]+$', re.IGNORECASE)
Expand Down
5 changes: 3 additions & 2 deletions application/lib/calibre/ebooks/epub/periodical.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
__copyright__ = '2010, Kovid Goyal <[email protected]>'
__docformat__ = 'restructuredtext en'

from uuid import uuid4
import time
from uuid import uuid4

from calibre import prepare_string_for_xml as xml
from calibre import strftime
from calibre.constants import __appname__, __version__
from calibre import strftime, prepare_string_for_xml as xml
from calibre.utils.date import parse_date

SONY_METADATA = '''\
Expand Down
113 changes: 102 additions & 11 deletions application/lib/calibre/ebooks/html_entities.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>

from calibre import my_unichr

html5_entities = {
# ENTITY_DATA {{{
Expand Down Expand Up @@ -91,7 +92,7 @@
'DifferentialD': 'ⅆ',
'Dopf': '𝔻',
'Dot': '¨',
'DotDot': '⃜\u20dc',
'DotDot': '⃜',
'DotEqual': '≐',
'DoubleContourIntegral': '∯',
'DoubleDot': '¨',
Expand Down Expand Up @@ -502,7 +503,7 @@
'TRADE': '™',
'TSHcy': 'Ћ',
'TScy': 'Ц',
'Tab': ' ',
'Tab': '\t',
'Tau': 'Τ',
'Tcaron': 'Ť',
'Tcedil': 'Ţ',
Expand Down Expand Up @@ -1105,6 +1106,7 @@
'hearts': '♥',
'heartsuit': '♥',
'hellip': '…',
'hellips': '…',
'hercon': '⊹',
'hfr': '𝔥',
'hksearow': '⤥',
Expand Down Expand Up @@ -1857,6 +1859,7 @@
'square': '□',
'squarf': '▪',
'squf': '▪',
'squot': "'",
'srarr': '→',
'sscr': '𝓈',
'ssetmn': '∖',
Expand Down Expand Up @@ -2133,19 +2136,107 @@
}


if __name__ == '__main__':
def entity_to_unicode_in_python(match, exceptions=(), encoding='cp1252', result_exceptions={}):
    '''
    Convert one matched HTML entity into the text it stands for.

    :param match: A regex match object whose ``group(1)`` is the entity body,
                  i.e. the text between ``&`` and ``;`` (for example ``amp``,
                  ``#169`` or ``#xA9``).
    :param exceptions: Entity bodies that must be left alone; for these the
                       original ``&...;`` text is returned.
    :param encoding: Codec used to decode numeric references in the range
                     0-255. If ``None``, or if decoding fails, the number is
                     treated as a Unicode code point instead.
    :param result_exceptions: Mapping applied to the converted text: when the
                              result is a key of this mapping, the mapped
                              value is returned instead. It is only read,
                              never modified, so the shared default is safe.
    :return: The replacement text, or the original ``&...;`` text when the
             entity is excluded, malformed or unknown.
    '''
    def check(ch):
        return result_exceptions.get(ch, ch)

    ent = match.group(1)
    if ent in exceptions:
        return f'&{ent};'
    if ent in {'apos', 'squot'}:  # squot is generated by some broken CMS software
        return check("'")
    if ent == 'hellips':
        ent = 'hellip'
    if ent.startswith('#'):
        try:
            if ent[1] in ('x', 'X'):
                num = int(ent[2:], 16)
            else:
                num = int(ent[1:])
        except (IndexError, ValueError):
            # IndexError: nothing after the '#'; ValueError: not a number
            return f'&{ent};'
        if num < 0:
            # int() accepts a leading minus sign, but a negative value is not
            # a valid character reference and cannot be turned into a byte
            # (bytes((num,)) would raise ValueError), so leave the text as is.
            return f'&{ent};'
        if encoding is None or num > 255:
            return check(my_unichr(num))
        try:
            return check(bytes((num,)).decode(encoding))
        except UnicodeDecodeError:
            # Byte value is undefined in this codec: fall back to the code point
            return check(my_unichr(num))
    from calibre.ebooks.html_entities import html5_entities
    try:
        return check(html5_entities[ent])
    except KeyError:
        pass
    from polyglot.html_entities import name2codepoint
    try:
        return check(my_unichr(name2codepoint[ent]))
    except KeyError:
        return f'&{ent};'


def find_tests():
    '''
    Build the unittest suite that exercises ``replace_all_entities`` from the
    ``calibre_extensions.fast_html_entities`` module.

    The extension module is imported only when the test method runs, so
    calling this function merely constructs the suite.

    :return: The suite loaded from the ``TestHTMLEntityReplacement`` test case.
    '''
    import unittest

    # (input, expected) pairs checked with replace_all_entities(input)
    default_cases = (
        ('a&#1234;b', 'aӒb'),
        ('', ''),
        ('a', 'a'),
        ('&', '&'),
        ('&amp', '&amp'),
        ('&amp;', '&'),
        ('a&;b &#;c', 'a&;b &#;c'),
        ('&lt;', '<'),
        ('&amp;&lt;', '&<'),
        ('a&amp;b&lt;c', 'a&b<c'),
        ('a&acE;b', 'a∾̳b'),
        ('a&#1234;b', 'aӒb'),
        ('a&#X1234;b', 'a\u1234b'),
        ('a&#x1034fA;b', 'a\U001034fAb'),
        ('a&#0;b&#x000;c', 'abc'),
    )
    # (input, expected) pairs checked with replace_all_entities(input, True);
    # judging by the expected value, that mode leaves the XML entities as is
    flagged_cases = (
        ('&amp;&lt;&gt;&apos;&quot;', '&amp;&lt;&gt;&apos;&quot;'),
    )

    class TestHTMLEntityReplacement(unittest.TestCase):

        def test_html_entity_replacement(self):
            from calibre_extensions.fast_html_entities import replace_all_entities
            for inp, exp in default_cases:
                self.assertEqual(exp, replace_all_entities(inp), f'Failed for input: {inp!r}')
            for inp, exp in flagged_cases:
                self.assertEqual(exp, replace_all_entities(inp, True), f'Failed for input: {inp!r}')

    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(TestHTMLEntityReplacement)


def generate_entity_lists():
import re
from html5lib.constants import entities
entities = {k.replace(';', ''): entities[k] for k in entities}
from html import entities as e
entities = {k.rstrip(';'): e.name2codepoint[k] for k in e.name2codepoint}
entities.update({k.rstrip(';'): e.html5[k] for k in e.html5})
# common misspelled entity names
for k, v in {'squot': "'", 'hellips': entities['hellip']}.items():
if k not in entities:
entities[k] = v
lines = []
native_lines = '''\
struct html_entity { const char *name, *val; }
%%
'''.splitlines()

def esc_for_c(x):
if x == '\n':
return '\\n'
if x in '''"\\''':
return '\\' + x
return x

for k in sorted(entities):
lines.append(f" '{k}': {entities[k]!r},")
v = entities[k]
lines.append(f" '{k}': {v!r},")
native_lines.append(f'"{esc_for_c(k)}","{esc_for_c(v)}"')

with open(__file__, 'r+b') as f:
raw = f.read().decode('utf-8')
raw = re.sub(r'^# ENTITY_DATA {{{.+^# }}}',
'# ENTITY_DATA {{{\n' + '\n'.join(lines) + '\n# }}}',
raw, flags=re.M | re.DOTALL)
f.seek(0), f.truncate()
f.write(raw.encode('utf-8'))
pat = re.compile(r'^# ENTITY_DATA {{{.+?^# }}}', flags=re.M | re.DOTALL)
raw = pat.sub(lambda m: '# ENTITY_DATA {{{\n' + '\n'.join(lines) + '\n# }}}', raw)
f.seek(0), f.truncate(), f.write(raw.encode('utf-8'))

import subprocess
with open(__file__.replace('.py', '.h'), 'wb') as f:
cp = subprocess.run(['gperf', '--struct-type', '--readonly', '--includes', '--compare-strncmp'], input='\n'.join(native_lines).encode(), stdout=f)
if cp.returncode != 0:
raise SystemExit(cp.returncode)
7 changes: 6 additions & 1 deletion application/lib/calibre/library/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ def generate_test_db(library_path, # {{{
max_authors=10,
max_tags=10
):
import random, string, os, sys, time
import os
import random
import string
import sys
import time

from calibre.constants import preferred_encoding

if not os.path.exists(library_path):
Expand Down
13 changes: 7 additions & 6 deletions application/lib/calibre/library/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@

from calibre import prepare_string_for_xml
from calibre.constants import preferred_encoding
from calibre.ebooks.BeautifulSoup import (
BeautifulSoup, CData, Comment, Declaration, NavigableString,
ProcessingInstruction
)
from calibre.ebooks.BeautifulSoup import BeautifulSoup, CData, Comment, Declaration, NavigableString, ProcessingInstruction
from calibre.utils.html2text import html2text

# Hackish - ignoring sentences ending or beginning in numbers to avoid
Expand Down Expand Up @@ -168,7 +165,11 @@ def test_comments_to_html(self):
('a <?xml asd> b\n\ncd',
'<p class="description">a b</p><p class="description">cd</p>'),
]:
cval = comments_to_html(pat)
self.assertEqual(cval, val)
try:
cval = comments_to_html(pat)
except DeprecationWarning:
pass # new lxml + old Beautiful soup == deprecation warning
else:
self.assertEqual(cval, val)

return unittest.defaultTestLoader.loadTestsFromTestCase(Test)
4 changes: 1 addition & 3 deletions application/lib/calibre/utils/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@
from calibre.constants import DEBUG
from calibre.ebooks.metadata.book.base import field_metadata
from calibre.utils.config import tweaks
from calibre.utils.formatter_functions import (
StoredObjectType, formatter_functions, function_object_type, get_database,
)
from calibre.utils.formatter_functions import StoredObjectType, formatter_functions, function_object_type, get_database
from calibre.utils.icu import strcmp
from calibre.utils.localization import _
from polyglot.builtins import error_message
Expand Down
3 changes: 2 additions & 1 deletion application/lib/calibre/utils/imghdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>


from struct import unpack, error
import os
from struct import error, unpack

from calibre.utils.speedups import ReadOnlyFileBuffer
from polyglot.builtins import string_or_bytes

Expand Down
4 changes: 2 additions & 2 deletions application/lib/calibre/utils/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def encoder(obj):
if isinstance(obj, (set, frozenset)):
return encoded(1, tuple(obj), ExtType)
if getattr(obj, '__calibre_serializable__', False):
from calibre.db.categories import Tag
from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.field_metadata import FieldMetadata, fm_as_dict
from calibre.db.categories import Tag
if isinstance(obj, Metadata):
from calibre.ebooks.metadata.book.serialize import metadata_as_dict
return encoded(
Expand Down Expand Up @@ -60,8 +60,8 @@ def json_dumps(data, **kw):


def decode_metadata(x, for_json):
from polyglot.binary import from_base64_bytes
from calibre.ebooks.metadata.book.serialize import metadata_from_dict
from polyglot.binary import from_base64_bytes
obj = metadata_from_dict(x)
if for_json and obj.cover_data and obj.cover_data[1]:
obj.cover_data = obj.cover_data[0], from_base64_bytes(obj.cover_data[1])
Expand Down
4 changes: 3 additions & 1 deletion application/lib/calibre/utils/short_uuid.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
Generate UUID encoded using a user specified alphabet.
'''

import string, math, uuid as _uuid
import math
import string
import uuid as _uuid


def num_to_string(number, alphabet, alphabet_len, pad_to_length=None):
Expand Down
2 changes: 2 additions & 0 deletions application/lib/calibre/utils/smartypants.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,8 @@ def test_ordinal_numbers(self):

def test_educated_quotes(self):
self.assertEqual(sp('''"Isn't this fun?"'''), '''&#8220;Isn&#8217;t this fun?&#8221;''')
self.assertEqual(sp("'abc'"), '&#8216;abc&#8217;')


tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestSmartypantsAllAttributes)
if return_tests:
Expand Down
8 changes: 6 additions & 2 deletions application/lib/calibre/utils/speedups.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ class ReadOnlyFileBuffer:

''' A zero copy implementation of a file like object. Uses memoryviews for efficiency. '''

def __init__(self, raw):
def __init__(self, raw: bytes, name: str = ''):
self.sz, self.mv = len(raw), (raw if isinstance(raw, memoryview) else memoryview(raw))
self.pos = 0
self.name: str = name

def tell(self):
return self.pos

def read(self, n=None):
def read(self, n = None) -> memoryview:
if n is None:
ans = self.mv[self.pos:]
self.pos = self.sz
Expand All @@ -35,6 +36,9 @@ def seek(self, pos, whence=os.SEEK_SET):
self.pos = max(0, min(self.pos, self.sz))
return self.pos

def seekable(self):
return True

def getvalue(self):
return self.mv

Expand Down
10 changes: 7 additions & 3 deletions application/lib/calibre/utils/terminal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, sys, re
import os
import re
import sys

from calibre.prints import is_binary
from calibre.constants import iswindows
from calibre.prints import is_binary
from polyglot.builtins import iteritems

if iswindows:
Expand Down Expand Up @@ -233,7 +235,9 @@ class CONSOLE_SCREEN_BUFFER_INFO(Structure):


def get_term_geometry():
import fcntl, termios, struct
import fcntl
import struct
import termios

def ioctl_GWINSZ(fd):
try:
Expand Down
7 changes: 4 additions & 3 deletions application/lib/calibre/utils/threadpool.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@

# standard library modules
import threading

from polyglot import queue

# exceptions
Expand All @@ -72,7 +73,7 @@ class WorkerThread(threading.Thread):
"""

def __init__(self, requestsQueue, resultsQueue, **kwds):
"""Set up thread in daemonic mode and start it immediatedly.
"""Set up thread in daemonic mode and start it immediately.
requestsQueue and resultQueue are instances of queue.Queue passed
by the ThreadPool class when it creates a new worker thread.
Expand All @@ -96,9 +97,9 @@ def run(self):
break # and exit
try:
self.resultQueue.put(
(request, request.callable(request.args, **request.kwds))
(request, request.callable(*request.args, **request.kwds))
)
except Exception as e:
except:
request.exception = True
import traceback
self.resultQueue.put((request, traceback.format_exc()))
Expand Down
Loading

0 comments on commit 1ed3285

Please sign in to comment.