class PHPSerializeError(Exception):
    """Raised when a value cannot be encoded into the PHP serialize format."""


def dumps(value: Any, /) -> bytes:
    """Encode ``value`` into the PHP ``serialize()`` wire format.

    Supported inputs are ``None``, ``bool``, ``int``, ``float``, ``str``
    (encoded as UTF-8), ``bytes``, ``bytearray``, ``list``/``tuple`` and
    ``dict``/``OrderedDict``/``MappingProxyType``. Sequences and mappings are
    both emitted as PHP arrays.

    Raises:
        PHPSerializeError: if a container (directly or indirectly) contains
            itself.
        TypeError: if a value or a mapping key has an unsupported type.
    """
    with BytesIO() as r:
        __encode(value, r, set())
        return r.getvalue()


def __encode(value: Any, r: BytesIO, seen: set[int]) -> None:
    """Write the serialized form of ``value`` to ``r``.

    ``seen`` holds the ``id()`` of every container currently being encoded
    further up the call stack; meeting one of them again means the structure
    is circular.
    """
    # bool must be tested before int: bool is a subclass of int, so the int
    # branch would otherwise swallow it and emit ``i:1;`` instead of ``b:1;``.
    if isinstance(value, bool):
        r.write(b"b:1;" if value else b"b:0;")
        return None

    if isinstance(value, str):
        return __encode_bytes(value.encode("UTF-8"), r)

    if isinstance(value, int):
        return __encode_int(value, r)

    if isinstance(value, float):
        r.write(f"d:{value};".encode())
        return None

    if isinstance(value, bytes):
        return __encode_bytes(value, r)

    if value is None:
        r.write(b"N;")
        return None

    ident = id(value)
    if isinstance(value, (dict, OrderedDict, MappingProxyType)):
        if ident in seen:
            raise PHPSerializeError(f"circular reference found {value!r}")
        seen.add(ident)
        __encode_mapping(value, r, seen)
        # leaving the container: the same object may legitimately appear
        # again as a sibling, only ancestors count as circular
        seen.remove(ident)
        return None

    if isinstance(value, (list, tuple)):
        if ident in seen:
            raise PHPSerializeError(f"circular reference found {value!r}")
        seen.add(ident)

        # sequences become PHP arrays keyed by their 0-based position
        r.write(f"a:{len(value)}:{{".encode())
        for index, item in enumerate(value):
            __encode_int(index, r)
            __encode(item, r, seen)
        r.write(b"}")

        seen.remove(ident)
        return None

    if isinstance(value, bytearray):
        __encode_bytes(bytes(value), r)
        return None

    raise TypeError(f"type '{type(value)!r}' not supported")


def __encode_int(value: int, r: BytesIO) -> None:
    """Write ``value`` as a PHP integer (``i:<n>;``)."""
    r.write(b"i:")
    # int() normalises int subclasses (e.g. enum.IntEnum) to plain digits
    r.write(str(int(value)).encode())
    r.write(b";")


def __encode_bytes(x: bytes, r: BytesIO) -> None:
    """Write ``x`` as a PHP string (``s:<byte length>:"<raw bytes>";``)."""
    r.write(b"s:")
    r.write(str(len(x)).encode())
    r.write(b':"')
    r.write(x)
    r.write(b'";')


def __encode_mapping(x: Mapping[Any, Any], r: BytesIO, seen: set[int]) -> None:
    """Write mapping ``x`` as a PHP array, in the mapping's iteration order.

    Every key is converted to bytes and emitted as a PHP string key.
    """
    r.write(b"a:")
    r.write(str(len(x)).encode())
    r.write(b":{")

    for k, v in x.items():
        __encode_bytes(__key_to_binary(k), r)
        __encode(v, r, seen)

    r.write(b"}")


def __check_duplicated_keys(s: list[tuple[bytes, object]]) -> None:
    """Raise if two neighbouring ``(key, value)`` pairs share the same key.

    Only adjacent entries are compared, so ``s`` has to be sorted by key for
    this to detect every duplicate. An empty list is accepted. This helper is
    not called by the other encoder functions defined alongside it.

    Raises:
        PHPSerializeError: if two adjacent pairs have equal keys.
    """
    if not s:
        return

    last_key: bytes = s[0][0]
    for current, _ in s[1:]:
        if last_key == current:
            # repr() instead of .decode(): keys are arbitrary bytes and may
            # not be valid UTF-8
            raise PHPSerializeError(
                f"find duplicated keys {last_key!r} and {current!r}"
            )
        last_key = current


def __key_to_binary(key: Any) -> bytes:
    """Convert a mapping key to the bytes used for its PHP string key.

    ``bytes`` are used as-is, ``str`` is encoded, ``int`` becomes its decimal
    digits and ``None`` becomes the empty string.

    Raises:
        TypeError: for any other key type.
    """
    if isinstance(key, bytes):
        return key

    if isinstance(key, str):
        return key.encode()

    if isinstance(key, int):
        # int() first so that a bool key becomes b"1"/b"0" rather than
        # b"True"/b"False", mirroring __encode_int
        return str(int(key)).encode()

    if key is None:
        return b""

    raise TypeError(f"expected value as dict key {key!r}")
class HTMLEscapedString(types.TypeDecorator):
    """String column type whose stored form is HTML-escaped.

    Values are passed through ``html.escape`` when bound as a statement
    parameter and through ``html.unescape`` when read from a result row.
    ``None`` is passed through untouched in both directions so that NULL
    column values do not raise.
    """

    impl = String

    cache_ok = True

    def process_bind_param(self, value: str | None, dialect):
        """python value to db value"""
        if value is None:
            # NULL has nothing to escape; html.escape(None) would raise
            return None
        return html.escape(value)

    def process_result_value(self, value: str | None, dialect):
        """db value to python value"""
        if value is None:
            # NULL column: html.unescape(None) would raise TypeError
            return None
        return html.unescape(value)


@reg.mapped
@dataclass(kw_only=True)
class ChiiSubject:
    """Dataclass mapping of the ``chii_subjects`` table.

    Every field is keyword-only and has no Python-side default. Column
    definitions live in each field's ``"sa"`` metadata entry.
    """

    __tablename__ = "chii_subjects"
    __sa_dataclass_metadata_key__ = "sa"

    id: int = field(
        metadata={"sa": Column("subject_id", MEDIUMINT(8), primary_key=True)}
    )
    type_id: int = field(
        metadata={
            "sa": Column("subject_type_id", SMALLINT(6), server_default=text("'0'"))
        }
    )
    # name / name_cn are HTML-escaped in the database, plain text in Python
    name: str = field(metadata={"sa": Column("subject_name", HTMLEscapedString(80))})
    name_cn: str = field(
        metadata={"sa": Column("subject_name_cn", HTMLEscapedString(80))}
    )
    user_id: str = field(
        metadata={"sa": Column("subject_uid", String(20), comment="isbn / imdb")}
    )
    creator: int = field(metadata={"sa": Column("subject_creator", MEDIUMINT(8))})
    created_at: int = field(
        metadata={
            "sa": Column("subject_dateline", INTEGER(10), server_default=text("'0'"))
        }
    )
    image: str = field(metadata={"sa": Column("subject_image", String(255))})
    platform: int = field(
        metadata={
            "sa": Column("subject_platform", SMALLINT(6), server_default=text("'0'"))
        }
    )
    infobox: str = field(metadata={"sa": Column("field_infobox", MEDIUMTEXT)})
    summary: str = field(
        metadata={"sa": Column("field_summary", MEDIUMTEXT, comment="summary")}
    )
    author_summary: str = field(
        metadata={"sa": Column("field_5", MEDIUMTEXT, comment="author summary")}
    )
    # column comment reads "number of volumes"
    volumes: int = field(
        metadata={
            "sa": Column(
                "field_volumes",
                MEDIUMINT(8),
                server_default=text("'0'"),
                comment="卷数",
            )
        }
    )
    eps: int = field(
        metadata={"sa": Column("field_eps", MEDIUMINT(8), server_default=text("'0'"))}
    )
    wish: int = field(
        metadata={
            "sa": Column("subject_wish", MEDIUMINT(8), server_default=text("'0'"))
        }
    )
    collect: int = field(
        metadata={
            "sa": Column("subject_collect", MEDIUMINT(8), server_default=text("'0'"))
        }
    )
    doing: int = field(
        metadata={
            "sa": Column("subject_doing", MEDIUMINT(8), server_default=text("'0'"))
        }
    )
    # column comment reads "number of users who put this on hold"
    on_hold: int = field(
        metadata={
            "sa": Column(
                "subject_on_hold",
                MEDIUMINT(8),
                server_default=text("'0'"),
                comment="搁置人数",
            )
        }
    )
    # column comment reads "number of users who dropped this"
    dropped: int = field(
        metadata={
            "sa": Column(
                "subject_dropped",
                MEDIUMINT(8),
                server_default=text("'0'"),
                comment="抛弃人数",
            )
        }
    )
    series: int = field(
        metadata={
            "sa": Column("subject_series", TINYINT(1), server_default=text("'0'"))
        }
    )
    series_entry: int = field(
        metadata={
            "sa": Column(
                "subject_series_entry", MEDIUMINT(8), server_default=text("'0'")
            )
        }
    )
    idx_cn: str = field(metadata={"sa": Column("subject_idx_cn", String(1))})
    airtime: int = field(metadata={"sa": Column("subject_airtime", TINYINT(1))})
    nsfw: int = field(metadata={"sa": Column("subject_nsfw", TINYINT(1))})
    ban: int = field(
        metadata={"sa": Column("subject_ban", TINYINT(1), server_default=text("'0'"))}
    )
SessionMaker() as session: + s = session.get(ChiiSubject, 333707) + assert s.name == "Love, Death & Robots Volume 3" diff --git a/poetry.lock b/poetry.lock index fc8685e..acd2a51 100644 --- a/poetry.lock +++ b/poetry.lock @@ -412,17 +412,6 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "libphpserialize" -version = "0.0.8" -description = "A port of PHP's serialize function, in pure python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "libphpserialize-0.0.8-py3-none-any.whl", hash = "sha256:7f2940ae00e22978076323fc7471531eb95967d45edd940fa0f014466e4d9519"}, - {file = "libphpserialize-0.0.8.tar.gz", hash = "sha256:ad72e7ff47ddad6d576e7ff7b1ce26d0d4f0d2c0c5a578cb10c4181c39b34856"}, -] - [[package]] name = "more-itertools" version = "10.5.0" @@ -1143,4 +1132,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "7c4cdfbf6ecdf39cc8beed4d8c8059b92416b9fba076449efbb78b1416aec548" +content-hash = "9deae6ff84848edaf08417f7f0fc531743e318d93be4b5e2ecfe7e8f5188874b" diff --git a/pyproject.toml b/pyproject.toml index e274945..b91323a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,6 @@ python = "^3.10" # dependencies SQLAlchemy = { version = "2.0.35" } grpcio = "1.66.1" -libphpserialize = "0.0.8" pymysql = "1.1.1" pydantic = "2.9.2" pydantic-settings = "2.5.2" @@ -68,7 +67,9 @@ extend-exclude = "api/v1" [tool.ruff] extend-exclude = [".venv", "api"] +target-version = "py310" +[tool.ruff.lint] select = [ "B", "C", @@ -130,10 +131,9 @@ ignore = [ 'TRY003', 'TRY201', 'TRY301', + 'PLR0911', 'PLR0912', 'PLR0915', 'PLR2004', 'PGH003', ] - -target-version = "py310" diff --git a/rpc/timeline_service.py b/rpc/timeline_service.py index 3efabb3..ca2a730 100644 --- a/rpc/timeline_service.py +++ b/rpc/timeline_service.py @@ -2,7 +2,6 @@ import time 
from typing import Optional -import phpserialize as php import pydantic from grpc import RpcContext from sqlalchemy.orm import Session @@ -22,7 +21,11 @@ from chii.compat import phpseralize from chii.config import config from chii.db import sa -from chii.db.tables import ChiiTimeline, ChiiTimeline_column_id, ChiiTimeline_column_uid +from chii.db.tables import ( + ChiiTimeline, + ChiiTimeline_column_id, + ChiiTimeline_column_uid, +) from chii.timeline import ( SUBJECT_TYPE_MAP, ProgressMemo, @@ -92,9 +95,9 @@ def merge_previous_timeline( ): escaped = html.escape(req.comment) if tl.batch: - memo = BatchMeme.validate_python(phpseralize.loads(tl.memo.encode())) + memo = BatchMeme.validate_python(phpseralize.loads(tl.memo)) else: - m = SubjectMemo.model_validate(phpseralize.loads(tl.memo.encode())) + m = SubjectMemo.model_validate(phpseralize.loads(tl.memo)) if int(m.subject_id) == req.subject.id: # save request called twice, just ignore should_update = False @@ -107,7 +110,7 @@ def merge_previous_timeline( m.collect_rate = req.rate if should_update: - tl.memo = php.serialize(m.model_dump()) + tl.memo = phpseralize.dumps(m.model_dump()) session.add(tl) return @@ -121,7 +124,7 @@ def merge_previous_timeline( ) tl.batch = 1 - tl.memo = php.serialize( + tl.memo = phpseralize.dumps( {key: value.model_dump() for key, value in memo.items()} ) @@ -143,7 +146,7 @@ def create_subject_collection_timeline( cat=TimelineCat.Subject, type=type, uid=req.user_id, - memo=php.serialize(memo.model_dump()), + memo=phpseralize.dumps(memo.model_dump()), batch=0, related=str(req.subject.id), ) @@ -195,14 +198,14 @@ def __episode_collect(self, req: EpisodeCollectRequest) -> EpisodeCollectRespons and tl.batch == 0 and tl.related == str(req.subject.id) ): - tl.memo = php.serialize(memo.model_dump()) + tl.memo = phpseralize.dumps(memo.model_dump()) session.add(tl) return EpisodeCollectResponse(ok=True) session.add( ChiiTimeline( uid=req.user_id, - memo=php.serialize(memo.model_dump()), + 
memo=phpseralize.dumps(memo.model_dump()), cat=TimelineCat.Progress, type=tlType, source=5, @@ -264,7 +267,7 @@ def __subject_progress( and tl.batch == 0 and tl.related == str(req.subject.id) ): - tl.memo = php.serialize(memo.model_dump()) + tl.memo = phpseralize.dumps(memo.model_dump()) session.add(tl) session.commit() return SubjectProgressResponse(ok=True) @@ -272,7 +275,7 @@ def __subject_progress( session.add( ChiiTimeline( uid=req.user_id, - memo=php.serialize(memo.model_dump()), + memo=phpseralize.dumps(memo.model_dump()), cat=TimelineCat.Progress, type=tlType, batch=0,