Skip to content

Commit

Permalink
fix php serialize
Browse files Browse the repository at this point in the history
  • Loading branch information
trim21 committed Sep 21, 2024
1 parent 8f5f15f commit f022dd6
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 31 deletions.
4 changes: 2 additions & 2 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ tasks:
- .env
sources:
- '*.py'
- 'chii/**.py'
- 'rpc/**.py'
- 'chii/**/*.py'
- 'rpc/**/*.py'
generates:
- a-file-not-exists-so-it-always-rerun
cmd: python start_grpc_server.py
Expand Down
126 changes: 126 additions & 0 deletions chii/compat/phpseralize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,20 @@
Copyright 2007-2016 by Armin Ronacher.
"""

from __future__ import annotations

from collections import OrderedDict
from io import BytesIO
from types import MappingProxyType
from typing import Any, Mapping

# NOTE(review): presumably the default ``errors=`` mode used when decoding
# text in load()/loads(); the consumer is outside this hunk — confirm.
default_errors = "strict"

# Public API of the module. PHPSerializeError is exported as well because
# dumps() raises it and callers need to be able to import it to catch it.
__all__ = (
    "dict_to_list",
    "load",
    "loads",
    "dumps",
    "PHPSerializeError",
)


Expand Down Expand Up @@ -139,3 +145,123 @@ def dict_to_list(d):
return [d[x] for x in range(len(d))]
except KeyError as e: # pragma: no cover
raise ValueError("dict is not a sequence") from e


class PHPSerializeError(Exception):
    """Raised when a value cannot be encoded into the PHP serialize format.

    The encoder raises it for circular references in containers and for
    duplicated mapping keys.
    """


def dumps(value: Any, /) -> bytes:
    """Serialize ``value`` into PHP ``serialize()`` formatted bytes.

    The value is encoded recursively into an in-memory buffer; a fresh
    ``seen`` set is handed to the encoder so it can detect circular
    references within this single call.

    Raises whatever the encoder raises (``PHPSerializeError`` for circular
    references, ``TypeError`` for unsupported types).
    """
    buffer = BytesIO()
    try:
        __encode(value, buffer, set())
        return buffer.getvalue()
    finally:
        # Same cleanup the ``with`` form performs on exit.
        buffer.close()


def __encode(value: Any, r: BytesIO, seen: set[int]) -> None:
    """Recursively write ``value`` to ``r`` in the PHP serialize format.

    Supported inputs and the token written for each:

    * ``bool``                      -> ``b:0;`` / ``b:1;``
    * ``str`` (encoded as UTF-8)    -> ``s:<len>:"<bytes>";``
    * ``int``                       -> ``i:<n>;``
    * ``float``                     -> ``d:<repr>;``
    * ``bytes`` / ``bytearray``     -> ``s:<len>:"<bytes>";``
    * ``None``                      -> ``N;``
    * ``dict`` / ``OrderedDict`` / ``MappingProxyType`` -> ``a:<n>:{...}``
    * ``list`` / ``tuple``          -> ``a:<n>:{...}`` keyed by 0..n-1

    ``seen`` holds the ``id()`` of every container currently being encoded
    (i.e. the ancestors of ``value``) and is used to detect cycles.

    Raises:
        PHPSerializeError: a container contains itself, directly or not.
        TypeError: ``value`` is of an unsupported type.
    """
    # bool must be tested before int: bool is a subclass of int, so an
    # earlier ``isinstance(value, int)`` check would swallow it and emit
    # ``i:1;`` instead of the PHP boolean token.
    if isinstance(value, bool):
        r.write(b"b:1;" if value else b"b:0;")
        return None

    if isinstance(value, str):
        return __encode_bytes(value.encode("UTF-8"), r)

    if isinstance(value, int):
        return __encode_int(value, r)

    if isinstance(value, float):
        r.write(f"d:{value};".encode())
        return None

    if isinstance(value, (bytes, bytearray)):
        # bytes(...) is a no-op for bytes and copies a bytearray.
        return __encode_bytes(bytes(value), r)

    if value is None:
        r.write(b"N;")
        return None

    i = id(value)
    if isinstance(value, (dict, OrderedDict, MappingProxyType)):
        if i in seen:
            raise PHPSerializeError(f"circular reference found {value!r}")
        seen.add(i)
        __encode_mapping(value, r, seen)
        # Only ancestors count as a cycle; siblings may share an object.
        seen.remove(i)
        return None

    if isinstance(value, (list, tuple)):
        if i in seen:
            raise PHPSerializeError(f"circular reference found {value!r}")
        seen.add(i)

        # Sequences become PHP arrays with consecutive integer keys.
        r.write(f"a:{len(value)}:{{".encode())
        for index, item in enumerate(value):
            __encode_int(index, r)
            __encode(item, r, seen)
        r.write(b"}")

        seen.remove(i)
        return None

    raise TypeError(f"type '{type(value)!r}' not supported")


def __encode_int(value: int, r: BytesIO) -> None:
    """Write ``value`` to ``r`` as a PHP integer token: ``i:<decimal>;``."""
    # int() collapses int subclasses (bool, enum.IntEnum, ...) to a plain
    # integer so that str() always yields decimal digits.
    digits = str(int(value)).encode()
    r.write(b"".join((b"i:", digits, b";")))


def __encode_bytes(x: bytes, r: BytesIO) -> None:
    """Write ``x`` to ``r`` as a PHP string token: ``s:<byte length>:"<raw>";``.

    The length prefix counts bytes, and the payload is written verbatim
    (no escaping).
    """
    prefix = f's:{len(x)}:"'.encode()
    r.write(prefix + x + b'";')


def __encode_mapping(x: Mapping[Any, Any], r: BytesIO, seen: set[int]) -> None:
    """Write mapping ``x`` to ``r`` as a PHP array: ``a:<count>:{<key><value>...}``.

    Entries are emitted in the mapping's own iteration order. Every key is
    first converted to bytes and written as a PHP string token; values are
    encoded recursively, with ``seen`` passed through for cycle detection.
    """
    r.write(f"a:{len(x)}:{{".encode())

    for key, item in x.items():
        # Keys are always serialized as byte strings, whatever their type.
        __encode_bytes(__key_to_binary(key), r)
        __encode(item, r, seen)

    r.write(b"}")


def __check_duplicated_keys(s: list[tuple[bytes, object]]) -> None:
    """Raise if two neighbouring entries of ``s`` share the same key.

    ``s`` is a list of ``(key, value)`` pairs. Only adjacent entries are
    compared, so every duplicate is found only when equal keys sit next to
    each other (e.g. the list is sorted by key) — NOTE(review): confirm
    callers sort first.

    An empty or single-element list has no neighbours and passes trivially.

    Raises:
        PHPSerializeError: two consecutive entries have equal keys.
    """
    # zip() over the list and its tail yields nothing for 0 or 1 entries,
    # so empty input no longer fails with IndexError.
    for (previous, _), (current, _) in zip(s, s[1:]):
        if previous == current:
            # Use repr for both keys: decoding arbitrary bytes could raise
            # UnicodeDecodeError and hide the real error.
            raise PHPSerializeError(
                f"find duplicated keys {previous!r} and {current!r}"
            )


def __key_to_binary(key: Any) -> bytes:
    """Convert a mapping key to the bytes written as its PHP array key.

    * ``bytes`` are returned unchanged.
    * ``str`` is encoded with the default codec (UTF-8).
    * ``int`` (including ``bool`` and ``enum.IntEnum``) becomes its decimal
      digits, e.g. ``True`` -> ``b"1"``.
    * ``None`` becomes the empty byte string.

    Raises:
        TypeError: ``key`` is of any other type.
    """
    if isinstance(key, bytes):
        return key

    if isinstance(key, str):
        return key.encode()

    if isinstance(key, int):
        # int() first, as the integer encoder does: str(True) would give
        # "True", and str() of an IntEnum member is its name on Python < 3.11.
        return str(int(key)).encode()

    if key is None:
        return b""

    raise TypeError(f"expected value as dict key {key!r}")
129 changes: 126 additions & 3 deletions chii/db/tables.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import datetime
import html
import time
from dataclasses import dataclass, field
from typing import cast

from sqlalchemy import Column, text
from sqlalchemy import Column, String, text, types
from sqlalchemy.dialects.mysql import (
CHAR,
INTEGER,
Expand Down Expand Up @@ -59,8 +60,8 @@ class ChiiTimeline:
)
},
)
memo: str = field(
default="", metadata={"sa": Column("tml_memo", MEDIUMTEXT, nullable=False)}
memo: bytes = field(
default=b"", metadata={"sa": Column("tml_memo", MEDIUMTEXT, nullable=False)}
)
img: str = field(
default="",
Expand Down Expand Up @@ -107,3 +108,125 @@ class ChiiTimeline:
# Type helpers: re-expose the mapped ChiiTimeline attributes cast to
# ``Column[int]`` so that column methods such as ``ChiiTimeline.uid.desc()``
# type-check. ``cast`` has no runtime effect; these are the same objects.
ChiiTimeline_column_id: Column[int] = cast(Column[int], ChiiTimeline.id)
ChiiTimeline_column_uid: Column[int] = cast(Column[int], ChiiTimeline.uid)


class HTMLEscapedString(types.TypeDecorator):
    """String column whose stored value is HTML-escaped.

    Values are passed through ``html.escape`` on the way into the database
    and ``html.unescape`` on the way out, so Python code works with the
    plain text. ``None`` (SQL NULL) is passed through untouched in both
    directions.
    """

    impl = String

    cache_ok = True

    def process_bind_param(self, value, dialect):
        """python value to db value"""
        # SQLAlchemy hands None to this hook for NULL parameters;
        # html.escape(None) would raise AttributeError.
        if value is None:
            return None
        return html.escape(value)

    def process_result_value(self, value, dialect):
        """db value to python value"""
        # NULL columns arrive as None; html.unescape(None) would raise.
        if value is None:
            return None
        return html.unescape(value)


@reg.mapped
@dataclass(kw_only=True)
class ChiiSubject:
    """Dataclass mapping of the ``chii_subjects`` table.

    Each field carries its SQLAlchemy ``Column`` under the ``"sa"`` metadata
    key (see ``__sa_dataclass_metadata_key__``). The Python attribute names
    differ from the database column names; the column name is the first
    argument of every ``Column``.
    """

    __tablename__ = "chii_subjects"
    __sa_dataclass_metadata_key__ = "sa"

    # Primary key (column ``subject_id``).
    id: int = field(
        metadata={"sa": Column("subject_id", MEDIUMINT(8), primary_key=True)}
    )
    type_id: int = field(
        metadata={
            "sa": Column("subject_type_id", SMALLINT(6), server_default=text("'0'"))
        }
    )
    # Names go through HTMLEscapedString: escaped on write, unescaped on read.
    name: str = field(metadata={"sa": Column("subject_name", HTMLEscapedString(80))})
    name_cn: str = field(
        metadata={"sa": Column("subject_name_cn", HTMLEscapedString(80))}
    )
    # NOTE(review): despite the attribute name, the column comment says this
    # holds an external identifier ("isbn / imdb"), not a user id — confirm.
    user_id: str = field(
        metadata={"sa": Column("subject_uid", String(20), comment="isbn / imdb")}
    )
    creator: int = field(metadata={"sa": Column("subject_creator", MEDIUMINT(8))})
    # Column ``subject_dateline``; presumably a unix timestamp — TODO confirm.
    created_at: int = field(
        metadata={
            "sa": Column("subject_dateline", INTEGER(10), server_default=text("'0'"))
        }
    )
    image: str = field(metadata={"sa": Column("subject_image", String(255))})
    platform: int = field(
        metadata={
            "sa": Column("subject_platform", SMALLINT(6), server_default=text("'0'"))
        }
    )
    infobox: str = field(metadata={"sa": Column("field_infobox", MEDIUMTEXT)})
    summary: str = field(
        metadata={"sa": Column("field_summary", MEDIUMTEXT, comment="summary")}
    )
    author_summary: str = field(
        metadata={"sa": Column("field_5", MEDIUMTEXT, comment="author summary")}
    )
    # Column comment "卷数" means "number of volumes".
    volumes: int = field(
        metadata={
            "sa": Column(
                "field_volumes",
                MEDIUMINT(8),
                server_default=text("'0'"),
                comment="卷数",
            )
        }
    )
    eps: int = field(
        metadata={"sa": Column("field_eps", MEDIUMINT(8), server_default=text("'0'"))}
    )
    # The next five integer columns (subject_wish .. subject_dropped) look like
    # per-status collection counters — the two column comments below say so for
    # on_hold and dropped; the others are presumed by analogy.
    wish: int = field(
        metadata={
            "sa": Column("subject_wish", MEDIUMINT(8), server_default=text("'0'"))
        }
    )
    collect: int = field(
        metadata={
            "sa": Column("subject_collect", MEDIUMINT(8), server_default=text("'0'"))
        }
    )
    doing: int = field(
        metadata={
            "sa": Column("subject_doing", MEDIUMINT(8), server_default=text("'0'"))
        }
    )
    # Column comment "搁置人数" means "number of users who put it on hold".
    on_hold: int = field(
        metadata={
            "sa": Column(
                "subject_on_hold",
                MEDIUMINT(8),
                server_default=text("'0'"),
                comment="搁置人数",
            )
        }
    )
    # Column comment "抛弃人数" means "number of users who dropped it".
    dropped: int = field(
        metadata={
            "sa": Column(
                "subject_dropped",
                MEDIUMINT(8),
                server_default=text("'0'"),
                comment="抛弃人数",
            )
        }
    )
    # TINYINT(1) columns below are presumably 0/1 flags — TODO confirm.
    series: int = field(
        metadata={
            "sa": Column("subject_series", TINYINT(1), server_default=text("'0'"))
        }
    )
    series_entry: int = field(
        metadata={
            "sa": Column(
                "subject_series_entry", MEDIUMINT(8), server_default=text("'0'")
            )
        }
    )
    idx_cn: str = field(metadata={"sa": Column("subject_idx_cn", String(1))})
    airtime: int = field(metadata={"sa": Column("subject_airtime", TINYINT(1))})
    nsfw: int = field(metadata={"sa": Column("subject_nsfw", TINYINT(1))})
    ban: int = field(
        metadata={"sa": Column("subject_ban", TINYINT(1), server_default=text("'0'"))}
    )
9 changes: 9 additions & 0 deletions chii/db/tables_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from chii.db.sa import sync_session_maker
from chii.db.tables import ChiiSubject


def test_subject_get():
    """Load subject 333707 and check its name comes back HTML-unescaped.

    The expected name contains a literal ``&``; the ``subject_name`` column
    is mapped through ``HTMLEscapedString``, so the value read from the
    session must be the plain text rather than the escaped form.
    """
    session_factory = sync_session_maker()
    with session_factory() as session:
        subject = session.get(ChiiSubject, 333707)
        # Session.get returns None for a missing row; fail with a clear
        # message instead of an AttributeError on ``.name``.
        assert subject is not None, "subject 333707 not found in the test database"
        assert subject.name == "Love, Death & Robots Volume 3"
13 changes: 1 addition & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ python = "^3.10"
# dependencies
SQLAlchemy = { version = "2.0.35" }
grpcio = "1.66.1"
libphpserialize = "0.0.8"
pymysql = "1.1.1"
pydantic = "2.9.2"
pydantic-settings = "2.5.2"
Expand Down Expand Up @@ -68,7 +67,9 @@ extend-exclude = "api/v1"

[tool.ruff]
extend-exclude = [".venv", "api"]
target-version = "py310"

[tool.ruff.lint]
select = [
"B",
"C",
Expand Down Expand Up @@ -130,10 +131,9 @@ ignore = [
'TRY003',
'TRY201',
'TRY301',
'PLR0911',
'PLR0912',
'PLR0915',
'PLR2004',
'PGH003',
]

target-version = "py310"
Loading

0 comments on commit f022dd6

Please sign in to comment.