Improve serialization #253

Merged · 1 commit · Nov 9, 2024
55 changes: 38 additions & 17 deletions foamlib/_files/_files.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
import sys
from copy import deepcopy
from typing import Any, Optional, Tuple, Union, cast
@@ -16,7 +17,7 @@

from ._base import FoamFileBase
from ._io import FoamFileIO
from ._serialization import Kind, dumps
from ._serialization import Kind, dumps, normalize
from ._util import is_sequence


@@ -216,12 +217,28 @@ def __setitem__(
or keywords[2].endswith("Gradient")
)
):
kind = Kind.BINARY_FIELD if self.format == "binary" else Kind.FIELD
if self.format == "binary":
arch = self.get(("FoamFile", "arch"), default=None)
assert arch is None or isinstance(arch, str)
if (arch is not None and "scalar=32" in arch) or (
arch is None
and os.environ.get("WM_PRECISION_OPTION", default="DP") == "SP"
):
kind = Kind.SINGLE_PRECISION_BINARY_FIELD
else:
kind = Kind.DOUBLE_PRECISION_BINARY_FIELD
else:
kind = Kind.ASCII_FIELD
elif keywords == ("dimensions",):
kind = Kind.DIMENSIONS

if (
kind in (Kind.FIELD, Kind.BINARY_FIELD)
kind
in (
Kind.ASCII_FIELD,
Kind.DOUBLE_PRECISION_BINARY_FIELD,
Kind.SINGLE_PRECISION_BINARY_FIELD,
)
) and self.class_ == "dictionary":
if isinstance(data, (int, float)):
self.class_ = "volScalarField"
@@ -288,21 +305,25 @@ def __setitem__(
for k, v in data.items():
self[(*keywords, k)] = v

elif keywords:
parsed.put(
keywords,
deepcopy(data),
before
+ indentation
+ dumps(keywords[-1])
+ b" "
+ dumps(data, kind=kind)
+ b";"
+ after,
)

else:
parsed.put((), deepcopy(data), before + dumps(data, kind=kind) + after)
data = normalize(data)
assert not isinstance(data, Mapping)

if keywords:
parsed.put(
keywords,
data,
before
+ indentation
+ dumps(keywords[-1])
+ b" "
+ dumps(data, kind=kind)
+ b";"
+ after,
)

else:
parsed.put((), data, before + dumps(data, kind=kind) + after)

def __delitem__(self, keywords: str | tuple[str, ...] | None) -> None:
if not keywords:
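The new branch in __setitem__ above decides between single- and double-precision binary output: an explicit "arch" entry in the FoamFile header (e.g. one containing "scalar=32") takes precedence, and otherwise the WM_PRECISION_OPTION environment variable (defaulting to "DP") is consulted. A minimal standalone sketch of that selection; the helper name _binary_field_kind, the local Kind enum, and the sample arch strings are chosen for illustration only:

import os
from enum import Enum, auto
from typing import Optional


class Kind(Enum):
    # Mirrors the three field kinds used by foamlib's serializer.
    ASCII_FIELD = auto()
    DOUBLE_PRECISION_BINARY_FIELD = auto()
    SINGLE_PRECISION_BINARY_FIELD = auto()


def _binary_field_kind(fmt: str, arch: Optional[str]) -> Kind:
    # Same precedence as the new __setitem__ branch: an explicit "arch"
    # header entry wins; otherwise fall back to WM_PRECISION_OPTION.
    if fmt != "binary":
        return Kind.ASCII_FIELD
    if (arch is not None and "scalar=32" in arch) or (
        arch is None and os.environ.get("WM_PRECISION_OPTION", "DP") == "SP"
    ):
        return Kind.SINGLE_PRECISION_BINARY_FIELD
    return Kind.DOUBLE_PRECISION_BINARY_FIELD


assert _binary_field_kind("ascii", None) is Kind.ASCII_FIELD
assert (
    _binary_field_kind("binary", "LSB;label=32;scalar=32")
    is Kind.SINGLE_PRECISION_BINARY_FIELD
)
assert (
    _binary_field_kind("binary", "LSB;label=32;scalar=64")
    is Kind.DOUBLE_PRECISION_BINARY_FIELD
)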
109 changes: 90 additions & 19 deletions foamlib/_files/_serialization.py
@@ -1,7 +1,9 @@
from __future__ import annotations

import array
import contextlib
import itertools
import re
import sys
from enum import Enum, auto
from typing import cast
@@ -25,38 +27,97 @@
class Kind(Enum):
DEFAULT = auto()
SINGLE_ENTRY = auto()
FIELD = auto()
BINARY_FIELD = auto()
ASCII_FIELD = auto()
DOUBLE_PRECISION_BINARY_FIELD = auto()
SINGLE_PRECISION_BINARY_FIELD = auto()
DIMENSIONS = auto()


_TOKENS = re.compile(r'(?:[^\s"]|"(?:[^"])*")+')


def normalize(
data: FoamFileBase.Data, *, kind: Kind = Kind.DEFAULT
) -> FoamFileBase.Data:
if numpy and isinstance(data, np.ndarray):
ret = data.tolist()
assert isinstance(ret, list)
return ret

if kind == Kind.SINGLE_ENTRY and isinstance(data, tuple):
return normalize(list(data))

if isinstance(data, Mapping):
return {k: normalize(v, kind=kind) for k, v in data.items()}

if (
kind == Kind.DIMENSIONS
and is_sequence(data)
and len(data) <= 7
and all(isinstance(d, (int, float)) for d in data)
):
data = cast(Sequence[float], data)
return FoamFileBase.DimensionSet(*data)

if is_sequence(data) and not isinstance(data, tuple):
return [normalize(d, kind=Kind.SINGLE_ENTRY) for d in data]

if isinstance(data, str):
with contextlib.suppress(ValueError):
return int(data)

with contextlib.suppress(ValueError):
return float(data)

tokens = re.findall(_TOKENS, data)

if len(tokens) == 1:
return tokens[0] # type: ignore [no-any-return]

return tuple(tokens) if kind != Kind.SINGLE_ENTRY else " ".join(tokens)

if isinstance(data, FoamFileBase.Dimensioned):
value = normalize(data.value, kind=Kind.SINGLE_ENTRY)
assert isinstance(value, (int, float, list))
return FoamFileBase.Dimensioned(value, data.dimensions, data.name)

if isinstance(
data,
(int, float, bool, tuple, FoamFileBase.DimensionSet),
):
return data

msg = f"Unsupported data type: {type(data)}"
raise TypeError(msg)


def dumps(
data: FoamFileBase.Data,
*,
kind: Kind = Kind.DEFAULT,
) -> bytes:
if numpy and isinstance(data, np.ndarray):
return dumps(data.tolist(), kind=kind)
data = normalize(data, kind=kind)

if isinstance(data, Mapping):
entries = []
for k, v in data.items():
b = dumps(v, kind=kind)
if isinstance(v, Mapping):
entries.append(dumps(k) + b" {" + b + b"}")
elif not b:
entries.append(dumps(k) + b" {" + dumps(v) + b"}")
elif not v:
entries.append(dumps(k) + b";")
else:
entries.append(dumps(k) + b" " + b + b";")
entries.append(dumps(k) + b" " + dumps(v) + b";")

return b" ".join(entries)

if isinstance(data, FoamFileBase.DimensionSet) or (
kind == Kind.DIMENSIONS and is_sequence(data) and len(data) == 7
):
if isinstance(data, FoamFileBase.DimensionSet):
return b"[" + b" ".join(dumps(v) for v in data) + b"]"

if kind in (Kind.FIELD, Kind.BINARY_FIELD) and (
if kind in (
Kind.ASCII_FIELD,
Kind.DOUBLE_PRECISION_BINARY_FIELD,
Kind.SINGLE_PRECISION_BINARY_FIELD,
) and (
isinstance(data, (int, float))
or is_sequence(data)
and data
@@ -65,7 +126,11 @@ def dumps(
):
return b"uniform " + dumps(data, kind=Kind.SINGLE_ENTRY)

if kind in (Kind.FIELD, Kind.BINARY_FIELD) and is_sequence(data):
if kind in (
Kind.ASCII_FIELD,
Kind.DOUBLE_PRECISION_BINARY_FIELD,
Kind.SINGLE_PRECISION_BINARY_FIELD,
) and is_sequence(data):
if data and isinstance(data[0], (int, float)):
tensor_kind = b"scalar"
elif is_sequence(data[0]) and data[0] and isinstance(data[0][0], (int, float)):
@@ -80,25 +145,28 @@
else:
return dumps(data)

if kind == Kind.BINARY_FIELD:
if kind in (
Kind.DOUBLE_PRECISION_BINARY_FIELD,
Kind.SINGLE_PRECISION_BINARY_FIELD,
):
typecode = "f" if kind == Kind.SINGLE_PRECISION_BINARY_FIELD else "d"
if tensor_kind == b"scalar":
data = cast(Sequence[float], data)
contents = b"(" + array.array("d", data).tobytes() + b")"
contents = b"(" + array.array(typecode, data).tobytes() + b")"
else:
data = cast(Sequence[Sequence[float]], data)
contents = (
b"("
+ array.array("d", itertools.chain.from_iterable(data)).tobytes()
+ array.array(
typecode, itertools.chain.from_iterable(data)
).tobytes()
+ b")"
)
else:
contents = dumps(data, kind=Kind.SINGLE_ENTRY)

return b"nonuniform List<" + tensor_kind + b"> " + dumps(len(data)) + contents

if kind != Kind.SINGLE_ENTRY and isinstance(data, tuple):
return b" ".join(dumps(v) for v in data)

if isinstance(data, FoamFileBase.Dimensioned):
if data.name is not None:
return (
@@ -114,6 +182,9 @@
+ dumps(data.value, kind=Kind.SINGLE_ENTRY)
)

if isinstance(data, tuple):
return b" ".join(dumps(v) for v in data)

if is_sequence(data):
return b"(" + b" ".join(dumps(v, kind=Kind.SINGLE_ENTRY) for v in data) + b")"

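The new normalize function gives dumps (and the parsed-file cache in __setitem__) a canonical form of the data: NumPy arrays become lists, numeric strings become numbers, and multi-token strings become tuples or space-joined words. A few illustrative calls, traced against the code above; importing from the private module foamlib._files._serialization is for demonstration only and assumes a foamlib build that includes this change:

from foamlib._files._serialization import Kind, normalize

# Numeric strings are converted to Python numbers.
assert normalize("1") == 1
assert normalize("1.0e-3") == 0.001

# A single token stays a string; several tokens become a tuple,
# or are re-joined when normalized as a single entry.
assert normalize("word") == "word"
assert normalize("uniform 1.5") == ("uniform", "1.5")
assert normalize("uniform 1.5", kind=Kind.SINGLE_ENTRY) == "uniform 1.5"

# Plain numeric sequences pass through as lists of numbers.
assert normalize([1, 2, 3]) == [1, 2, 3]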
28 changes: 17 additions & 11 deletions tests/test_files/test_dumps.py
@@ -11,30 +11,36 @@ def test_serialize_data() -> None:
assert dumps("word") == b"word"
assert dumps(("word", "word")) == b"word word"
assert dumps('"a string"') == b'"a string"'
assert dumps(1, kind=Kind.FIELD) == b"uniform 1"
assert dumps(1.0, kind=Kind.FIELD) == b"uniform 1.0"
assert dumps(1.0e-3, kind=Kind.FIELD) == b"uniform 0.001"
assert dumps(1, kind=Kind.ASCII_FIELD) == b"uniform 1"
assert dumps(1.0, kind=Kind.ASCII_FIELD) == b"uniform 1.0"
assert dumps(1.0e-3, kind=Kind.ASCII_FIELD) == b"uniform 0.001"
assert dumps([1.0, 2.0, 3.0]) == b"(1.0 2.0 3.0)"
assert dumps([1, 2, 3], kind=Kind.FIELD) == b"uniform (1 2 3)"
assert dumps([1, 2, 3], kind=Kind.ASCII_FIELD) == b"uniform (1 2 3)"
assert (
dumps([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], kind=Kind.FIELD)
dumps([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], kind=Kind.ASCII_FIELD)
== b"nonuniform List<scalar> 10(1 2 3 4 5 6 7 8 9 10)"
)
assert (
dumps([[1, 2, 3], [4, 5, 6]], kind=Kind.FIELD)
dumps([[1, 2, 3], [4, 5, 6]], kind=Kind.ASCII_FIELD)
== b"nonuniform List<vector> 2((1 2 3) (4 5 6))"
)
assert dumps(1, kind=Kind.BINARY_FIELD) == b"uniform 1"
assert dumps(1.0, kind=Kind.BINARY_FIELD) == b"uniform 1.0"
assert dumps([1, 2, 3], kind=Kind.BINARY_FIELD) == b"uniform (1 2 3)"
assert dumps(1, kind=Kind.DOUBLE_PRECISION_BINARY_FIELD) == b"uniform 1"
assert dumps(1.0, kind=Kind.DOUBLE_PRECISION_BINARY_FIELD) == b"uniform 1.0"
assert (
dumps([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], kind=Kind.BINARY_FIELD)
dumps([1, 2, 3], kind=Kind.DOUBLE_PRECISION_BINARY_FIELD) == b"uniform (1 2 3)"
)
assert (
dumps([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], kind=Kind.DOUBLE_PRECISION_BINARY_FIELD)
== b'nonuniform List<scalar> 10(\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x08@\x00\x00\x00\x00\x00\x00\x10@\x00\x00\x00\x00\x00\x00\x14@\x00\x00\x00\x00\x00\x00\x18@\x00\x00\x00\x00\x00\x00\x1c@\x00\x00\x00\x00\x00\x00 @\x00\x00\x00\x00\x00\x00"@\x00\x00\x00\x00\x00\x00$@)'
)
assert (
dumps([[1, 2, 3], [4, 5, 6]], kind=Kind.BINARY_FIELD)
dumps([[1, 2, 3], [4, 5, 6]], kind=Kind.DOUBLE_PRECISION_BINARY_FIELD)
== b"nonuniform List<vector> 2(\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x08@\x00\x00\x00\x00\x00\x00\x10@\x00\x00\x00\x00\x00\x00\x14@\x00\x00\x00\x00\x00\x00\x18@)"
)
assert (
dumps([1, 2], kind=Kind.SINGLE_PRECISION_BINARY_FIELD)
== b"nonuniform List<scalar> 2(\x00\x00\x80?\x00\x00\x00@)"
)
assert (
dumps(FoamFile.DimensionSet(mass=1, length=1, time=-2)) == b"[1 1 -2 0 0 0 0]"
)
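The long byte literals in the binary-field expectations are simply the IEEE-754 encodings that array.array produces for the chosen typecode: 4 bytes per value for "f" and 8 bytes per value for "d". A quick cross-check, assuming a little-endian host (which is what the test literals imply):

import array
import struct

# Single precision: 1.0 and 2.0 as 32-bit floats, matching the payload of
# the SINGLE_PRECISION_BINARY_FIELD expectation above.
assert array.array("f", [1.0, 2.0]).tobytes() == b"\x00\x00\x80?\x00\x00\x00@"
assert struct.pack("<2f", 1.0, 2.0) == b"\x00\x00\x80?\x00\x00\x00@"

# Double precision: the same values as 64-bit floats, matching the first
# 16 bytes of the DOUBLE_PRECISION_BINARY_FIELD scalar expectation.
assert (
    array.array("d", [1.0, 2.0]).tobytes()
    == b"\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00@"
)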