Skip to content

Commit

Permalink
Update type-parser for v2 (#1514)
Browse files: browse the repository at this point in the history.
* New grammar for v2 type strings; start with JSON again.

* Type.parameter should not return None.

* Developing type parser: through OptionType.

* Fix empty record type string.

* Named record parsing is working.

* Restrictions on record name in the type string.

* Parsing records works.

* Prevent record type string from using reserved words in name.

* Tests are done; this is a working v2 type parser.

* Add a standalone v2 types parser to the codebase.

* New v2 type-parser is done.

* Ignore pylint errors for the auto-generated code, too.
  • Loading branch information
jpivarski authored Jun 23, 2022
1 parent 20deffd commit 4ba1dbb
Show file tree
Hide file tree
Showing 9 changed files with 4,427 additions and 382 deletions.
283 changes: 0 additions & 283 deletions src/awkward/_typeparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,286 +316,3 @@ def toast_v1(ptnode, highlevel, categorical):
def from_datashape_v1(typestr, highlevel=False):
    """Parse *typestr* and convert the resulting tree with ``toast_v1``.

    A fresh ``Lark_StandAlone`` parser (using the ``TreeToJson`` transformer)
    is created for every call.  The conversion always starts with the
    ``categorical`` flag switched off.
    """
    parser = Lark_StandAlone(transformer=TreeToJson())
    tree = parser.parse(typestr)
    return toast_v1(tree, highlevel, False)


def _require(condition, message):
    """Raise ``AssertionError(message)`` unless *condition* is truthy.

    Used instead of the ``assert`` statement so that the checks still run
    when Python is started with ``-O``.
    """
    if not condition:
        raise AssertionError(message)


def _categorical_parms(parms, categorical):
    """Add ``"__categorical__": True`` to *parms* (in place) if requested.

    Returns the same *parms* object so the call can be used inline.
    """
    if categorical:
        parms.update({"__categorical__": True})
    return parms


def _predefined_type(name, categorical):
    """Build the type for one of the names ``string``/``char``/``byte``/``bytes``."""
    if name == "string":
        parms = _categorical_parms({"__array__": "string"}, categorical)
        return ak._v2.types.ListType(
            ak._v2.types.NumpyType(
                "uint8", parameters={"__array__": "char"}, typestr="char"
            ),
            parameters=parms,
            typestr="string",
        )
    if name == "char":
        parms = _categorical_parms({"__array__": "char"}, categorical)
        return ak._v2.types.NumpyType("uint8", parameters=parms, typestr="char")
    if name == "byte":
        parms = _categorical_parms({"__array__": "byte"}, categorical)
        return ak._v2.types.NumpyType("uint8", parameters=parms, typestr="byte")
    if name == "bytes":
        parms = _categorical_parms({"__array__": "bytestring"}, categorical)
        return ak._v2.types.ListType(
            ak._v2.types.NumpyType(
                "uint8", parameters={"__array__": "byte"}, typestr="byte"
            ),
            parameters=parms,
            typestr="bytes",
        )
    raise AssertionError(f"unhandled typestring {name}")


def _record_fields(nodes, highlevel):
    """Split mixed record children into ``(content types, field keys)``.

    Children that are ``str`` instances are taken as field keys; every other
    child is converted with ``toast`` (non-categorical) and taken as a content
    type.
    """
    contents = []
    keys = []
    for node in nodes:
        if isinstance(node, str):
            keys.append(node)
        else:
            contents.append(toast(node, highlevel, False))
    return contents, keys


def toast(ptnode, highlevel, categorical):
    """Convert a parse-tree node into an ``ak._v2.types`` object.

    Args:
        ptnode: Either an object whose class is named ``Token`` (its
            ``.value`` is returned unchanged) or a tree node exposing
            ``.data`` (the rule name) and ``.children``.
        highlevel: Must be ``True`` for ``categories`` nodes and truthy for
            ``option_highlevel`` / ``record_highlevel`` nodes.  For
            ``regular_inparm`` nodes a truthy value selects ``ArrayType``
            instead of ``RegularType``.
        categorical: When true, ``"__categorical__": True`` is added to the
            parameters of the type built for this node.  Pure wrapper nodes
            and ``regular_inparm`` forward the flag to their child instead.

    Returns:
        The token value, the raw child of a ``def_option`` node, or the
        constructed type object.

    Raises:
        AssertionError: If the node is not recognised, has an unexpected
            number of children, or needs ``highlevel`` but did not get it.
            These are raised explicitly rather than via ``assert`` so they
            are not removed under ``python -O``.
    """
    if ptnode.__class__.__name__ == "Token":
        return ptnode.value

    data = ptnode.data

    # Wrapper rules: forward to the only child, keeping the categorical flag.
    if data in ("start", "listtype", "uniontype", "optiontype", "record"):
        return toast(ptnode.children[0], highlevel, categorical)

    if data in ("input", "regular", "options"):
        _require(
            len(ptnode.children) == 1,
            f"{data} node must have exactly one child",
        )
        return toast(ptnode.children[0], highlevel, categorical)

    if data == "predefined_typestr":
        return _predefined_type(ptnode.children[0], categorical)

    if data == "primitive":
        if len(ptnode.children) == 1:
            parms = _categorical_parms({}, categorical)
            return ak._v2.types.NumpyType(
                toast(ptnode.children[0], highlevel, False), parameters=parms
            )
        if len(ptnode.children) == 2:
            parms = _categorical_parms(
                toast(ptnode.children[1], highlevel, False), categorical
            )
            return ak._v2.types.NumpyType(
                toast(ptnode.children[0], highlevel, False),
                parms,
            )
        raise AssertionError("unhandled NumpyType node")

    if data == "categories":
        _require(highlevel is True, "categories node requires highlevel=True")
        return toast(ptnode.children[0], highlevel, True)

    if data == "unknown":
        if len(ptnode.children) == 0:
            parms = {}
        elif len(ptnode.children) == 1:
            parms = toast(ptnode.children[0], highlevel, False)
        else:
            raise AssertionError("unhandled UnknownType node")
        return ak._v2.types.UnknownType(
            parameters=_categorical_parms(parms, categorical)
        )

    if data == "list_single":
        parms = _categorical_parms({}, categorical)
        return ak._v2.types.ListType(
            toast(ptnode.children[0], highlevel, False), parameters=parms
        )

    if data == "list_parm":
        parms = _categorical_parms(
            toast(ptnode.children[1], highlevel, False), categorical
        )
        return ak._v2.types.ListType(
            toast(ptnode.children[0], highlevel, False), parms
        )

    if data == "union_single":
        parms = _categorical_parms({}, categorical)
        contents = [toast(node, highlevel, False) for node in ptnode.children]
        return ak._v2.types.UnionType(contents, parameters=parms)

    if data == "union_parm":
        # The last child carries the parameters; the rest are the contents.
        parms = _categorical_parms(
            toast(ptnode.children[-1], highlevel, False), categorical
        )
        contents = [toast(node, highlevel, False) for node in ptnode.children[:-1]]
        return ak._v2.types.UnionType(contents, parms)

    if data == "option_single":
        parms = _categorical_parms({}, categorical)
        return ak._v2.types.OptionType(
            toast(ptnode.children[0], highlevel, False), parameters=parms
        )

    if data == "option_parm":
        parms = _categorical_parms(
            toast(ptnode.children[1], highlevel, False), categorical
        )
        return ak._v2.types.OptionType(
            toast(ptnode.children[0], highlevel, False),
            parameters=parms,
        )

    if data == "option_highlevel":
        _require(highlevel, "option_highlevel node requires highlevel")
        parms = _categorical_parms({}, categorical)
        return ak._v2.types.OptionType(
            toast(ptnode.children[0], highlevel, False), parameters=parms
        )

    if data == "record_tuple":
        parms = _categorical_parms({}, categorical)
        contents = [toast(node, highlevel, False) for node in ptnode.children]
        return ak._v2.types.RecordType(contents, None, parameters=parms)

    if data == "record_dict":
        parms = _categorical_parms({}, categorical)
        content_types = []
        content_keys = []
        # Children alternate key, type, key, type, ...
        for i in range(0, len(ptnode.children), 2):
            content_keys.append(ptnode.children[i])
            content_types.append(toast(ptnode.children[i + 1], highlevel, False))
        return ak._v2.types.RecordType(content_types, content_keys, parameters=parms)

    if data == "record_tuple_param":
        # The last child carries the parameters; the rest are the contents.
        parms = _categorical_parms(
            toast(ptnode.children[-1], highlevel, False), categorical
        )
        contents = [toast(node, highlevel, False) for node in ptnode.children[:-1]]
        return ak._v2.types.RecordType(contents, None, parameters=parms)

    if data == "record_struct":
        # The last child carries the parameters; the rest are keys and types.
        parms = _categorical_parms(
            toast(ptnode.children[-1], highlevel, False), categorical
        )
        contents, keys = _record_fields(ptnode.children[:-1], highlevel)
        return ak._v2.types.RecordType(
            contents,
            keys,
            parameters=parms,
        )

    if data == "record_highlevel":
        _require(highlevel, "record_highlevel node requires highlevel")
        # The first child is the record name; the rest are keys and types.
        parms = _categorical_parms({"__record__": ptnode.children[0]}, categorical)
        contents, keys = _record_fields(ptnode.children[1:], highlevel)
        return ak._v2.types.RecordType(
            contents,
            keys,
            parameters=parms,
        )

    if data == "regular_inparm":
        _require(
            len(ptnode.children) == 2,
            "regular_inparm node must have exactly two children",
        )
        # children[0] is the size, children[1] the content; the categorical
        # flag is handed down to the content here.
        content = toast(ptnode.children[1], highlevel, categorical)
        if highlevel:
            return ak._v2.types.ArrayType(content, ptnode.children[0])
        return ak._v2.types.RegularType(content, ptnode.children[0])

    if data == "regular_outparm":
        _require(
            len(ptnode.children) == 3,
            "regular_outparm node must have exactly three children",
        )
        parms = _categorical_parms(
            toast(ptnode.children[2], highlevel, False), categorical
        )
        return ak._v2.types.RegularType(
            toast(ptnode.children[1], highlevel, False),
            ptnode.children[0],
            parms,
        )

    if data == "def_option":
        _require(
            len(ptnode.children) == 1,
            "def_option node must have exactly one child",
        )
        return ptnode.children[0]

    raise AssertionError(f"unhandled node {data}")


def from_datashape(typestr, highlevel=False):
    """Parse *typestr* and convert the resulting tree with ``toast``.

    A fresh ``Lark_StandAlone`` parser (using the ``TreeToJson`` transformer)
    is created for every call.  The conversion always starts with the
    ``categorical`` flag switched off.
    """
    parser = Lark_StandAlone(transformer=TreeToJson())
    tree = parser.parse(typestr)
    return toast(tree, highlevel, False)
4 changes: 1 addition & 3 deletions src/awkward/_v2/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from awkward._typeparser.parser import from_datashape # noqa: F401

from awkward._v2.types.type import Type # noqa: F401
from awkward._v2.types.type import Type, from_datashape # noqa: F401
from awkward._v2.types.unknowntype import UnknownType # noqa: F401
from awkward._v2.types.numpytype import NumpyType # noqa: F401
from awkward._v2.types.regulartype import RegularType # noqa: F401
Expand Down
Loading

0 comments on commit 4ba1dbb

Please sign in to comment.