From b94fd1748ccc2c21c81abaf329dbf77c7594b295 Mon Sep 17 00:00:00 2001 From: gilcu3 <828241+gilcu3@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:28:11 +0100 Subject: [PATCH] support version 94; followed guide from https://github.com/cyfinoid/hbctool/blob/main/ADD_NEW_VERSION.md --- hbctool/hbc/__init__.py | 2 + hbctool/hbc/hbc94/__init__.py | 233 +++++ hbctool/hbc/hbc94/data/opcode.json | 960 ++++++++++++++++++ hbctool/hbc/hbc94/data/structure.json | 83 ++ hbctool/hbc/hbc94/parser.py | 338 ++++++ hbctool/hbc/hbc94/raw/BytecodeFileFormat.h | 510 ++++++++++ hbctool/hbc/hbc94/raw/BytecodeList.def | 887 ++++++++++++++++ .../hbc94/raw/SerializedLiteralGenerator.h | 90 ++ hbctool/hbc/hbc94/tool/opcode_generator.py | 92 ++ hbctool/hbc/hbc94/translator.py | 62 ++ 10 files changed, 3257 insertions(+) create mode 100644 hbctool/hbc/hbc94/__init__.py create mode 100644 hbctool/hbc/hbc94/data/opcode.json create mode 100644 hbctool/hbc/hbc94/data/structure.json create mode 100644 hbctool/hbc/hbc94/parser.py create mode 100644 hbctool/hbc/hbc94/raw/BytecodeFileFormat.h create mode 100644 hbctool/hbc/hbc94/raw/BytecodeList.def create mode 100644 hbctool/hbc/hbc94/raw/SerializedLiteralGenerator.h create mode 100644 hbctool/hbc/hbc94/tool/opcode_generator.py create mode 100644 hbctool/hbc/hbc94/translator.py diff --git a/hbctool/hbc/__init__.py b/hbctool/hbc/__init__.py index 2952be4..b3deae6 100644 --- a/hbctool/hbc/__init__.py +++ b/hbctool/hbc/__init__.py @@ -1,5 +1,6 @@ from hbctool.util import * +from hbctool.hbc.hbc94 import HBC94 from hbctool.hbc.hbc90 import HBC90 from hbctool.hbc.hbc85 import HBC85 from hbctool.hbc.hbc84 import HBC84 @@ -17,6 +18,7 @@ BYTECODE_ALIGNMENT = 4 HBC = { + 94: HBC94, 90: HBC90, 85: HBC85, 84: HBC84, diff --git a/hbctool/hbc/hbc94/__init__.py b/hbctool/hbc/hbc94/__init__.py new file mode 100644 index 0000000..41772c8 --- /dev/null +++ b/hbctool/hbc/hbc94/__init__.py @@ -0,0 +1,233 @@ +from hbctool.util import * +from .parser import parse, 
from .parser import parse, export, INVALID_LENGTH
from .translator import disassemble, assemble
from struct import pack, unpack

# Tag values stored in bits 4-6 of the first byte of a serialized literal
# group (array / object-key / object-value buffers). See _checkBufferTag.
NullTag = 0
TrueTag = 1 << 4
FalseTag = 2 << 4
NumberTag = 3 << 4
LongStringTag = 4 << 4
ShortStringTag = 5 << 4
ByteStringTag = 6 << 4
IntegerTag = 7 << 4
TagMask = 0x70


class HBC94:
    """Accessor around the dictionary produced by ``parse`` for a v94 file.

    The wrapped object is a plain dict (header, function headers, string
    tables, literal buffers, raw instruction bytes).  All getters assert that
    an object has been loaded and that the requested index is in range.
    """

    def __init__(self, f=None):
        """Parse *f* immediately when given; otherwise start empty."""
        if f:
            self.obj = parse(f)
        else:
            self.obj = None

    def export(self, f):
        """Serialize the current object to *f* using the parser's ``export``."""
        export(self.getObj(), f)

    def getObj(self):
        """Return the parsed object; asserts that one has been set."""
        assert self.obj, "Obj is not set."
        return self.obj

    def setObj(self, obj):
        """Replace the wrapped parsed object."""
        self.obj = obj

    def getVersion(self):
        """Return the bytecode version handled by this class (94)."""
        return 94

    def getHeader(self):
        """Return the file header dictionary."""
        return self.getObj()["header"]

    def getFunctionCount(self):
        """Return ``functionCount`` from the file header."""
        return self.getObj()["header"]["functionCount"]

    def getFunction(self, fid, disasm=True):
        """Return the description of function *fid*.

        Returns a 6-tuple ``(name, paramCount, registerCount, symbolCount,
        insts, functionHeader)``.  ``insts`` is the disassembled instruction
        list when *disasm* is true, otherwise the raw bytecode slice.
        """
        assert 0 <= fid < self.getFunctionCount(), "Invalid function ID"

        obj = self.getObj()
        functionHeader = obj["functionHeaders"][fid]

        # Header offsets are absolute file offsets; "inst" starts at
        # "instOffset", so rebase before slicing.
        start = functionHeader["offset"] - obj["instOffset"]
        end = start + functionHeader["bytecodeSizeInBytes"]
        insts = obj["inst"][start:end]
        if disasm:
            insts = disassemble(insts)

        functionNameStr, _ = self.getString(functionHeader["functionName"])

        return (
            functionNameStr,
            functionHeader["paramCount"],
            functionHeader["frameSize"],
            functionHeader["environmentSize"],
            insts,
            functionHeader,
        )

    def setFunction(self, fid, func, disasm=True):
        """Overwrite function *fid* in place.

        *func* has the same 6-tuple shape that ``getFunction`` returns.  The
        new bytecode must not be longer than the existing one, because the
        function is patched inside the existing instruction buffer.
        """
        assert 0 <= fid < self.getFunctionCount(), "Invalid function ID"

        # The function name (first element) is accepted but not applied.
        # TODO : Make this work
        # functionHeader["functionName"] = functionName
        _, paramCount, registerCount, symbolCount, insts, _ = func

        obj = self.getObj()
        functionHeader = obj["functionHeaders"][fid]

        functionHeader["paramCount"] = paramCount
        functionHeader["frameSize"] = registerCount
        functionHeader["environmentSize"] = symbolCount

        capacity = functionHeader["bytecodeSizeInBytes"]
        start = functionHeader["offset"] - obj["instOffset"]

        bc = assemble(insts) if disasm else insts

        assert len(bc) <= capacity, "Overflowed instruction length is not supported yet."
        functionHeader["bytecodeSizeInBytes"] = len(bc)
        memcpy(obj["inst"], bc, start, len(bc))

    def getStringCount(self):
        """Return ``stringCount`` from the file header."""
        return self.getObj()["header"]["stringCount"]

    def _locateString(self, sid):
        """Return ``(isUTF16, offset, length)`` for string *sid*.

        ``length`` is in string-table units (not doubled for UTF-16).  Entries
        whose small-table length is the INVALID_LENGTH sentinel are resolved
        through the overflow table, indexed by the small entry's offset.
        """
        obj = self.getObj()
        entry = obj["stringTableEntries"][sid]

        isUTF16 = entry["isUTF16"]
        offset = entry["offset"]
        length = entry["length"]

        if length >= INVALID_LENGTH:
            overflowEntry = obj["stringTableOverflowEntries"][offset]
            offset = overflowEntry["offset"]
            length = overflowEntry["length"]

        return isUTF16, offset, length

    def getString(self, sid):
        """Return ``(text, (isUTF16, offset, byteLength))`` for string *sid*.

        UTF-16 strings are returned as a hex dump of their raw bytes; other
        strings are decoded as UTF-8.
        """
        assert 0 <= sid < self.getStringCount(), "Invalid string ID"

        isUTF16, offset, length = self._locateString(sid)
        if isUTF16:
            # Table lengths count 16-bit units; storage is addressed in bytes.
            length *= 2

        s = bytes(self.getObj()["stringStorage"][offset:offset + length])
        return s.hex() if isUTF16 else s.decode("utf-8"), (isUTF16, offset, length)

    def setString(self, sid, val):
        """Overwrite string *sid* in the string storage.

        For UTF-16 entries *val* is a hex string of the raw bytes; otherwise
        it is text that is stored UTF-8 encoded.  The replacement must fit in
        the existing slot; the table entry's length is left unchanged.
        """
        assert 0 <= sid < self.getStringCount(), "Invalid string ID"

        isUTF16, offset, length = self._locateString(sid)

        if isUTF16:
            s = list(bytes.fromhex(val))
            l = len(s) // 2
        else:
            s = val.encode("utf-8")
            # Compare the encoded size, not len(val): a multi-byte character
            # would otherwise pass the check and spill into the next string.
            l = len(s)

        assert l <= length, "Overflowed string length is not supported yet."

        memcpy(self.getObj()["stringStorage"], s, offset, len(s))

    def _checkBufferTag(self, buf, iid):
        """Decode the group header at ``buf[iid]`` into ``(count, tag)``.

        When bit 7 is set the count is 12 bits wide (low nibble of this byte
        followed by the next byte); otherwise it is the low nibble alone.
        ``tag`` is the byte masked with ``TagMask``.
        """
        keyTag = buf[iid]
        if keyTag & 0x80:
            return (((keyTag & 0x0f) << 8) | (buf[iid + 1]), keyTag & TagMask)
        else:
            return (keyTag & 0x0f, keyTag & TagMask)

    def _SLPToString(self, tag, buf, iid, ind):
        """Decode one literal of kind *tag* at ``buf[iid + ind]``.

        Returns ``(kindName, value, newInd)`` where ``newInd`` is *ind*
        advanced past the consumed payload.  String kinds yield the string
        table index, not the text.
        """
        start = iid + ind
        if tag == ByteStringTag:
            kind = "String"
            val = buf[start]
            ind += 1
        elif tag == ShortStringTag:
            kind = "String"
            val = unpack("<H", bytes(buf[start:start + 2]))[0]
            ind += 2
        elif tag == LongStringTag:
            kind = "String"
            val = unpack("<L", bytes(buf[start:start + 4]))[0]
            ind += 4
        elif tag == NumberTag:
            kind = "Number"
            val = unpack("<d", bytes(buf[start:start + 8]))[0]
            ind += 8
        elif tag == IntegerTag:
            kind = "Integer"
            # NOTE(review): decoded as unsigned 32-bit; confirm against
            # SerializedLiteralGenerator whether this should be signed.
            val = unpack("<L", bytes(buf[start:start + 4]))[0]
            ind += 4
        elif tag == NullTag:
            kind = "Null"
            val = None
        elif tag == TrueTag:
            kind = "Boolean"
            val = True
        elif tag == FalseTag:
            kind = "Boolean"
            val = False
        else:
            kind = "Empty"
            val = None

        return kind, val, ind

    def _readLiteralGroup(self, buf, idx):
        """Decode the literal group starting at ``buf[idx]``.

        Returns ``(kindName, values)``; ``kindName`` is ``None`` for an empty
        group.
        """
        count, tag = self._checkBufferTag(buf, idx)
        # The header occupies two bytes when the count needs the wide form.
        ind = 2 if count > 0x0f else 1
        kind = None
        values = []
        for _ in range(count):
            kind, val, ind = self._SLPToString(tag, buf, idx, ind)
            values.append(val)

        return kind, values

    def getArrayBufferSize(self):
        """Return ``arrayBufferSize`` from the file header."""
        return self.getObj()["header"]["arrayBufferSize"]

    def getArray(self, aid):
        """Return ``(kindName, values)`` for the group at array-buffer offset *aid*."""
        assert 0 <= aid < self.getArrayBufferSize(), "Invalid Array ID"
        return self._readLiteralGroup(self.getObj()["arrayBuffer"], aid)

    def getObjKeyBufferSize(self):
        """Return ``objKeyBufferSize`` from the file header."""
        return self.getObj()["header"]["objKeyBufferSize"]

    def getObjKey(self, kid):
        """Return ``(kindName, keys)`` for the group at key-buffer offset *kid*."""
        assert 0 <= kid < self.getObjKeyBufferSize(), "Invalid ObjKey ID"
        return self._readLiteralGroup(self.getObj()["objKeyBuffer"], kid)

    def getObjValueBufferSize(self):
        """Return ``objValueBufferSize`` from the file header."""
        return self.getObj()["header"]["objValueBufferSize"]

    def getObjValue(self, vid):
        """Return ``(kindName, values)`` for the group at value-buffer offset *vid*."""
        assert 0 <= vid < self.getObjValueBufferSize(), "Invalid ObjValue ID"
        return self._readLiteralGroup(self.getObj()["objValueBuffer"], vid)
-0,0 +1,960 @@ +{ + "Unreachable": [], + "NewObjectWithBuffer": [ + "Reg8", + "UInt16", + "UInt16", + "UInt16", + "UInt16" + ], + "NewObjectWithBufferLong": [ + "Reg8", + "UInt16", + "UInt16", + "UInt32", + "UInt32" + ], + "NewObject": [ + "Reg8" + ], + "NewObjectWithParent": [ + "Reg8", + "Reg8" + ], + "NewArrayWithBuffer": [ + "Reg8", + "UInt16", + "UInt16", + "UInt16" + ], + "NewArrayWithBufferLong": [ + "Reg8", + "UInt16", + "UInt16", + "UInt32" + ], + "NewArray": [ + "Reg8", + "UInt16" + ], + "Mov": [ + "Reg8", + "Reg8" + ], + "MovLong": [ + "Reg32", + "Reg32" + ], + "Negate": [ + "Reg8", + "Reg8" + ], + "Not": [ + "Reg8", + "Reg8" + ], + "BitNot": [ + "Reg8", + "Reg8" + ], + "TypeOf": [ + "Reg8", + "Reg8" + ], + "Eq": [ + "Reg8", + "Reg8", + "Reg8" + ], + "StrictEq": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Neq": [ + "Reg8", + "Reg8", + "Reg8" + ], + "StrictNeq": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Less": [ + "Reg8", + "Reg8", + "Reg8" + ], + "LessEq": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Greater": [ + "Reg8", + "Reg8", + "Reg8" + ], + "GreaterEq": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Add": [ + "Reg8", + "Reg8", + "Reg8" + ], + "AddN": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Mul": [ + "Reg8", + "Reg8", + "Reg8" + ], + "MulN": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Div": [ + "Reg8", + "Reg8", + "Reg8" + ], + "DivN": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Mod": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Sub": [ + "Reg8", + "Reg8", + "Reg8" + ], + "SubN": [ + "Reg8", + "Reg8", + "Reg8" + ], + "LShift": [ + "Reg8", + "Reg8", + "Reg8" + ], + "RShift": [ + "Reg8", + "Reg8", + "Reg8" + ], + "URshift": [ + "Reg8", + "Reg8", + "Reg8" + ], + "BitAnd": [ + "Reg8", + "Reg8", + "Reg8" + ], + "BitXor": [ + "Reg8", + "Reg8", + "Reg8" + ], + "BitOr": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Inc": [ + "Reg8", + "Reg8" + ], + "Dec": [ + "Reg8", + "Reg8" + ], + "InstanceOf": [ + "Reg8", + "Reg8", + "Reg8" + ], + "IsIn": [ + "Reg8", + "Reg8", + "Reg8" + ], + "GetEnvironment": [ + 
"Reg8", + "UInt8" + ], + "StoreToEnvironment": [ + "Reg8", + "UInt8", + "Reg8" + ], + "StoreToEnvironmentL": [ + "Reg8", + "UInt16", + "Reg8" + ], + "StoreNPToEnvironment": [ + "Reg8", + "UInt8", + "Reg8" + ], + "StoreNPToEnvironmentL": [ + "Reg8", + "UInt16", + "Reg8" + ], + "LoadFromEnvironment": [ + "Reg8", + "Reg8", + "UInt8" + ], + "LoadFromEnvironmentL": [ + "Reg8", + "Reg8", + "UInt16" + ], + "GetGlobalObject": [ + "Reg8" + ], + "GetNewTarget": [ + "Reg8" + ], + "CreateEnvironment": [ + "Reg8" + ], + "CreateInnerEnvironment": [ + "Reg8", + "Reg8", + "UInt32" + ], + "DeclareGlobalVar": [ + "UInt32:S" + ], + "ThrowIfHasRestrictedGlobalProperty": [ + "UInt32:S" + ], + "GetByIdShort": [ + "Reg8", + "Reg8", + "UInt8", + "UInt8:S" + ], + "GetById": [ + "Reg8", + "Reg8", + "UInt8", + "UInt16:S" + ], + "GetByIdLong": [ + "Reg8", + "Reg8", + "UInt8", + "UInt32:S" + ], + "TryGetById": [ + "Reg8", + "Reg8", + "UInt8", + "UInt16:S" + ], + "TryGetByIdLong": [ + "Reg8", + "Reg8", + "UInt8", + "UInt32:S" + ], + "PutById": [ + "Reg8", + "Reg8", + "UInt8", + "UInt16:S" + ], + "PutByIdLong": [ + "Reg8", + "Reg8", + "UInt8", + "UInt32:S" + ], + "TryPutById": [ + "Reg8", + "Reg8", + "UInt8", + "UInt16:S" + ], + "TryPutByIdLong": [ + "Reg8", + "Reg8", + "UInt8", + "UInt32:S" + ], + "PutNewOwnByIdShort": [ + "Reg8", + "Reg8", + "UInt8:S" + ], + "PutNewOwnById": [ + "Reg8", + "Reg8", + "UInt16:S" + ], + "PutNewOwnByIdLong": [ + "Reg8", + "Reg8", + "UInt32:S" + ], + "PutNewOwnNEById": [ + "Reg8", + "Reg8", + "UInt16:S" + ], + "PutNewOwnNEByIdLong": [ + "Reg8", + "Reg8", + "UInt32:S" + ], + "PutOwnByIndex": [ + "Reg8", + "Reg8", + "UInt8" + ], + "PutOwnByIndexL": [ + "Reg8", + "Reg8", + "UInt32" + ], + "PutOwnByVal": [ + "Reg8", + "Reg8", + "Reg8", + "UInt8" + ], + "DelById": [ + "Reg8", + "Reg8", + "UInt16:S" + ], + "DelByIdLong": [ + "Reg8", + "Reg8", + "UInt32:S" + ], + "GetByVal": [ + "Reg8", + "Reg8", + "Reg8" + ], + "PutByVal": [ + "Reg8", + "Reg8", + "Reg8" + ], + "DelByVal": 
[ + "Reg8", + "Reg8", + "Reg8" + ], + "PutOwnGetterSetterByVal": [ + "Reg8", + "Reg8", + "Reg8", + "Reg8", + "UInt8" + ], + "GetPNameList": [ + "Reg8", + "Reg8", + "Reg8", + "Reg8" + ], + "GetNextPName": [ + "Reg8", + "Reg8", + "Reg8", + "Reg8", + "Reg8" + ], + "Call": [ + "Reg8", + "Reg8", + "UInt8" + ], + "Construct": [ + "Reg8", + "Reg8", + "UInt8" + ], + "Call1": [ + "Reg8", + "Reg8", + "Reg8" + ], + "CallDirect": [ + "Reg8", + "UInt8", + "UInt16" + ], + "Call2": [ + "Reg8", + "Reg8", + "Reg8", + "Reg8" + ], + "Call3": [ + "Reg8", + "Reg8", + "Reg8", + "Reg8", + "Reg8" + ], + "Call4": [ + "Reg8", + "Reg8", + "Reg8", + "Reg8", + "Reg8", + "Reg8" + ], + "CallLong": [ + "Reg8", + "Reg8", + "UInt32" + ], + "ConstructLong": [ + "Reg8", + "Reg8", + "UInt32" + ], + "CallDirectLongIndex": [ + "Reg8", + "UInt8", + "UInt32" + ], + "CallBuiltin": [ + "Reg8", + "UInt8", + "UInt8" + ], + "CallBuiltinLong": [ + "Reg8", + "UInt8", + "UInt32" + ], + "GetBuiltinClosure": [ + "Reg8", + "UInt8" + ], + "Ret": [ + "Reg8" + ], + "Catch": [ + "Reg8" + ], + "DirectEval": [ + "Reg8", + "Reg8" + ], + "Throw": [ + "Reg8" + ], + "ThrowIfEmpty": [ + "Reg8", + "Reg8" + ], + "Debugger": [], + "AsyncBreakCheck": [], + "ProfilePoint": [ + "UInt16" + ], + "CreateClosure": [ + "Reg8", + "Reg8", + "UInt16" + ], + "CreateClosureLongIndex": [ + "Reg8", + "Reg8", + "UInt32" + ], + "CreateGeneratorClosure": [ + "Reg8", + "Reg8", + "UInt16" + ], + "CreateGeneratorClosureLongIndex": [ + "Reg8", + "Reg8", + "UInt32" + ], + "CreateAsyncClosure": [ + "Reg8", + "Reg8", + "UInt16" + ], + "CreateAsyncClosureLongIndex": [ + "Reg8", + "Reg8", + "UInt32" + ], + "CreateThis": [ + "Reg8", + "Reg8", + "Reg8" + ], + "SelectObject": [ + "Reg8", + "Reg8", + "Reg8" + ], + "LoadParam": [ + "Reg8", + "UInt8" + ], + "LoadParamLong": [ + "Reg8", + "UInt32" + ], + "LoadConstUInt8": [ + "Reg8", + "UInt8" + ], + "LoadConstInt": [ + "Reg8", + "Imm32" + ], + "LoadConstDouble": [ + "Reg8", + "Double" + ], + "LoadConstBigInt": [ 
+ "Reg8", + "UInt16" + ], + "LoadConstBigIntLongIndex": [ + "Reg8", + "UInt32" + ], + "LoadConstString": [ + "Reg8", + "UInt16:S" + ], + "LoadConstStringLongIndex": [ + "Reg8", + "UInt32:S" + ], + "LoadConstEmpty": [ + "Reg8" + ], + "LoadConstUndefined": [ + "Reg8" + ], + "LoadConstNull": [ + "Reg8" + ], + "LoadConstTrue": [ + "Reg8" + ], + "LoadConstFalse": [ + "Reg8" + ], + "LoadConstZero": [ + "Reg8" + ], + "CoerceThisNS": [ + "Reg8", + "Reg8" + ], + "LoadThisNS": [ + "Reg8" + ], + "ToNumber": [ + "Reg8", + "Reg8" + ], + "ToNumeric": [ + "Reg8", + "Reg8" + ], + "ToInt32": [ + "Reg8", + "Reg8" + ], + "AddEmptyString": [ + "Reg8", + "Reg8" + ], + "GetArgumentsPropByVal": [ + "Reg8", + "Reg8", + "Reg8" + ], + "GetArgumentsLength": [ + "Reg8", + "Reg8" + ], + "ReifyArguments": [ + "Reg8" + ], + "CreateRegExp": [ + "Reg8", + "UInt32:S", + "UInt32:S", + "UInt32" + ], + "SwitchImm": [ + "Reg8", + "UInt32", + "Addr32", + "UInt32", + "UInt32" + ], + "StartGenerator": [], + "ResumeGenerator": [ + "Reg8", + "Reg8" + ], + "CompleteGenerator": [], + "CreateGenerator": [ + "Reg8", + "Reg8", + "UInt16" + ], + "CreateGeneratorLongIndex": [ + "Reg8", + "Reg8", + "UInt32" + ], + "IteratorBegin": [ + "Reg8", + "Reg8" + ], + "IteratorNext": [ + "Reg8", + "Reg8", + "Reg8" + ], + "IteratorClose": [ + "Reg8", + "UInt8" + ], + "Jmp": [ + "Addr8" + ], + "JmpLong": [ + "Addr32" + ], + "JmpTrue": [ + "Addr8", + "Reg8" + ], + "JmpTrueLong": [ + "Addr32", + "Reg8" + ], + "JmpFalse": [ + "Addr8", + "Reg8" + ], + "JmpFalseLong": [ + "Addr32", + "Reg8" + ], + "JmpUndefined": [ + "Addr8", + "Reg8" + ], + "JmpUndefinedLong": [ + "Addr32", + "Reg8" + ], + "SaveGenerator": [ + "Addr8" + ], + "SaveGeneratorLong": [ + "Addr32" + ], + "JLess": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JLessLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotLess": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotLessLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JLessN": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JLessNLong": 
[ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotLessN": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotLessNLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JLessEqual": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JLessEqualLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotLessEqual": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotLessEqualLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JLessEqualN": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JLessEqualNLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotLessEqualN": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotLessEqualNLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JGreater": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JGreaterLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotGreater": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotGreaterLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JGreaterN": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JGreaterNLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotGreaterN": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotGreaterNLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JGreaterEqual": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JGreaterEqualLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotGreaterEqual": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotGreaterEqualLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JGreaterEqualN": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JGreaterEqualNLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotGreaterEqualN": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotGreaterEqualNLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JEqual": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JEqualLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JNotEqual": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JNotEqualLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JStrictEqual": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JStrictEqualLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "JStrictNotEqual": [ + "Addr8", + "Reg8", + "Reg8" + ], + "JStrictNotEqualLong": [ + "Addr32", + "Reg8", + "Reg8" + ], + "Add32": [ + 
"Reg8", + "Reg8", + "Reg8" + ], + "Sub32": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Mul32": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Divi32": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Divu32": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Loadi8": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Loadu8": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Loadi16": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Loadu16": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Loadi32": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Loadu32": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Store8": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Store16": [ + "Reg8", + "Reg8", + "Reg8" + ], + "Store32": [ + "Reg8", + "Reg8", + "Reg8" + ] +} \ No newline at end of file diff --git a/hbctool/hbc/hbc94/data/structure.json b/hbctool/hbc/hbc94/data/structure.json new file mode 100644 index 0000000..b129dae --- /dev/null +++ b/hbctool/hbc/hbc94/data/structure.json @@ -0,0 +1,83 @@ +{ + "header": { + "magic": ["uint", 64, 1], + "version": ["uint", 32, 1], + "sourceHash": ["uint", 8, 20], + "fileLength": ["uint", 32, 1], + "globalCodeIndex": ["uint", 32, 1], + "functionCount": ["uint", 32, 1], + "stringKindCount": ["uint", 32, 1], + "identifierCount": ["uint", 32, 1], + "stringCount": ["uint", 32, 1], + "overflowStringCount": ["uint", 32, 1], + "stringStorageSize": ["uint", 32, 1], + "bigIntCount": ["uint", 32, 1], + "bigIntStorageSize": ["uint", 32, 1], + "regExpCount": ["uint", 32, 1], + "regExpStorageSize": ["uint", 32, 1], + "arrayBufferSize": ["uint", 32, 1], + "objKeyBufferSize": ["uint", 32, 1], + "objValueBufferSize": ["uint", 32, 1], + "segmentID": ["uint", 32, 1], + "cjsModuleCount": ["uint", 32, 1], + "functionSourceCount": ["uint", 32, 1], + "debugInfoOffset": ["uint", 32, 1], + "option": ["uint", 8, 1], + "padding": ["uint", 8, 19] + }, + "SmallFuncHeader": { + "offset": ["bit", 25, 1], + "paramCount": ["bit", 7, 1], + "bytecodeSizeInBytes": ["bit", 15, 1], + "functionName": ["bit", 17, 1], + "infoOffset": ["bit", 25, 1], + "frameSize": ["bit", 7, 
1], + "environmentSize": ["bit", 8, 1], + "highestReadCacheIndex": ["bit", 8, 1], + "highestWriteCacheIndex": ["bit", 8, 1], + "flags": ["uint", 8, 1] + }, + "FuncHeader": { + "offset": ["uint", 32, 1], + "paramCount": ["uint", 32, 1], + "bytecodeSizeInBytes": ["uint", 32, 1], + "functionName": ["uint", 32, 1], + "infoOffset": ["uint", 32, 1], + "frameSize": ["uint", 32, 1], + "environmentSize": ["uint", 32, 1], + "highestReadCacheIndex": ["uint", 8, 1], + "highestWriteCacheIndex": ["uint", 8, 1], + "flags": ["uint", 8, 1] + }, + "SmallStringTableEntry": { + "isUTF16": ["bit", 1, 1], + "offset": ["bit", 23, 1], + "length": ["bit", 8, 1] + }, + "OverflowStringTableEntry": { + "offset": ["uint", 32, 1], + "length": ["uint", 32, 1] + }, + "StringStorage": ["uint", 8, 0], + "ArrayBuffer": ["uint", 8, 0], + "ObjKeyBuffer": ["uint", 8, 0], + "ObjValueBuffer": ["uint", 8, 0], + "RegExpTableEntry": { + "offset": ["uint", 32, 1], + "length": ["uint", 32, 1] + }, + "RegExpStorage": ["uint", 8, 0], + "CJSModuleTable": { + "first": ["uint", 32, 1], + "second": ["uint", 32, 1] + }, + "BigIntTableEntry": { + "offset": ["uint", 32, 1], + "length": ["uint", 32, 1] + }, + "BigIntStorage": ["uint", 8, 0], + "FunctionSourceTable": { + "first": ["uint", 32, 1], + "second": ["uint", 32, 1] + } +} diff --git a/hbctool/hbc/hbc94/parser.py b/hbctool/hbc/hbc94/parser.py new file mode 100644 index 0000000..2201995 --- /dev/null +++ b/hbctool/hbc/hbc94/parser.py @@ -0,0 +1,338 @@ +from hbctool.util import * +import json +import pathlib +import copy + +basepath = pathlib.Path(__file__).parent.absolute() + +MAGIC = 2240826417119764422 +BYTECODE_ALIGNMENT = 4 + +INVALID_OFFSET = (1 << 23) +INVALID_LENGTH = (1 << 8) - 1 + +structure = json.load(open(f"{basepath}/data/structure.json", "r")) + +headerS = structure["header"] +smallFunctionHeaderS = structure["SmallFuncHeader"] +functionHeaderS = structure["FuncHeader"] +stringTableEntryS = structure["SmallStringTableEntry"] 
overflowStringTableEntryS = structure["OverflowStringTableEntry"]
stringStorageS = structure["StringStorage"]
arrayBufferS = structure["ArrayBuffer"]
objKeyBufferS = structure["ObjKeyBuffer"]
objValueBufferS = structure["ObjValueBuffer"]
bigIntTableEntryS = structure["BigIntTableEntry"]
bigIntStorageS = structure["BigIntStorage"]
regExpTableEntryS = structure["RegExpTableEntry"]
regExpStorageS = structure["RegExpStorage"]
cjsModuleTableS = structure["CJSModuleTable"]
funSourceTableS = structure["FunctionSourceTable"]

_TABLE = "table"
_BLOB = "blob"

# Sections that follow the identifier hashes, in file order.  Each row is
# (kind, key in the parsed object, layout spec, header field giving the
# entry count for tables or the byte size for blobs).  parse() and export()
# both walk this single list so the two can never disagree on which
# sections exist or in which order they appear.
_SECTIONS = (
    (_TABLE, "stringTableEntries", stringTableEntryS, "stringCount"),
    (_TABLE, "stringTableOverflowEntries", overflowStringTableEntryS, "overflowStringCount"),
    (_BLOB, "stringStorage", stringStorageS, "stringStorageSize"),
    (_BLOB, "arrayBuffer", arrayBufferS, "arrayBufferSize"),
    (_BLOB, "objKeyBuffer", objKeyBufferS, "objKeyBufferSize"),
    (_BLOB, "objValueBuffer", objValueBufferS, "objValueBufferSize"),
    (_TABLE, "bigIntTable", bigIntTableEntryS, "bigIntCount"),
    (_BLOB, "bigIntStorage", bigIntStorageS, "bigIntStorageSize"),
    (_TABLE, "regExpTable", regExpTableEntryS, "regExpCount"),
    (_BLOB, "regExpStorage", regExpStorageS, "regExpStorageSize"),
    (_TABLE, "cjsModuleTable", cjsModuleTableS, "cjsModuleCount"),
    (_TABLE, "funSourceTable", funSourceTableS, "functionSourceCount"),
)


def align(f):
    """Pad the stream position of *f* to BYTECODE_ALIGNMENT."""
    f.pad(BYTECODE_ALIGNMENT)


def _read_record(f, spec):
    """Read one structure from *f*, field by field in *spec* order."""
    return {key: read(f, fmt) for key, fmt in spec.items()}


def _write_record(f, record, spec):
    """Write *record* to *f*, field by field in *spec* order."""
    for key, fmt in spec.items():
        write(f, record[key], fmt)


def _large_header_offset(smallHeader):
    """Return the file offset of the full header an overflowed small header points to."""
    return (smallHeader["infoOffset"] << 16) | smallHeader["offset"]


def parse(f):
    """Parse a bytecode file from *f* into a dictionary.

    *f* must provide ``tell``, ``seek``, ``pad`` and ``readall`` in addition
    to whatever the ``read``/``readuint`` helpers require.  The result holds
    one entry per file section plus ``instOffset`` (stream position where
    the remaining bytes start) and ``inst`` (everything from there on).
    """
    obj = {}

    # File header
    header = _read_record(f, headerS)
    obj["header"] = header
    align(f)

    # Function headers
    functionHeaders = []
    for _ in range(header["functionCount"]):
        functionHeader = _read_record(f, smallFunctionHeaderS)

        # Bit 5 of flags marks an overflowed header: the small record only
        # locates the full-size header stored elsewhere in the file.
        if (functionHeader["flags"] >> 5) & 1:
            # Keep the small record so export() can write it back verbatim.
            functionHeader["small"] = copy.deepcopy(functionHeader)
            saved_pos = f.tell()
            f.seek(_large_header_offset(functionHeader))
            for key in functionHeaderS:
                functionHeader[key] = read(f, functionHeaderS[key])

            f.seek(saved_pos)

        functionHeaders.append(functionHeader)

    obj["functionHeaders"] = functionHeaders
    align(f)

    # String kinds (kept as raw 32-bit words, not interpreted)
    obj["stringKinds"] = [readuint(f, bits=32) for _ in range(header["stringKindCount"])]
    align(f)

    # Identifier hashes (kept as raw 32-bit words, not interpreted)
    obj["identifierHashes"] = [readuint(f, bits=32) for _ in range(header["identifierCount"])]
    align(f)

    # Remaining tables and storage blobs
    for kind, name, spec, sizeKey in _SECTIONS:
        size = header[sizeKey]
        if kind == _TABLE:
            obj[name] = [_read_record(f, spec) for _ in range(size)]
        else:
            # Blob specs carry their element count in slot 2; fill it in
            # from the header before reading.
            spec[2] = size
            obj[name] = read(f, spec)
        align(f)

    # Everything after the tables is kept as one opaque byte sequence.
    obj["instOffset"] = f.tell()
    obj["inst"] = f.readall()

    return obj


def export(obj, f):
    """Write *obj* (as produced by ``parse``) to *f*.

    Sections are emitted in the same order ``parse`` reads them, including
    the BigInt table and storage.  Full-size headers of overflowed functions
    are written last, at the offsets recorded in their small headers.
    """
    # File header
    header = obj["header"]
    _write_record(f, header, headerS)
    align(f)

    # Function headers
    overflowedFunctionHeaders = []
    functionHeaders = obj["functionHeaders"]
    for i in range(header["functionCount"]):
        functionHeader = functionHeaders[i]
        if "small" in functionHeader:
            _write_record(f, functionHeader["small"], smallFunctionHeaderS)
            overflowedFunctionHeaders.append(functionHeader)
        else:
            _write_record(f, functionHeader, smallFunctionHeaderS)

    align(f)

    # String kinds
    stringKinds = obj["stringKinds"]
    for i in range(header["stringKindCount"]):
        writeuint(f, stringKinds[i], bits=32)

    align(f)

    # Identifier hashes
    identifierHashes = obj["identifierHashes"]
    for i in range(header["identifierCount"]):
        writeuint(f, identifierHashes[i], bits=32)

    align(f)

    # Remaining tables and storage blobs
    for kind, name, spec, sizeKey in _SECTIONS:
        size = header[sizeKey]
        section = obj[name]
        if kind == _TABLE:
            for i in range(size):
                _write_record(f, section[i], spec)
        else:
            spec[2] = size
            write(f, section, spec)
        align(f)

    # Opaque remainder of the file
    f.writeall(obj["inst"])

    # Patch in the full-size headers of overflowed functions.
    for overflowedFunctionHeader in overflowedFunctionHeaders:
        f.seek(_large_header_offset(overflowedFunctionHeader["small"]))
        _write_record(f, overflowedFunctionHeader, functionHeaderS)
+static constexpr uint8_t PROPERTY_CACHING_DISABLED = 0; + +/// Alignment of data structures of in file. +static constexpr size_t BYTECODE_ALIGNMENT = alignof(uint32_t); + +/// Bytecode forms +enum class BytecodeForm { + /// Execution form (the default) is the bytecode prepared for execution. + Execution, + + /// Delta form is the bytecode prepared to minimize binary diff size. + Delta, +}; + +/// Storing information about the bytecode, needed when it is loaded by the +/// runtime. +union BytecodeOptions { + struct { + bool staticBuiltins : 1; + bool cjsModulesStaticallyResolved : 1; + bool hasAsync : 1; + }; + uint8_t _flags; + + BytecodeOptions() : _flags(0) {} +}; + +// See BytecodeFileFormatTest for details about bit field layouts +static_assert( + sizeof(BytecodeOptions) == 1, + "BytecodeOptions should take up 1 byte total"); + +/** + * Header of binary file. + */ +LLVM_PACKED_START +struct BytecodeFileHeader { + uint64_t magic; + uint32_t version; + uint8_t sourceHash[SHA1_NUM_BYTES]; + uint32_t fileLength; // Until the end of the BytecodeFileFooter. + uint32_t globalCodeIndex; + uint32_t functionCount; + uint32_t stringKindCount; // Number of string kind entries. + uint32_t identifierCount; // Number of strings which are identifiers. + uint32_t stringCount; // Number of strings in the string table. + uint32_t overflowStringCount; // Number of strings in the overflow table. + uint32_t stringStorageSize; // Bytes in the blob of string contents. + uint32_t bigIntCount; // number of bigints in the bigint table. + uint32_t bigIntStorageSize; // Bytes in the bigint table. + uint32_t regExpCount; + uint32_t regExpStorageSize; + uint32_t arrayBufferSize; + uint32_t objKeyBufferSize; + uint32_t objValueBufferSize; + uint32_t segmentID; // The ID of this segment. + uint32_t cjsModuleCount; // Number of modules. + uint32_t functionSourceCount; // Number of function sources preserved. 
+ uint32_t debugInfoOffset; + BytecodeOptions options; + + // Insert any padding to make function headers that follow this file header + // less likely to cross cache lines. + uint8_t padding[19]; + + BytecodeFileHeader( + uint64_t magic, + uint32_t version, + const SHA1 &sourceHash, + uint32_t fileLength, + uint32_t globalCodeIndex, + uint32_t functionCount, + uint32_t stringKindCount, + uint32_t identifierCount, + uint32_t stringCount, + uint32_t overflowStringCount, + uint32_t stringStorageSize, + uint32_t bigIntCount, + uint32_t bigIntStorageSize, + uint32_t regExpCount, + uint32_t regExpStorageSize, + uint32_t arrayBufferSize, + uint32_t objKeyBufferSize, + uint32_t objValueBufferSize, + uint32_t segmentID, + uint32_t cjsModuleCount, + uint32_t functionSourceCount, + uint32_t debugInfoOffset, + BytecodeOptions options) + : magic(magic), + version(version), + sourceHash(), + fileLength(fileLength), + globalCodeIndex(globalCodeIndex), + functionCount(functionCount), + stringKindCount(stringKindCount), + identifierCount(identifierCount), + stringCount(stringCount), + overflowStringCount(overflowStringCount), + stringStorageSize(stringStorageSize), + bigIntCount(bigIntCount), + bigIntStorageSize(bigIntStorageSize), + regExpCount(regExpCount), + regExpStorageSize(regExpStorageSize), + arrayBufferSize(arrayBufferSize), + objKeyBufferSize(objKeyBufferSize), + objValueBufferSize(objValueBufferSize), + segmentID(segmentID), + cjsModuleCount(cjsModuleCount), + functionSourceCount(functionSourceCount), + debugInfoOffset(debugInfoOffset), + options(options) { + std::copy(sourceHash.begin(), sourceHash.end(), this->sourceHash); + std::fill(padding, padding + sizeof(padding), 0); + } +}; + +/** + * Footer of binary file. Used for summary information that is *not* + * read during normal execution (since that would hurt locality). + */ +struct BytecodeFileFooter { + uint8_t fileHash[SHA1_NUM_BYTES]; // Hash of everything above the footer. 
+ + // NOTE: If we ever add any non-byte fields, we need to ensure alignment + // everywhere this struct is written. + + BytecodeFileFooter(const SHA1 &fileHash) { + std::copy(fileHash.begin(), fileHash.end(), this->fileHash); + } +}; + +/// The string table is an array of these entries, followed by an array of +/// OverflowStringTableEntry for the entries whose length or offset doesn't fit +/// into the bitfields. +struct SmallStringTableEntry { + // isUTF16 cannot be bool because the C++ spec allows padding + // at type boundaries. + // Regardless of LLVM_PACKED_START, + // * GCC and Clang never add padding at type boundaries. + // * MSVC always adds padding at type boundaries. + // * In addition, in MSVC, for each list of continuous fields with the same + // types, they always occupy a multiple of the type's normal size. + uint32_t isUTF16 : 1; + uint32_t offset : 23; + uint32_t length : 8; + + static constexpr uint32_t INVALID_OFFSET = (1 << 23); + static constexpr uint32_t INVALID_LENGTH = (1 << 8) - 1; + + bool isOverflowed() const { + return length == INVALID_LENGTH; + } + + /// Construct a small entry from 'entry'. If any fields overflow, then set + /// 'overflowOffset' as the offset instead. + SmallStringTableEntry( + const StringTableEntry &entry, + uint32_t overflowOffset) { + isUTF16 = entry.isUTF16(); + if (entry.getOffset() < INVALID_OFFSET && + entry.getLength() < INVALID_LENGTH) { + offset = entry.getOffset(); + length = entry.getLength(); + } else { + assert(overflowOffset < INVALID_OFFSET); + offset = overflowOffset; + length = INVALID_LENGTH; + } + } +}; + +// See BytecodeFileFormatTest for details about bit field layouts +static_assert( + sizeof(SmallStringTableEntry) == 4, + "SmallStringTableEntry should take up 4 bytes total"); + +/// These are indexed by the 'offset' field of overflowed SmallStringTableEntry.
+struct OverflowStringTableEntry { + uint32_t offset; + uint32_t length; + + OverflowStringTableEntry(uint32_t offset, uint32_t length) + : offset(offset), length(length) {} +}; + +union FunctionHeaderFlag { + enum { + ProhibitCall = 0, + ProhibitConstruct = 1, + ProhibitNone = 2, + }; + + struct { + /// Which kinds of calls are prohibited, constructed from the above enum. + uint8_t prohibitInvoke : 2; + bool strictMode : 1; + bool hasExceptionHandler : 1; + bool hasDebugInfo : 1; + bool overflowed : 1; + }; + uint8_t flags; + + FunctionHeaderFlag() { + flags = 0; + prohibitInvoke = ProhibitNone; + } + + /// \return true if the specified kind of invocation is prohibited by the + /// flags. + bool isCallProhibited(bool construct) const { + return prohibitInvoke == (uint8_t)construct; + } +}; + +// See BytecodeFileFormatTest for details about bit field layouts +static_assert( + sizeof(FunctionHeaderFlag) == 1, + "FunctionHeaderFlag should take up 1 byte total"); + +/// FUNC_HEADER_FIELDS is a macro for defining function header fields. +/// The args are API type, small storage type, name, and bit length. +/// The types can be different if the overflow supports a longer value than the +/// small storage type does. +#define FUNC_HEADER_FIELDS(V) \ + /* first word */ \ + V(uint32_t, uint32_t, offset, 25) \ + V(uint32_t, uint32_t, paramCount, 7) \ + /* second word */ \ + V(uint32_t, uint32_t, bytecodeSizeInBytes, 15) \ + V(uint32_t, uint32_t, functionName, 17) \ + /* third word */ \ + V(uint32_t, uint32_t, infoOffset, 25) \ + V(uint32_t, uint32_t, frameSize, 7) \ + /* fourth word, with flags below */ \ + V(uint32_t, uint8_t, environmentSize, 8) \ + V(uint8_t, uint8_t, highestReadCacheIndex, 8) \ + V(uint8_t, uint8_t, highestWriteCacheIndex, 8) + +/** + * Metadata of a function. + */ +struct FunctionHeader { +// Use api type here since FunctionHeader stores the full type. 
+#define DECLARE_FIELD(api_type, store_type, name, bits) api_type name; + FUNC_HEADER_FIELDS(DECLARE_FIELD) +#undef DECLARE_FIELD + + FunctionHeaderFlag flags{}; + + public: + FunctionHeader( + uint32_t size, + uint32_t paramCount, + uint32_t frameSize, + uint32_t envSize, + uint32_t functionNameID, + uint8_t hiRCacheIndex, + uint8_t hiWCacheIndex) + : offset(0), + paramCount(paramCount), + bytecodeSizeInBytes(size), + functionName(functionNameID), + infoOffset(0), + frameSize(frameSize), + environmentSize(envSize), + highestReadCacheIndex(hiRCacheIndex), + highestWriteCacheIndex(hiWCacheIndex) {} +}; + +/// Compact version of FunctionHeader. Fits most functions. +/// Has two possible states, indicated by 'overflowed' flag: +/// !overflowed: all fields are valid. +/// overflowed: only flags and getLargeHeaderOffset() are valid, +/// and at the latter is a FunctionHeader. +/// Note that msvc and compatible compilers will not put bitfields +/// of the same type in the same memory, so don't mix uint8_t and +/// uint32_t if you want them packed next to each other. + +struct SmallFuncHeader { +// Use the store_type since SmallFuncHeader attempts to minimize storage. +#define DECLARE_BITFIELD(api_type, store_type, name, bits) \ + store_type name : bits; + FUNC_HEADER_FIELDS(DECLARE_BITFIELD) +#undef DECLARE_BITFIELD + + FunctionHeaderFlag flags{}; + + /// Make a small header equivalent to 'large' if all values fit, + /// else set overflowed with large.infoOffset as large's offset. + SmallFuncHeader(const FunctionHeader &large) { + std::memset(this, 0, sizeof(SmallFuncHeader)); // Avoid leaking junk. 
+ flags = large.flags; +#define CHECK_COPY_FIELD(api_type, store_type, name, bits) \ + if (large.name > (1U << bits) - 1) { \ + setLargeHeaderOffset(large.infoOffset); \ + return; \ + } \ + name = large.name; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + FUNC_HEADER_FIELDS(CHECK_COPY_FIELD) +#pragma GCC diagnostic pop +#undef CHECK_COPY_FIELD + assert(!flags.overflowed); + } + + void setLargeHeaderOffset(uint32_t largeHeaderOffset) { + flags.overflowed = true; + // Can use any fields to store the large offset; pick two big ones. + offset = largeHeaderOffset & 0xffff; + infoOffset = largeHeaderOffset >> 16; + } + + uint32_t getLargeHeaderOffset() const { + assert(flags.overflowed); + return (infoOffset << 16) | offset; + } +}; + +// Sizes of file and function headers are tuned for good cache line packing. +// If you change their size, try to avoid headers crossing cache lines. +static_assert( + sizeof(BytecodeFileHeader) % 32 == 0, + "BytecodeFileHeader size should be cache friendly"); + +static_assert( + 32 % sizeof(SmallFuncHeader) == 0, + "SmallFuncHeader size should be cache friendly"); + +struct ExceptionHandlerTableHeader { + uint32_t count; +}; + +/// We need HBCExceptionHandlerInfo other than using ExceptionHandlerInfo +/// directly because we don't need depth in HBC. +struct HBCExceptionHandlerInfo { + uint32_t start; + uint32_t end; + uint32_t target; +}; + +// The size of the file table and debug data. +struct DebugInfoHeader { + // Number of filenames stored in the table. + uint32_t filenameCount; + // Bytes in the filename storage contents. + uint32_t filenameStorageSize; + + // Count of the file table. + uint32_t fileRegionCount; + // Byte offset in the debug data for the scope descriptors. + uint32_t scopeDescDataOffset; + // Byte offset in the debug data for the textified callee table. + uint32_t textifiedCalleeOffset; + // Byte offset in the debug data for the string table. 
+ uint32_t stringTableOffset; + // Size in bytes of the debug data. + uint32_t debugDataSize; +}; + +// The string id of files for given offsets in debug info. +struct DebugFileRegion { + uint32_t fromAddress; + uint32_t filenameId; + uint32_t sourceMappingUrlId; +}; + +LLVM_PACKED_END + +/// Visit each segment in a bytecode file in order. +/// This function defines the order of the bytecode file segments. +template +void visitBytecodeSegmentsInOrder(Visitor &visitor) { + visitor.visitFunctionHeaders(); + visitor.visitStringKinds(); + visitor.visitIdentifierHashes(); + visitor.visitSmallStringTable(); + visitor.visitOverflowStringTable(); + visitor.visitStringStorage(); + visitor.visitArrayBuffer(); + visitor.visitObjectKeyBuffer(); + visitor.visitObjectValueBuffer(); + visitor.visitBigIntTable(); + visitor.visitBigIntStorage(); + visitor.visitRegExpTable(); + visitor.visitRegExpStorage(); + visitor.visitCJSModuleTable(); + visitor.visitFunctionSourceTable(); +} + +/// BytecodeFileFields represents direct byte-level access to the structured +/// fields of a bytecode file, providing pointers and ArrayRefs referencing +/// directly into the buffer. Note some portions of the bytecode file are less +/// structured, such as the function info section; these are not exposed here. +/// Most clients will want to use an immutable BytecodeFileFields, which may be +/// initialized from a read-only buffer. Tools that want to modify the fields +/// in-place may initialize with Mutable=true. +template +struct BytecodeFileFields { + template + using Pointer = typename std::conditional::type; + + template + using Array = typename std:: + conditional, llvh::ArrayRef>::type; + + /// The file header. + Pointer header{nullptr}; + + /// List of function headers. Some of these may be overflow headers. + Array functionHeaders; + + /// The list of short string table entries. + Array stringTableEntries{}; + + /// Run-length encoding representing the kinds of strings in the table. 
+ Array stringKinds{}; + + /// The list of identifier hashes. + Array identifierHashes{}; + + /// The list of overflowed string table entries. + Array stringTableOverflowEntries{}; + + /// The character buffer used for string storage. + Array stringStorage; + + /// Buffer for array literals. + Array arrayBuffer; + + /// Buffer for object keys. + Array objKeyBuffer; + + /// Buffer for object values. + Array objValueBuffer; + + /// List of bigint literals. + Array bigIntTable; + + /// Storage for bigint bytecode. + Array bigIntStorage; + + /// List of regexp literals. + Array regExpTable; + + /// Storage for regexp bytecode. + Array regExpStorage; + + /// List of CJS modules. + Array> cjsModuleTable; + + /// List of resolved CJS modules. + Array> cjsModuleTableStatic; + + /// List of function source table entries. + Array> functionSourceTable; + + /// Populate bytecode file fields from a buffer. The fields will point + /// directly into the buffer and it is the caller's responsibility to ensure + /// the result does not outlive the buffer. + /// \p form contains the expected bytecode form (Execution or Delta). + /// \return true on success, false on + /// failure, in which case an error is returned by reference. + bool populateFromBuffer( + Array bytes, + std::string *outError, + BytecodeForm form = BytecodeForm::Execution); +}; + +using ConstBytecodeFileFields = BytecodeFileFields; +using MutableBytecodeFileFields = BytecodeFileFields; + +} // namespace hbc +} // namespace hermes + +#endif // HERMES_BCGEN_HBC_BYTECODEFILEFORMAT_H diff --git a/hbctool/hbc/hbc94/raw/BytecodeList.def b/hbctool/hbc/hbc94/raw/BytecodeList.def new file mode 100644 index 0000000..6a90f91 --- /dev/null +++ b/hbctool/hbc/hbc94/raw/BytecodeList.def @@ -0,0 +1,887 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +// Define default versions of all macros used. +#ifndef DEFINE_OPERAND_TYPE +#define DEFINE_OPERAND_TYPE(...) +#endif +#ifndef DEFINE_OPCODE_0 +#define DEFINE_OPCODE_0(name) DEFINE_OPCODE(name) +#endif +#ifndef DEFINE_OPCODE_1 +#define DEFINE_OPCODE_1(name, ...) DEFINE_OPCODE(name) +#endif +#ifndef DEFINE_OPCODE_2 +#define DEFINE_OPCODE_2(name, ...) DEFINE_OPCODE(name) +#endif +#ifndef DEFINE_OPCODE_3 +#define DEFINE_OPCODE_3(name, ...) DEFINE_OPCODE(name) +#endif +#ifndef DEFINE_OPCODE_4 +#define DEFINE_OPCODE_4(name, ...) DEFINE_OPCODE(name) +#endif +#ifndef DEFINE_OPCODE_5 +#define DEFINE_OPCODE_5(name, ...) DEFINE_OPCODE(name) +#endif +#ifndef DEFINE_OPCODE_6 +#define DEFINE_OPCODE_6(name, ...) DEFINE_OPCODE(name) +#endif +#ifndef DEFINE_OPCODE +#define DEFINE_OPCODE(...) +#endif +#ifndef DEFINE_JUMP_LONG_VARIANT +#define DEFINE_JUMP_LONG_VARIANT(...) +#endif +#ifndef DEFINE_RET_TARGET +#define DEFINE_RET_TARGET(...) +#endif +#ifndef ASSERT_EQUAL_LAYOUT1 +#define ASSERT_EQUAL_LAYOUT1(a, b) +#endif +#ifndef ASSERT_EQUAL_LAYOUT2 +#define ASSERT_EQUAL_LAYOUT2(a, b) +#endif +#ifndef ASSERT_EQUAL_LAYOUT3 +#define ASSERT_EQUAL_LAYOUT3(a, b) +#endif +#ifndef ASSERT_EQUAL_LAYOUT4 +#define ASSERT_EQUAL_LAYOUT4(a, b) +#endif +#ifndef ASSERT_MONOTONE_INCREASING +#define ASSERT_MONOTONE_INCREASING(first, ...) +#endif +#ifndef OPERAND_BIGINT_ID +#define OPERAND_BIGINT_ID(name, operandNumber) +#endif +#ifndef OPERAND_FUNCTION_ID +#define OPERAND_FUNCTION_ID(name, operandNumber) +#endif +#ifndef OPERAND_STRING_ID +#define OPERAND_STRING_ID(name, operandNumber) +#endif + +DEFINE_OPERAND_TYPE(Reg8, uint8_t) +DEFINE_OPERAND_TYPE(Reg32, uint32_t) +DEFINE_OPERAND_TYPE(UInt8, uint8_t) +DEFINE_OPERAND_TYPE(UInt16, uint16_t) +DEFINE_OPERAND_TYPE(UInt32, uint32_t) +DEFINE_OPERAND_TYPE(Addr8, int8_t) +DEFINE_OPERAND_TYPE(Addr32, int32_t) +DEFINE_OPERAND_TYPE(Imm32, int32_t) +DEFINE_OPERAND_TYPE(Double, double) + +/// Unreachable opcode for stubs and similar. 
This is first so that it has the +/// value zero. +DEFINE_OPCODE_0(Unreachable) + +/// Create an object from a static map of values, as for var={'a': 3}. +/// Any non-constant elements can be set afterwards with PutOwnByInd. +/// Arg1 is the destination. +/// Arg2 is a preallocation size hint. +/// Arg3 is the number of static elements. +/// Arg4 is the index in the object key buffer table. +/// Arg5 is the index in the object val buffer table. +DEFINE_OPCODE_5(NewObjectWithBuffer, Reg8, UInt16, UInt16, UInt16, UInt16) +DEFINE_OPCODE_5(NewObjectWithBufferLong, Reg8, UInt16, UInt16, UInt32, UInt32) + +/// Create a new, empty Object using the built-in constructor (regardless of +/// whether it was overridden). +/// Arg1 = {} +DEFINE_OPCODE_1(NewObject, Reg8) + +/// Create a new empty Object with the specified parent. If the parent is +/// null, no parent is used. If the parent is not an object, the builtin +/// Object.prototype is used. Otherwise the parent itself is used. +/// Arg1 = the created object +/// Arg2 = the parent. +DEFINE_OPCODE_2(NewObjectWithParent, Reg8, Reg8) + +/// Create an array from a static list of values, as for var=[1,2,3]. +/// Any non-constant elements can be set afterwards with PutOwnByIndex. +/// Arg1 is the destination. +/// Arg2 is a preallocation size hint. +/// Arg3 is the number of static elements. +/// Arg4 is the index in the array buffer table. +DEFINE_OPCODE_4(NewArrayWithBuffer, Reg8, UInt16, UInt16, UInt16) +DEFINE_OPCODE_4(NewArrayWithBufferLong, Reg8, UInt16, UInt16, UInt32) + +/// Create a new array of a given size. 
+/// Arg1 = new Array(Arg2) +DEFINE_OPCODE_2(NewArray, Reg8, UInt16) + +/// Arg1 = Arg2 (Register copy) +DEFINE_OPCODE_2(Mov, Reg8, Reg8) + +/// Arg1 = Arg2 (Register copy, long index) +DEFINE_OPCODE_2(MovLong, Reg32, Reg32) + +/// Arg1 = -Arg2 (Unary minus) +DEFINE_OPCODE_2(Negate, Reg8, Reg8) + +/// Arg1 = !Arg2 (Boolean not) +DEFINE_OPCODE_2(Not, Reg8, Reg8) + +/// Arg1 = ~Arg2 (Bitwise not) +DEFINE_OPCODE_2(BitNot, Reg8, Reg8) + +/// Arg1 = typeof Arg2 (JS typeof) +DEFINE_OPCODE_2(TypeOf, Reg8, Reg8) + +/// Arg1 = Arg2 == Arg3 (JS equality) +DEFINE_OPCODE_3(Eq, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 === Arg3 (JS strict equality) +DEFINE_OPCODE_3(StrictEq, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 != Arg3 (JS inequality) +DEFINE_OPCODE_3(Neq, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 !== Arg3 (JS strict inequality) +DEFINE_OPCODE_3(StrictNeq, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 < Arg3 (JS less-than) +DEFINE_OPCODE_3(Less, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 <= Arg3 (JS less-than-or-equals) +DEFINE_OPCODE_3(LessEq, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 > Arg3 (JS greater-than) +DEFINE_OPCODE_3(Greater, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 >= Arg3 (JS greater-than-or-equals) +DEFINE_OPCODE_3(GreaterEq, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 + Arg3 (JS addition/concatenation) +DEFINE_OPCODE_3(Add, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 + Arg3 (Numeric addition, skips number check) +DEFINE_OPCODE_3(AddN, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 * Arg3 (JS multiplication) +DEFINE_OPCODE_3(Mul, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 * Arg3 (Numeric multiplication, skips number check) +DEFINE_OPCODE_3(MulN, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 / Arg3 (JS division) +DEFINE_OPCODE_3(Div, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 / Arg3 (Numeric division, skips number check) +DEFINE_OPCODE_3(DivN, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 % Arg3 (JS remainder) +DEFINE_OPCODE_3(Mod, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 - Arg3 (JS subtraction) +DEFINE_OPCODE_3(Sub, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 - 
Arg3 (Numeric subtraction, skips number check) +DEFINE_OPCODE_3(SubN, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 << Arg3 (JS bitshift left) +DEFINE_OPCODE_3(LShift, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 >> Arg3 (JS signed bitshift right) +DEFINE_OPCODE_3(RShift, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 >>> Arg3 (JS unsigned bitshift right) +DEFINE_OPCODE_3(URshift, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 & Arg3 (JS bitwise AND) +DEFINE_OPCODE_3(BitAnd, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 ^ Arg3 (JS bitwise XOR) +DEFINE_OPCODE_3(BitXor, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 | Arg3 (JS bitwise OR) +DEFINE_OPCODE_3(BitOr, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 + 1 (JS increment, skips number check) +DEFINE_OPCODE_2(Inc, Reg8, Reg8) + +/// Arg1 = Arg2 - 1 (JS decrement, skips number check) +DEFINE_OPCODE_2(Dec, Reg8, Reg8) + +/// Check whether Arg2 contains Arg3 in its prototype chain. +/// Note that this is not the same as JS instanceof. +/// Pseudocode: Arg1 = prototypechain(Arg2).contains(Arg3) +DEFINE_OPCODE_3(InstanceOf, Reg8, Reg8, Reg8) + +/// Arg1 = Arg2 in Arg3 (JS relational 'in') +DEFINE_OPCODE_3(IsIn, Reg8, Reg8, Reg8) + +/// Get an environment (scope) from N levels up the stack. +/// 0 is the current environment, 1 is the caller's environment, etc. +DEFINE_OPCODE_2(GetEnvironment, Reg8, UInt8) + +/// Store a value in an environment. +/// StoreNPToEnvironment[L] store a non-pointer value in an environment +/// Arg1 is the environment (as fetched by GetEnvironment). +/// Arg2 is the environment index slot number. +/// Arg3 is the value. +DEFINE_OPCODE_3(StoreToEnvironment, Reg8, UInt8, Reg8) +DEFINE_OPCODE_3(StoreToEnvironmentL, Reg8, UInt16, Reg8) +DEFINE_OPCODE_3(StoreNPToEnvironment, Reg8, UInt8, Reg8) +DEFINE_OPCODE_3(StoreNPToEnvironmentL, Reg8, UInt16, Reg8) + +/// Load a value from an environment. +/// Arg1 is the destination. +/// Arg2 is the environment (as fetched by GetEnvironment). +/// Arg3 is the environment index slot number. 
+DEFINE_OPCODE_3(LoadFromEnvironment, Reg8, Reg8, UInt8) +DEFINE_OPCODE_3(LoadFromEnvironmentL, Reg8, Reg8, UInt16) + +/// Get the global object (the object in which global variables are stored). +DEFINE_OPCODE_1(GetGlobalObject, Reg8) + +/// Obtain the value of NewTarget from the frame. +/// Arg1 = NewTarget +DEFINE_OPCODE_1(GetNewTarget, Reg8) + +/// Create a new environment, to store values captured by closures. +DEFINE_OPCODE_1(CreateEnvironment, Reg8) + +/// Create a new inner environment, to store values captured by closures. +/// Arg1 is the destination. +/// Arg2 is the parent environment. +/// Arg3 is the number of slots in the environment. +DEFINE_OPCODE_3(CreateInnerEnvironment, Reg8, Reg8, UInt32) + +/// Declare a global variable by string table index. +/// The variable will be set to undefined. +DEFINE_OPCODE_1(DeclareGlobalVar, UInt32) +OPERAND_STRING_ID(DeclareGlobalVar, 1) + +/// Checks if globalThis has a restricted global property with the given name. +/// Arg1 is the name to check. +DEFINE_OPCODE_1(ThrowIfHasRestrictedGlobalProperty, UInt32) +OPERAND_STRING_ID(ThrowIfHasRestrictedGlobalProperty, 1) + +/// Get an object property by string table index. +/// Arg1 = Arg2[stringtable[Arg4]] +/// Arg3 is a cache index used to speed up the above operation. +DEFINE_OPCODE_4(GetByIdShort, Reg8, Reg8, UInt8, UInt8) +DEFINE_OPCODE_4(GetById, Reg8, Reg8, UInt8, UInt16) +DEFINE_OPCODE_4(GetByIdLong, Reg8, Reg8, UInt8, UInt32) +OPERAND_STRING_ID(GetByIdShort, 4) +OPERAND_STRING_ID(GetById, 4) +OPERAND_STRING_ID(GetByIdLong, 4) + +/// Get an object property by string table index, or throw if not found. +/// This is similar to GetById, but intended for use with global variables +/// where Arg2 = GetGlobalObject. +DEFINE_OPCODE_4(TryGetById, Reg8, Reg8, UInt8, UInt16) +DEFINE_OPCODE_4(TryGetByIdLong, Reg8, Reg8, UInt8, UInt32) +OPERAND_STRING_ID(TryGetById, 4) +OPERAND_STRING_ID(TryGetByIdLong, 4) + +/// Set an object property by string index. 
+/// Arg1[stringtable[Arg4]] = Arg2. +DEFINE_OPCODE_4(PutById, Reg8, Reg8, UInt8, UInt16) +DEFINE_OPCODE_4(PutByIdLong, Reg8, Reg8, UInt8, UInt32) +OPERAND_STRING_ID(PutById, 4) +OPERAND_STRING_ID(PutByIdLong, 4) + +/// Set an object property by string index, or throw if undeclared. +/// This is similar to PutById, but intended for use with global variables +/// where Arg1 = GetGlobalObject. +DEFINE_OPCODE_4(TryPutById, Reg8, Reg8, UInt8, UInt16) +DEFINE_OPCODE_4(TryPutByIdLong, Reg8, Reg8, UInt8, UInt32) +OPERAND_STRING_ID(TryPutById, 4) +OPERAND_STRING_ID(TryPutByIdLong, 4) + +/// Create a new own property on an object. This is similar to PutById, but +/// the destination must be an object, it only deals with own properties, +/// ignoring the prototype chain, and the property must not already be defined. +/// Similarly to PutById, the property name cannot be a valid array index. +/// Arg1 is the destination object, which is known to be an object. +/// Arg2 is the value to write. +/// Arg3 is the string table ID of the property name. +/// Arg1[stringtable[Arg3]] = Arg2 +DEFINE_OPCODE_3(PutNewOwnByIdShort, Reg8, Reg8, UInt8) +DEFINE_OPCODE_3(PutNewOwnById, Reg8, Reg8, UInt16) +DEFINE_OPCODE_3(PutNewOwnByIdLong, Reg8, Reg8, UInt32) +OPERAND_STRING_ID(PutNewOwnByIdShort, 3) +OPERAND_STRING_ID(PutNewOwnById, 3) +OPERAND_STRING_ID(PutNewOwnByIdLong, 3) + +/// Create a new non-enumerable own property on an object. This is the same as +/// PutNewOwnById, but creates the property with different enumerability. +/// Arg1 is the destination object. +/// Arg2 is the value to write. +/// Arg3 is the string table ID of the property name. +/// Arg1[stringtable[Arg3]] = Arg2 +DEFINE_OPCODE_3(PutNewOwnNEById, Reg8, Reg8, UInt16) +DEFINE_OPCODE_3(PutNewOwnNEByIdLong, Reg8, Reg8, UInt32) +OPERAND_STRING_ID(PutNewOwnNEById, 3) +OPERAND_STRING_ID(PutNewOwnNEByIdLong, 3) + +// The "NE" versions must be ordered after the "normal" versions. 
+ASSERT_MONOTONE_INCREASING( + PutNewOwnByIdShort, + PutNewOwnById, + PutNewOwnByIdLong, + PutNewOwnNEById, + PutNewOwnNEByIdLong) + +/// Assign a value to a constant integer own property which will be created as +/// enumerable. This is used (potentially in conjunction with +/// NewArrayWithBuffer) for arr=[foo,bar] initializations. +/// Arg1[Arg3] = Arg2; +DEFINE_OPCODE_3(PutOwnByIndex, Reg8, Reg8, UInt8) +DEFINE_OPCODE_3(PutOwnByIndexL, Reg8, Reg8, UInt32) + +/// Set an own property identified by value. +/// Arg1 is the destination object. +/// Arg2 is the value to write. +/// Arg3 is the property name. +/// Arg4 : bool -> enumerable. If true, the property is created as enumerable, +/// non-enumerable otherwise. +/// Arg1[Arg3] = Arg2; +DEFINE_OPCODE_4(PutOwnByVal, Reg8, Reg8, Reg8, UInt8) + +/// Delete a property by string table index. +/// Arg1 = delete Arg2[stringtable[Arg3]] +DEFINE_OPCODE_3(DelById, Reg8, Reg8, UInt16) +DEFINE_OPCODE_3(DelByIdLong, Reg8, Reg8, UInt32) +OPERAND_STRING_ID(DelById, 3) +OPERAND_STRING_ID(DelByIdLong, 3) + +/// Get a property by value. Constant string values should instead use GetById. +/// Arg1 = Arg2[Arg3] +DEFINE_OPCODE_3(GetByVal, Reg8, Reg8, Reg8) + +/// Set a property by value. Constant string values should instead use PutById +/// (unless they are array indices according to ES5.1 section 15.4, in which +/// case this is still the right opcode). +/// Arg1[Arg2] = Arg3 +DEFINE_OPCODE_3(PutByVal, Reg8, Reg8, Reg8) + +/// Delete a property by value (when the value is not known at compile time). +/// Arg1 = delete Arg2[Arg3] +DEFINE_OPCODE_3(DelByVal, Reg8, Reg8, Reg8) + +/// Add a getter and a setter for a property by value. +/// Object.defineProperty(Arg1, Arg2, { get: Arg3, set: Arg4 }). +/// Arg1 is the target object which will have a property defined.
+/// Arg2 is the property name +/// Arg3 is the getter closure or undefined +/// Arg4 is the setter closure or undefined +/// Arg5 : boolean - if true, the property will be enumerable. +DEFINE_OPCODE_5(PutOwnGetterSetterByVal, Reg8, Reg8, Reg8, Reg8, UInt8) + +/// Get the list of properties from an object to implement for..in loop. +/// Returns Arg1, which is the register that holds array of properties. +/// Returns Undefined if the object is null/undefined. +/// Arg2 is the register that holds the object. +/// Arg3 is the register that holds the iterating index. +/// Arg4 is the register that holds the size of the property list. +DEFINE_OPCODE_4(GetPNameList, Reg8, Reg8, Reg8, Reg8) + +/// Get the next property in the for..in iterator. +/// Returns Arg1, which is the next property. Undefined if unavailable. +/// Arg2 is the register that holds array of properties. +/// Arg3 is the register that holds the object. +/// Arg4 is the register that holds the iterating index. +/// Arg5 is the register that holds the size of the property list. +DEFINE_OPCODE_5(GetNextPName, Reg8, Reg8, Reg8, Reg8, Reg8) + +///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +/// NOTE: the ordering of Call, CallN, Construct, CallLong, ConstructLong is +/// important. The "long" versions are defined after the "short" versions. +/// It is important for efficiency that all Calls have contiguous opcodes, with +/// wider instructions appearing after narrower ones. + +/// Call a function. +/// Arg1 is the destination of the return value. +/// Arg2 is the closure to invoke. +/// Arg3 is the number of arguments, assumed to be found in reverse order +/// from the end of the current frame. +DEFINE_OPCODE_3(Call, Reg8, Reg8, UInt8) +DEFINE_RET_TARGET(Call) + +/// Call a constructor, with semantics identical to Call. +/// Arg1 is the destination of the return value. +/// Arg2 is the closure to invoke. 
+/// Arg3 is the number of arguments, assumed to be found in reverse order +/// from the end of the current frame. The first argument 'this' +/// is assumed to be created with CreateThis. +DEFINE_OPCODE_3(Construct, Reg8, Reg8, UInt8) +DEFINE_RET_TARGET(Construct) + +/// Call a function with one arg. +/// Arg1 is the destination of the return value. +/// Arg2 is the closure to invoke. +/// Arg3 is the first argument. +DEFINE_OPCODE_3(Call1, Reg8, Reg8, Reg8) +DEFINE_RET_TARGET(Call1) + +/// Call a function directly without a closure. +/// Arg1 is the destination of the return value. +/// Arg2 is the number of arguments, assumed to be found in reverse order +/// from the end of the current frame. The first argument 'this' +/// is assumed to be created with CreateThis. +/// Arg3 is index in the function table. +/// Note that we expect the variable-sized argument to be last. +DEFINE_OPCODE_3(CallDirect, Reg8, UInt8, UInt16) +OPERAND_FUNCTION_ID(CallDirect, 3) +DEFINE_RET_TARGET(CallDirect) + +/// Call a function with two args. +/// Arg1 is the destination of the return value. +/// Arg2 is the closure to invoke. +/// Arg3 is the first argument. +/// Arg4 is the second argument. +DEFINE_OPCODE_4(Call2, Reg8, Reg8, Reg8, Reg8) +DEFINE_RET_TARGET(Call2) + +/// Call a function with three args. +/// Arg1 is the destination of the return value. +/// Arg2 is the closure to invoke. +/// Arg3 is the first argument. +/// Arg4 is the second argument. +/// Arg5 is the third argument. +DEFINE_OPCODE_5(Call3, Reg8, Reg8, Reg8, Reg8, Reg8) +DEFINE_RET_TARGET(Call3) + +/// Call a function with four args. +/// Arg1 is the destination of the return value. +/// Arg2 is the closure to invoke. +/// Arg3 is the first argument. +/// Arg4 is the second argument. +/// Arg5 is the third argument. +/// Arg6 is the fourth argument. +DEFINE_OPCODE_6(Call4, Reg8, Reg8, Reg8, Reg8, Reg8, Reg8) +DEFINE_RET_TARGET(Call4) + +/// Identical to Call, but allowing more arguments. 
+DEFINE_OPCODE_3(CallLong, Reg8, Reg8, UInt32) +DEFINE_RET_TARGET(CallLong) + +/// Identical to Construct, but allowing more arguments. +DEFINE_OPCODE_3(ConstructLong, Reg8, Reg8, UInt32) +DEFINE_RET_TARGET(ConstructLong) + +/// Identical to CallDirect, but the function index is 32-bit. +DEFINE_OPCODE_3(CallDirectLongIndex, Reg8, UInt8, UInt32) +DEFINE_RET_TARGET(CallDirectLongIndex) + +// Enforce the order. +ASSERT_MONOTONE_INCREASING( + Call, + Construct, + Call1, + CallDirect, + Call2, + Call3, + Call4, + CallLong, + ConstructLong, + CallDirectLongIndex) + +/// Call a builtin function. +/// Note this is NOT marked as a Ret target, because the callee is native +/// and therefore never JS. +/// Arg1 is the destination of the return value. +/// Arg2 is the builtin number. +/// Arg3 is the number of arguments, assumed to be found in reverse order +/// from the end of the current frame. +/// thisArg is set to "undefined". +DEFINE_OPCODE_3(CallBuiltin, Reg8, UInt8, UInt8) + +/// Call a builtin function. +/// Note this is NOT marked as a Ret target, because the callee is native +/// and therefore never JS. +/// Arg1 is the destination of the return value. +/// Arg2 is the builtin number. +/// Arg3 is the number of arguments, assumed to be found in reverse order +/// from the end of the current frame. +/// thisArg is set to "undefined". +DEFINE_OPCODE_3(CallBuiltinLong, Reg8, UInt8, UInt32) + +/// Get a closure from a builtin function. +/// Arg1 is the destination of the return value. +/// Arg2 is the builtin number. +DEFINE_OPCODE_2(GetBuiltinClosure, Reg8, UInt8) + +/// +///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +/// Return a value from the current function. +/// return Arg1; +DEFINE_OPCODE_1(Ret, Reg8) + +/// Catch an exception (the first instruction in an exception handler). 
+/// } catch(Arg1) { +DEFINE_OPCODE_1(Catch, Reg8) + +/// ES6 18.2.1.1 PerformEval(Arg2, evalRealm, strictCaller=true, direct=true) +/// Arg1 is the destination of the return value. +/// Arg2 is the value to eval. +DEFINE_OPCODE_2(DirectEval, Reg8, Reg8) + +/// Throw an exception. +/// throw Arg1; +DEFINE_OPCODE_1(Throw, Reg8) + +/// If Arg2 is Empty, throw ReferenceError, otherwise move it into Arg1. +/// Arg1 is the destination of the return value +/// Arg2 is the value to check +DEFINE_OPCODE_2(ThrowIfEmpty, Reg8, Reg8) + +/// Implementation dependent debugger action. +DEFINE_OPCODE_0(Debugger) + +/// Fast check for an async interrupt request. +DEFINE_OPCODE_0(AsyncBreakCheck) + +/// Define a profile point. +/// Arg1 is the function local profile point index. The first one will have the +/// largest index. If there are more than 2^16 profile points in the function, +/// all the overflowed profile points have index zero. +DEFINE_OPCODE_1(ProfilePoint, UInt16) + +/// Create a closure. +/// Arg1 is the register in which to store the closure. +/// Arg2 is the current environment as loaded by GetEnvironment 0. +/// Arg3 is index in the function table. +DEFINE_OPCODE_3(CreateClosure, Reg8, Reg8, UInt16) +DEFINE_OPCODE_3(CreateClosureLongIndex, Reg8, Reg8, UInt32) +OPERAND_FUNCTION_ID(CreateClosure, 3) +OPERAND_FUNCTION_ID(CreateClosureLongIndex, 3) + +/// Create a closure for a GeneratorFunction. +/// Arg1 is the register in which to store the closure. +/// Arg2 is the current environment as loaded by GetEnvironment 0. +/// Arg3 is index in the function table. +DEFINE_OPCODE_3(CreateGeneratorClosure, Reg8, Reg8, UInt16) +DEFINE_OPCODE_3(CreateGeneratorClosureLongIndex, Reg8, Reg8, UInt32) +OPERAND_FUNCTION_ID(CreateGeneratorClosure, 3) +OPERAND_FUNCTION_ID(CreateGeneratorClosureLongIndex, 3) + +/// Create a closure for an AsyncFunction. +/// Arg1 is the register in which to store the closure. +/// Arg2 is the current environment as loaded by GetEnvironment 0. 
+/// Arg3 is index in the function table. +DEFINE_OPCODE_3(CreateAsyncClosure, Reg8, Reg8, UInt16) +DEFINE_OPCODE_3(CreateAsyncClosureLongIndex, Reg8, Reg8, UInt32) +OPERAND_FUNCTION_ID(CreateAsyncClosure, 3) +OPERAND_FUNCTION_ID(CreateAsyncClosureLongIndex, 3) + +/// Allocate an empty, uninitialized object (immediately before a constructor). +/// Arg1 is the destination register. +/// Arg2 is the prototype to assign it. +/// Arg3 is the constructor closure that will be used*. +/// * To allow internal constructors to have special objects allocated. +DEFINE_OPCODE_3(CreateThis, Reg8, Reg8, Reg8) + +/// Choose the result of a constructor: 'this' or a returned object. +/// Arg1 is the result. +/// Arg2 is the 'this' object used for the constructor. +/// Arg3 is the constructor's return value. +/// Arg1 = Arg3 instanceof Object ? Arg3 : Arg2 +DEFINE_OPCODE_3(SelectObject, Reg8, Reg8, Reg8) + +/// Load a function parameter by index. Starts at 0 with 'this'. +/// Arg1 = Arg2 == 0 ? this : arguments[Arg2 - 1]; +DEFINE_OPCODE_2(LoadParam, Reg8, UInt8) + +/// Like LoadParam, but allows accessing arguments >= 255. +DEFINE_OPCODE_2(LoadParamLong, Reg8, UInt32) + +/// Load a constant integer value. +DEFINE_OPCODE_2(LoadConstUInt8, Reg8, UInt8) +DEFINE_OPCODE_2(LoadConstInt, Reg8, Imm32) + +/// Load a constant double value. +DEFINE_OPCODE_2(LoadConstDouble, Reg8, Double) + +/// Load a constant BigInt value by bigint table index. +DEFINE_OPCODE_2(LoadConstBigInt, Reg8, UInt16) +DEFINE_OPCODE_2(LoadConstBigIntLongIndex, Reg8, UInt32) +OPERAND_BIGINT_ID(LoadConstBigInt, 2) +OPERAND_BIGINT_ID(LoadConstBigIntLongIndex, 2) + +/// Load a constant string value by string table index. +DEFINE_OPCODE_2(LoadConstString, Reg8, UInt16) +DEFINE_OPCODE_2(LoadConstStringLongIndex, Reg8, UInt32) +OPERAND_STRING_ID(LoadConstString, 2) +OPERAND_STRING_ID(LoadConstStringLongIndex, 2) + +/// Load common constants. 
+DEFINE_OPCODE_1(LoadConstEmpty, Reg8) +DEFINE_OPCODE_1(LoadConstUndefined, Reg8) +DEFINE_OPCODE_1(LoadConstNull, Reg8) +DEFINE_OPCODE_1(LoadConstTrue, Reg8) +DEFINE_OPCODE_1(LoadConstFalse, Reg8) +DEFINE_OPCODE_1(LoadConstZero, Reg8) + +/// Coerce a value assumed to contain 'this' to an object using non-strict +/// mode rules. Primitives are boxed, \c null or \c undefined produce the global +/// object. +/// Arg1 = coerce_to_object(Arg2) +DEFINE_OPCODE_2(CoerceThisNS, Reg8, Reg8) + +/// Obtain the raw \c this value and coerce it to an object. Equivalent to: +/// \code +/// LoadParam Arg1, #0 +/// CoerceThisNS Arg1, Arg1 +/// \endcode +DEFINE_OPCODE_1(LoadThisNS, Reg8) + +/// Convert a value to a number. +/// Arg1 = Arg2 - 0 +DEFINE_OPCODE_2(ToNumber, Reg8, Reg8) +/// Convert a value to a numeric. +/// Arg1 = ToNumeric(Arg2) +DEFINE_OPCODE_2(ToNumeric, Reg8, Reg8) +/// Convert a value to a 32-bit signed integer. +/// Arg1 = Arg2 | 0 +DEFINE_OPCODE_2(ToInt32, Reg8, Reg8) + +/// Convert a value to a string as if evaluating the expression: +/// Arg1 = "" + Arg2 +/// In practice this means +/// Arg1 = ToString(ToPrimitive(Arg2, PreferredType::NONE)) +/// with ToPrimitive (ES5.1 9.1) and ToString (ES5.1 9.8). +DEFINE_OPCODE_2(AddEmptyString, Reg8, Reg8) + +// `arguments` opcodes all work with a lazy register that contains either +// undefined or a reified array. On the first ReifyArguments, the register +// will be populated and the rest of the instruction will access it directly. +// This is an optimization to allow arguments[i] to just load an argument +// instead of doing a full array allocation and property lookup. + +/// Get a property of the 'arguments' array by value. +/// Arg1 is the result. +/// Arg2 is the index. +/// Arg3 is the lazy loaded register. +/// Arg1 = arguments[Arg2] +DEFINE_OPCODE_3(GetArgumentsPropByVal, Reg8, Reg8, Reg8) + +/// Get the length of the 'arguments' array. +/// Arg1 is the result. +/// Arg2 is the lazy loaded register.
+/// Arg1 = arguments.length +DEFINE_OPCODE_2(GetArgumentsLength, Reg8, Reg8) + +/// Create an actual 'arguments' array, if get-by-index and length isn't enough. +/// Arg1 is the lazy loaded register, which afterwards will contain a proper +/// object that can be used by non-*Arguments* opcodes like Return. +DEFINE_OPCODE_1(ReifyArguments, Reg8) + +/// Create a regular expression. +/// Arg1 is the result. +/// Arg2 is the string index of the pattern. +/// Arg3 is the string index of the flags. +/// Arg4 is the regexp bytecode index in the regexp table. +DEFINE_OPCODE_4(CreateRegExp, Reg8, UInt32, UInt32, UInt32) +OPERAND_STRING_ID(CreateRegExp, 2) +OPERAND_STRING_ID(CreateRegExp, 3) + +/// Jump table switch - using a table of offsets, jump to the offset of the given +/// input or to the default block if out of range (or not right type) +/// Arg 1 is the value to be branched upon +/// Arg 2 is the relative offset of the jump table to be used by this +/// instruction. Jump tables are appended to the bytecode. +/// Arg 3 is the relative offset for the "default" jump. +/// Arg 4 is the unsigned min value, if arg 1 is less than this value jmp to +/// default block +/// Arg 5 is the unsigned max value, if arg 1 is greater than this value jmp to +/// default block. +/// +/// Given the above, the jump table entry for a given value (that is in range) +/// is located at offset ip + arg2 + arg1 - arg4. We subtract arg4 to avoid +/// wasting space when compiling dense switches that do not start at zero. Note +/// that Arg2 is *unaligned*; it is dynamically aligned at runtime. +DEFINE_OPCODE_5(SwitchImm, Reg8, UInt32, Addr32, UInt32, UInt32) + +/// Start the generator by jumping to the next instruction to begin. +/// Restore the stack frame if this generator has previously been suspended.
+DEFINE_OPCODE_0(StartGenerator) + +/// Resume generator by performing one of the following user-requested actions: +/// - next(val): Set Arg1 to val, Arg2 to false, run next instruction +/// - return(val): Set Arg1 to val, Arg2 to true, run next instruction +/// - throw(val): Throw val as an error +/// Arg1 is the result provided by the user. +/// Arg2 is a boolean which is true if the user requested a return(). +DEFINE_OPCODE_2(ResumeGenerator, Reg8, Reg8) + +/// Set the generator status to complete, but do not return. +DEFINE_OPCODE_0(CompleteGenerator) + +/// Create a generator. +/// Arg1 is the register in which to store the generator. +/// Arg2 is the current environment as loaded by GetEnvironment 0. +/// Arg3 is index in the function table. +DEFINE_OPCODE_3(CreateGenerator, Reg8, Reg8, UInt16) +DEFINE_OPCODE_3(CreateGeneratorLongIndex, Reg8, Reg8, UInt32) +OPERAND_FUNCTION_ID(CreateGenerator, 3) +OPERAND_FUNCTION_ID(CreateGeneratorLongIndex, 3) + +/// Arg1 [out] is the result iterator or index. +/// Arg2 [in/out] is the source. Output for either the source or next method. +/// If source is an array with an unmodified [Symbol.iterator], the result is +/// 0. Else the result is source[Symbol.iterator] and the output is the .next() +/// method on the iterator. +/// See IR.md for IteratorBeginInst. +DEFINE_OPCODE_2(IteratorBegin, Reg8, Reg8) + +/// Arg1 [out] is the result, or undefined if done. +/// Arg2 [in/out] is the iterator or index. +/// Arg3 [in] is the source or the next method. +/// If iterator is undefined, result = undefined. +/// If iterator is a number: +/// If iterator is less than source.length, return source[iterator++] +/// Else iterator = undefined and result = undefined +/// Else: +/// n = iterator.next() +/// If n.done, iterator = undefined and result = undefined. +/// Else result = n.value +/// See IR.md for IteratorNextInst. +DEFINE_OPCODE_3(IteratorNext, Reg8, Reg8, Reg8) + +/// Arg1 [in] is the iterator or array index.
+/// Arg2 is a bool indicating whether to ignore the inner exception. +/// If the iterator is an object, call iterator.return(). +/// If Arg2 is true, ignore exceptions which are thrown by iterator.return(). +/// See IR.md for IteratorCloseInst. +DEFINE_OPCODE_2(IteratorClose, Reg8, UInt8) + +// Jump instructions must be defined through the following DEFINE_JUMP macros. +// The numeric suffix indicates number of operands the instruction takes. +// The macros will automatically generate two opcodes for each definition, +// one short jump that takes Addr8 as target and one long jump that takes +// Addr32 as target. The address is relative to the offset of the instruction. +#define DEFINE_JUMP_1(name) \ + DEFINE_OPCODE_1(name, Addr8) \ + DEFINE_OPCODE_1(name##Long, Addr32) \ + DEFINE_JUMP_LONG_VARIANT(name, name##Long) + +#define DEFINE_JUMP_2(name) \ + DEFINE_OPCODE_2(name, Addr8, Reg8) \ + DEFINE_OPCODE_2(name##Long, Addr32, Reg8) \ + DEFINE_JUMP_LONG_VARIANT(name, name##Long) + +#define DEFINE_JUMP_3(name) \ + DEFINE_OPCODE_3(name, Addr8, Reg8, Reg8) \ + DEFINE_OPCODE_3(name##Long, Addr32, Reg8, Reg8) \ + DEFINE_JUMP_LONG_VARIANT(name, name##Long) + +/// Unconditional branch to Arg1. +DEFINE_JUMP_1(Jmp) +/// Conditional branches to Arg1 based on Arg2. +DEFINE_JUMP_2(JmpTrue) +DEFINE_JUMP_2(JmpFalse) +/// Jump if the value is undefined. +DEFINE_JUMP_2(JmpUndefined) +/// Save the provided value, yield, and signal the VM to restart execution +/// at the provided target. +DEFINE_JUMP_1(SaveGenerator) + +/// Conditional branches to Arg1 based on Arg2 and Arg3. +/// The *N branches assume numbers and are illegal for other types. + +/// Not conditionals are required for NaN comparisons +/// Since we want to be able to reorder targets to allow for fall-throughs, +/// we need to be able to say "jump when not less than to BB2" instead of +/// "jump when less than to BB1". 
+/// Since NaN comparisons always return false, "not less" != "greater or equal" +DEFINE_JUMP_3(JLess) +DEFINE_JUMP_3(JNotLess) +DEFINE_JUMP_3(JLessN) +DEFINE_JUMP_3(JNotLessN) +DEFINE_JUMP_3(JLessEqual) +DEFINE_JUMP_3(JNotLessEqual) +DEFINE_JUMP_3(JLessEqualN) +DEFINE_JUMP_3(JNotLessEqualN) +DEFINE_JUMP_3(JGreater) +DEFINE_JUMP_3(JNotGreater) +DEFINE_JUMP_3(JGreaterN) +DEFINE_JUMP_3(JNotGreaterN) +DEFINE_JUMP_3(JGreaterEqual) +DEFINE_JUMP_3(JNotGreaterEqual) +DEFINE_JUMP_3(JGreaterEqualN) +DEFINE_JUMP_3(JNotGreaterEqualN) +DEFINE_JUMP_3(JEqual) +DEFINE_JUMP_3(JNotEqual) +DEFINE_JUMP_3(JStrictEqual) +DEFINE_JUMP_3(JStrictNotEqual) + +#ifdef HERMES_RUN_WASM +/// Arg1 = Arg2 + Arg3 (32-bit integer addition) +DEFINE_OPCODE_3(Add32, Reg8, Reg8, Reg8) +/// Arg1 = Arg2 - Arg3 (32-bit integer subtraction) +DEFINE_OPCODE_3(Sub32, Reg8, Reg8, Reg8) +/// Arg1 = Arg2 * Arg3 (32-bit integer multiplication) +DEFINE_OPCODE_3(Mul32, Reg8, Reg8, Reg8) +/// Arg1 = Arg2 / Arg3 (32-bit signed integer division) +DEFINE_OPCODE_3(Divi32, Reg8, Reg8, Reg8) +/// Arg1 = Arg2 / Arg3 (32-bit unsigned integer division) +DEFINE_OPCODE_3(Divu32, Reg8, Reg8, Reg8) + +/// Arg1 = HEAP8[Arg3] (load signed 8-bit integer) +DEFINE_OPCODE_3(Loadi8, Reg8, Reg8, Reg8) +/// Arg1 = HEAPU8[Arg3] (load unsigned 8-bit integer) +DEFINE_OPCODE_3(Loadu8, Reg8, Reg8, Reg8) +/// Arg1 = HEAP16[Arg3 >> 1] (load signed 16-bit integer) +DEFINE_OPCODE_3(Loadi16, Reg8, Reg8, Reg8) +/// Arg1 = HEAPU16[Arg3 >> 1] (load unsigned 16-bit integer) +DEFINE_OPCODE_3(Loadu16, Reg8, Reg8, Reg8) +/// Arg1 = HEAP32[Arg3 >> 2] (load signed 32-bit integer) +DEFINE_OPCODE_3(Loadi32, Reg8, Reg8, Reg8) +/// Arg1 = HEAPU32[Arg3 >> 2] (load unsigned 32-bit integer) +DEFINE_OPCODE_3(Loadu32, Reg8, Reg8, Reg8) + +/// HEAP8[Arg2] = Arg3 (store signed or unsigned 8-bit integer) +DEFINE_OPCODE_3(Store8, Reg8, Reg8, Reg8) +/// HEAP16[Arg2] = Arg3 (store signed or unsigned 16-bit integer) +DEFINE_OPCODE_3(Store16, Reg8, Reg8, Reg8) +/// 
HEAP32[Arg2] = Arg3 (store signed or unsigned 32-bit integer) +DEFINE_OPCODE_3(Store32, Reg8, Reg8, Reg8) +#endif + +// Implementations can rely on the following pairs of instructions having the +// same number and type of operands. +ASSERT_EQUAL_LAYOUT3(Call, Construct) +ASSERT_EQUAL_LAYOUT4(GetById, TryGetById) +ASSERT_EQUAL_LAYOUT4(PutById, TryPutById) +ASSERT_EQUAL_LAYOUT3(PutNewOwnById, PutNewOwnNEById) +ASSERT_EQUAL_LAYOUT3(PutNewOwnByIdLong, PutNewOwnNEByIdLong) +ASSERT_EQUAL_LAYOUT3(Add, AddN) +ASSERT_EQUAL_LAYOUT3(Sub, SubN) +ASSERT_EQUAL_LAYOUT3(Mul, MulN) + +// Call and CallLong must agree on the first 2 parameters. +ASSERT_EQUAL_LAYOUT2(Call, CallLong) +ASSERT_EQUAL_LAYOUT2(Construct, ConstructLong) + +#undef DEFINE_JUMP_1 +#undef DEFINE_JUMP_2 +#undef DEFINE_JUMP_3 + +// Undefine all macros used to avoid confusing next include. +#undef DEFINE_OPERAND_TYPE +#undef DEFINE_OPCODE_0 +#undef DEFINE_OPCODE_1 +#undef DEFINE_OPCODE_2 +#undef DEFINE_OPCODE_3 +#undef DEFINE_OPCODE_4 +#undef DEFINE_OPCODE_5 +#undef DEFINE_OPCODE_6 +#undef DEFINE_OPCODE +#undef DEFINE_JUMP_LONG_VARIANT +#undef DEFINE_RET_TARGET +#undef ASSERT_EQUAL_LAYOUT1 +#undef ASSERT_EQUAL_LAYOUT2 +#undef ASSERT_EQUAL_LAYOUT3 +#undef ASSERT_EQUAL_LAYOUT4 +#undef ASSERT_MONOTONE_INCREASING +#undef OPERAND_BIGINT_ID +#undef OPERAND_FUNCTION_ID +#undef OPERAND_STRING_ID diff --git a/hbctool/hbc/hbc94/raw/SerializedLiteralGenerator.h b/hbctool/hbc/hbc94/raw/SerializedLiteralGenerator.h new file mode 100644 index 0000000..7b02e12 --- /dev/null +++ b/hbctool/hbc/hbc94/raw/SerializedLiteralGenerator.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#ifndef HERMES_BCGEN_HBC_SERIALIZEDLITERALGENERATOR_H +#define HERMES_BCGEN_HBC_SERIALIZEDLITERALGENERATOR_H + +#include "llvh/ADT/ArrayRef.h" + +#include + +namespace hermes { +class Literal; + +namespace hbc { + +class BytecodeModuleGenerator; + +/// Generator of literal value buffers for object/array literals. +/// SerializedLiteralParser.h is responsible to decode the serialized literal +/// buffer. +/// +/// The format is designed to represent sequences of similarly typed Literals in +/// as small a representation as possible; it is therefore both variable length +/// and has a variable tag length. +/// +/// If the sequence of similarly typed Literals is shorter than 16 Literals, the +/// tag is as follows: +/// 0 ttt llll +/// | | | +/// 7 6 3 0 +/// +/// t: Type of the sequence (Tags are defined in the class) +/// l: Length of the sequence +/// +/// If the sequence of similarly typed Literals is longer than 15 Literals, the +/// tag is as follows: +/// 1 ttt llll,llllllll +/// | | | +/// 15 14 11 0 +/// +/// t: Type of the sequence (Tags are defined in the class) +/// l: Length of the sequence +/// +/// Following the tags are the values of the sequences. +/// Note that `true`, `false`, and `null` don't need extra bytes, and are +/// therefore generated just from reading the tag length and type. +/// Short strings (indices into the string table smaller than 2^16) take up two +/// bytes, doubles take up eight bytes, and integers and long strings take up +/// four bytes. All values are serialized in little-endian format. +class SerializedLiteralGenerator { + private: + /// The bytecode module generator. + BytecodeModuleGenerator &BMGen_; + + public: + SerializedLiteralGenerator(BytecodeModuleGenerator &BMGen) : BMGen_(BMGen) {} + + using TagType = unsigned char; + + // Since undefined is very rarely used in literals in practice, replacing + // it with a single byte string tag allows us to save even more space.
"""Generate ``data/opcode.json`` from the raw Hermes ``BytecodeList.def``.

Usage: run this file as a script.  Opcodes are numbered consecutively in the
order their definitions appear in ``../raw/BytecodeList.def`` (the number is
printed in hex next to each name while parsing), and the resulting
``{opcode name: [operand types]}`` mapping is written to
``../data/opcode.json`` with its keys in that same order.

Preprocessor conditionals are not evaluated: every ``DEFINE_OPCODE_*`` line
that starts in column 0 is emitted, including those inside ``#ifdef`` blocks.
"""
import json
import pathlib
import re

basepath = pathlib.Path(__file__).parent.absolute()

# Operand lists generated by the DEFINE_JUMP_<n> macros: each jump defines a
# short form (Addr8 target) and a "<name>Long" form (Addr32 target).
jmp_operand = {
    "1": ["Addr8"],
    "1Long": ["Addr32"],
    "2": ["Addr8", "Reg8"],
    "2Long": ["Addr32", "Reg8"],
    "3": ["Addr8", "Reg8", "Reg8"],
    "3Long": ["Addr32", "Reg8", "Reg8"],
}

# Macro invocations that are parsed; they may be wrapped over several lines.
_STATEMENT_PREFIXES = ("DEFINE_OPCODE_", "OPERAND_STRING_ID", "DEFINE_JUMP_")

# Lines that carry nothing that is recorded in opcode.json.
_SKIPPED_PREFIXES = (
    "ASSERT_",
    "DEFINE_RET_TARGET",
    "DEFINE_OPERAND_TYPE",
    # Function-table / bigint-table operand annotations are not recorded.
    "OPERAND_FUNCTION_ID",
    "OPERAND_BIGINT_ID",
    # Preprocessor directives, comments, continuation lines and blank lines.
    "#",
    "//",
    "/*",
    " *",
    " ",
    "\n",
)

# Example: DEFINE_OPCODE_4(NewArrayWithBuffer, Reg8, UInt16, UInt16, UInt16)
#   group 1 = "NewArrayWithBuffer", group 2 = ", Reg8, UInt16, UInt16, UInt16"
_OPCODE_RE = re.compile(r"\(\s*(\w+)((?:\s*,\s*\w+)*)\s*\)")
# Example: OPERAND_STRING_ID(LoadConstString, 2)
_STRING_ID_RE = re.compile(r"\(\s*(\w+)\s*,\s*(\d+)\s*\)")
# Example: DEFINE_JUMP_3(JLess)
_JUMP_RE = re.compile(r"DEFINE_JUMP_(\d)\(\s*(\w+)\s*\)")


def parse_bytecode_list(lines):
    """Build the opcode table from the lines of a ``BytecodeList.def`` file.

    Args:
        lines: iterable of text lines, each including its trailing newline
            (as returned by ``file.readlines()``).

    Returns:
        A dict mapping opcode name to its list of operand type names.  The
        insertion order of the dict is the opcode numbering.  Operands named
        by ``OPERAND_STRING_ID`` get the suffix ``":S"``.

    Raises:
        ValueError: on a malformed or unterminated macro invocation, a
            duplicate opcode name, or an ``OPERAND_STRING_ID`` that refers to
            an unknown opcode or operand.
        KeyError: if a ``DEFINE_JUMP_<n>`` uses an arity that ``jmp_operand``
            does not describe.

    Lines that match no known form are reported on stdout together with
    their line number and otherwise ignored.
    """
    json_op = {}

    def add_op(name, operands):
        # A repeated name would overwrite the entry while later opcodes keep
        # counting, silently shifting every following opcode number.
        if name in json_op:
            raise ValueError(f"Duplicate opcode definition ({name})")
        print(hex(len(json_op))[2:], name)
        # Copy so that tagging one opcode's operand never leaks into the
        # shared jmp_operand templates (and thereby into other jumps).
        json_op[name] = list(operands)

    numbered = enumerate(lines, start=1)
    for line_num, line in numbered:
        if line.startswith(_STATEMENT_PREFIXES):
            # Pull in continuation lines until the invocation is closed.
            statement = line
            while ")" not in statement:
                _, continuation = next(numbered, (None, None))
                if continuation is None:
                    raise ValueError(
                        f"Unterminated macro starting at line {line_num}"
                    )
                statement += continuation

            if line.startswith("DEFINE_OPCODE_"):
                match = _OPCODE_RE.search(statement)
                if match is None:
                    raise ValueError(
                        f"Malformed opcode definition at line {line_num}: "
                        f"{statement!r}"
                    )
                add_op(match.group(1), re.findall(r"\w+", match.group(2)))

            elif line.startswith("OPERAND_STRING_ID"):
                match = _STRING_ID_RE.search(statement)
                if match is None:
                    raise ValueError(
                        f"Malformed OPERAND_STRING_ID at line {line_num}: "
                        f"{statement!r}"
                    )
                name = match.group(1)
                operandID = int(match.group(2)) - 1  # Starts from 0

                if name not in json_op:
                    raise ValueError(f"Opcode not found ({name})")
                if not 0 <= operandID < len(json_op[name]):
                    raise ValueError(f"Operand not found ({operandID})")

                # Mark the operand as a string-table index.
                json_op[name][operandID] += ":S"

            else:  # DEFINE_JUMP_<n>(name)
                match = _JUMP_RE.search(statement)
                if match is None:
                    raise ValueError(
                        f"Malformed jump definition at line {line_num}: "
                        f"{statement!r}"
                    )
                num_op, name = match.groups()
                add_op(name, jmp_operand[num_op])
                add_op(f"{name}Long", jmp_operand[f"{num_op}Long"])

        elif line.startswith(_SKIPPED_PREFIXES):
            continue

        else:
            # Unhandled cases
            print(line_num, line)

    return json_op


def main():
    """Read ``../raw/BytecodeList.def`` and write ``../data/opcode.json``."""
    source = basepath / ".." / "raw" / "BytecodeList.def"
    with open(source, "r", encoding="utf-8") as bytecodeListFile:
        lines = bytecodeListFile.readlines()

    json_op = parse_bytecode_list(lines)

    target = basepath / ".." / "data" / "opcode.json"
    with open(target, "w", encoding="utf-8") as f:
        json.dump(json_op, f, indent=4)


if __name__ == "__main__":
    main()
"""Translate between raw bytecode and decoded instructions (HBC version 94).

The opcode table is loaded once, at import time, from ``data/opcode.json``.
The position of a name in that file is its opcode number, and its value is
the ordered list of operand type names (a ``":S"`` suffix marks an operand
that is a string-table index).
"""
import pathlib
import json
from hbctool.util import *

basepath = pathlib.Path(__file__).parent.absolute()

# operand type name -> (size in bytes, decoder applied to a slice of the
# bytecode, encoder whose result is appended to the bytecode).
# NOTE(review): Imm32 is decoded as unsigned here; it is presumably a signed
# 32-bit immediate upstream. Confirm before changing, since values emitted by
# disassemble() currently round-trip through the unsigned encoder.
operand_type = {
    "Reg8": (1, to_uint8, from_uint8),
    "Reg32": (4, to_uint32, from_uint32),
    "UInt8": (1, to_uint8, from_uint8),
    "UInt16": (2, to_uint16, from_uint16),
    "UInt32": (4, to_uint32, from_uint32),
    "Addr8": (1, to_int8, from_int8),
    "Addr32": (4, to_int32, from_int32),
    "Imm32": (4, to_uint32, from_uint32),
    "Double": (8, to_double, from_double),
}

with open(f"{basepath}/data/opcode.json", "r") as f:
    opcode_operand = json.load(f)

# opcode number -> name, and the inverse name -> opcode number.
opcode_mapper = list(opcode_operand.keys())
opcode_mapper_inv = {name: number for number, name in enumerate(opcode_mapper)}


def disassemble(bc):
    """Decode a bytecode sequence into a list of instructions.

    Args:
        bc: indexable, sliceable sequence of byte values holding whole
            instructions back to back.

    Returns:
        A list of ``(opcode_name, operands)`` tuples, where ``operands`` is a
        list of ``(operand_type, is_string, value)`` tuples in encoding
        order.  ``operand_type`` has any ``":S"`` suffix removed;
        ``is_string`` records whether the suffix was present.

    Raises:
        IndexError: if an opcode byte has no entry in the opcode table.
    """
    i = 0
    insts = []
    while i < len(bc):
        opcode = opcode_mapper[bc[i]]
        i += 1
        inst = (opcode, [])
        for oper_t in opcode_operand[opcode]:
            is_str = oper_t.endswith(":S")
            if is_str:
                oper_t = oper_t[:-2]

            size, conv_to, _ = operand_type[oper_t]
            val = conv_to(bc[i:i + size])
            inst[1].append((oper_t, is_str, val))
            i += size

        insts.append(inst)

    return insts


def assemble(insts):
    """Encode instructions (as produced by ``disassemble``) into bytecode.

    Args:
        insts: iterable of ``(opcode_name, operands)`` tuples, ``operands``
            being a list of ``(operand_type, is_string, value)`` tuples.

    Returns:
        A list holding, per instruction, the opcode number followed by the
        encoded operands.

    Raises:
        KeyError: if an opcode name is not in the opcode table.
        AssertionError: if the operand count does not match the opcode's
            definition or an operand type is unknown.  Raised explicitly so
            the validation also runs under ``python -O``.
    """
    bc = []
    for opcode, operands in insts:
        op = opcode_mapper_inv[opcode]
        if len(opcode_operand[opcode]) != len(operands):
            raise AssertionError(
                f"Malicious instruction: {opcode}, {operands}"
            )
        bc.append(op)
        for oper_t, _, val in operands:
            if oper_t not in operand_type:
                raise AssertionError(f"Malicious operand type: {oper_t}")
            _, _, conv_from = operand_type[oper_t]
            bc += conv_from(val)

    return bc