diff --git a/.github/script/amd64-20.04.Dockerfile b/.github/script/amd64-20.04.Dockerfile index 40d980e5e..1ec89ebd7 100644 --- a/.github/script/amd64-20.04.Dockerfile +++ b/.github/script/amd64-20.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= -DCMAKE_CXX_FLAGS="-mavx2" .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client diff --git a/.github/script/amd64-22.04.Dockerfile b/.github/script/amd64-22.04.Dockerfile index 44c9c40b7..6134d1673 100644 --- a/.github/script/amd64-22.04.Dockerfile +++ b/.github/script/amd64-22.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= -DCMAKE_CXX_FLAGS="-mavx2" .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client diff --git a/.github/script/arm64-20.04.Dockerfile b/.github/script/arm64-20.04.Dockerfile index 1f57dc401..5e3505345 100644 --- a/.github/script/arm64-20.04.Dockerfile +++ b/.github/script/arm64-20.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= .. 
-RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id dht-server lite-client diff --git a/.github/script/arm64-22.04.Dockerfile b/.github/script/arm64-22.04.Dockerfile index 2b595839f..f9805849d 100644 --- a/.github/script/arm64-22.04.Dockerfile +++ b/.github/script/arm64-22.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id dht-server lite-client diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 263bd9a43..ca08357c2 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -167,6 +167,14 @@ jobs: asset_name: func.exe tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Windows 2019 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-win-binaries/tolk.exe + asset_name: tolk.exe + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Windows 2019 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -257,6 +265,14 @@ jobs: asset_name: func-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + 
repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-macos-binaries/tolk + asset_name: tolk-mac-x86-64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac x86-64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -348,6 +364,14 @@ jobs: asset_name: func-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac arm64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-macos-binaries/tolk + asset_name: tolk-mac-arm64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac arm64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -438,6 +462,14 @@ jobs: asset_name: func-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Linux x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-linux-binaries/tolk + asset_name: tolk-linux-x86_64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Linux x86-64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: diff --git a/.github/workflows/ton-arm64-macos.yml b/.github/workflows/ton-arm64-macos.yml index 9e8302e80..affe22456 100644 --- a/.github/workflows/ton-arm64-macos.yml +++ b/.github/workflows/ton-arm64-macos.yml @@ -29,6 +29,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.github/workflows/ton-x86-64-linux.yml b/.github/workflows/ton-x86-64-linux.yml index abbe1cca4..b7ef9684e 100644 --- a/.github/workflows/ton-x86-64-linux.yml +++ b/.github/workflows/ton-x86-64-linux.yml @@ -33,6 +33,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.github/workflows/ton-x86-64-macos.yml 
b/.github/workflows/ton-x86-64-macos.yml index 8c71f34a1..1890dc344 100644 --- a/.github/workflows/ton-x86-64-macos.yml +++ b/.github/workflows/ton-x86-64-macos.yml @@ -29,6 +29,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.gitignore b/.gitignore index 536918ab3..9b94834b8 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ test/regression-tests.cache/ **/*build*/ .idea .vscode +.DS_Store +dev/ zlib/ libsodium/ libmicrohttpd-0.9.77-w32-bin/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 573bc3a32..7d5b8da5c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -413,6 +413,7 @@ add_subdirectory(adnl) add_subdirectory(crypto) add_subdirectory(lite-client) add_subdirectory(emulator) +add_subdirectory(tolk) #BEGIN tonlib add_subdirectory(tonlib) @@ -626,6 +627,30 @@ if (NOT NIX) endif() endif() +# Tolk tests +if (NOT NIX) + if (MSVC) + set(PYTHON_VER "python") + else() + set(PYTHON_VER "python3") + endif() + add_test( + NAME test-tolk + COMMAND ${PYTHON_VER} tolk-tester.py tests/ + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tolk-tester) + if (WIN32) + set_property(TEST test-tolk PROPERTY ENVIRONMENT + "TOLK_EXECUTABLE=${CMAKE_CURRENT_BINARY_DIR}/tolk/tolk.exe" + "FIFT_EXECUTABLE=${CMAKE_CURRENT_BINARY_DIR}/crypto/fift.exe" + "FIFTPATH=${CMAKE_CURRENT_SOURCE_DIR}/crypto/fift/lib/") + else() + set_property(TEST test-tolk PROPERTY ENVIRONMENT + "TOLK_EXECUTABLE=${CMAKE_CURRENT_BINARY_DIR}/tolk/tolk" + "FIFT_EXECUTABLE=${CMAKE_CURRENT_BINARY_DIR}/crypto/fift" + "FIFTPATH=${CMAKE_CURRENT_SOURCE_DIR}/crypto/fift/lib/") + endif() +endif() + #BEGIN internal if (NOT TON_ONLY_TONLIB) add_test(test-adnl test-adnl) diff --git a/assembly/native/build-macos-portable.sh b/assembly/native/build-macos-portable.sh index 0e1003b56..af82b2c01 100644 --- a/assembly/native/build-macos-portable.sh +++ b/assembly/native/build-macos-portable.sh @@ -153,7 +153,7 
@@ test $? -eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator \ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont \ @@ -162,7 +162,7 @@ if [ "$with_tests" = true ]; then test $? -eq 0 || { echo "Can't compile ton"; exit 1; } else ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } @@ -173,6 +173,7 @@ strip -s storage/storage-daemon/storage-daemon-cli strip -s blockchain-explorer/blockchain-explorer strip -s crypto/fift strip -s crypto/func +strip -s tolk/tolk strip -s crypto/create-state strip -s crypto/tlbc strip -s validator-engine-console/validator-engine-console @@ -197,6 +198,7 @@ if [ "$with_artifacts" = true ]; then cp build/blockchain-explorer/blockchain-explorer artifacts/ cp build/crypto/fift artifacts/ cp build/crypto/func artifacts/ + cp build/tolk/tolk artifacts/ cp build/crypto/create-state artifacts/ cp build/crypto/tlbc artifacts/ cp build/validator-engine-console/validator-engine-console artifacts/ diff --git a/assembly/native/build-macos-shared.sh b/assembly/native/build-macos-shared.sh index 7fdcfb941..8a7399aa9 100644 --- a/assembly/native/build-macos-shared.sh +++ b/assembly/native/build-macos-shared.sh @@ -81,7 +81,7 @@ test $? 
-eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator \ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont \ @@ -90,7 +90,7 @@ if [ "$with_tests" = true ]; then test $? -eq 0 || { echo "Can't compile ton"; exit 1; } else ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } @@ -102,6 +102,7 @@ strip -s storage/storage-daemon/storage-daemon-cli strip -s blockchain-explorer/blockchain-explorer strip -s crypto/fift strip -s crypto/func +strip -s tolk/tolk strip -s crypto/create-state strip -s crypto/tlbc strip -s validator-engine-console/validator-engine-console @@ -126,6 +127,7 @@ if [ "$with_artifacts" = true ]; then cp build/blockchain-explorer/blockchain-explorer artifacts/ cp build/crypto/fift artifacts/ cp build/crypto/func artifacts/ + cp build/tolk/tolk artifacts/ cp build/crypto/create-state artifacts/ cp build/crypto/tlbc artifacts/ cp build/validator-engine-console/validator-engine-console artifacts/ diff --git a/assembly/native/build-ubuntu-portable.sh b/assembly/native/build-ubuntu-portable.sh index 73ae59264..8ae977e0b 100644 --- a/assembly/native/build-ubuntu-portable.sh +++ b/assembly/native/build-ubuntu-portable.sh @@ -144,7 +144,7 @@ cmake -GNinja .. \ test $? 
-eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator test-ed25519 test-ed25519-crypto test-bigint \ @@ -153,7 +153,7 @@ ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ test-fec test-tddb test-db test-validator-session-state test-emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } else -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator @@ -166,6 +166,7 @@ strip -s storage/storage-daemon/storage-daemon \ crypto/fift \ crypto/tlbc \ crypto/func \ + tolk/tolk \ crypto/create-state \ validator-engine-console/validator-engine-console \ tonlib/tonlib-cli \ @@ -195,7 +196,7 @@ if [ "$with_artifacts" = true ]; then mkdir artifacts mv build/tonlib/libtonlibjson.so.0.5 build/tonlib/libtonlibjson.so cp build/storage/storage-daemon/storage-daemon build/storage/storage-daemon/storage-daemon-cli \ - build/crypto/fift build/crypto/tlbc build/crypto/func build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ + build/crypto/fift build/crypto/tlbc build/crypto/func build/tolk/tolk build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ build/validator-engine-console/validator-engine-console build/tonlib/tonlib-cli \ build/tonlib/libtonlibjson.so build/http/http-proxy build/rldp-http-proxy/rldp-http-proxy \ 
build/dht-server/dht-server build/lite-client/lite-client build/validator-engine/validator-engine \ diff --git a/assembly/native/build-ubuntu-shared.sh b/assembly/native/build-ubuntu-shared.sh index 00b9aa9b4..6b1841cdf 100644 --- a/assembly/native/build-ubuntu-shared.sh +++ b/assembly/native/build-ubuntu-shared.sh @@ -52,7 +52,7 @@ cmake -GNinja -DTON_USE_JEMALLOC=ON .. \ test $? -eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator test-ed25519 test-ed25519-crypto test-bigint \ @@ -61,7 +61,7 @@ ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ test-fec test-tddb test-db test-validator-session-state test-emulator test $? 
-eq 0 || { echo "Can't compile ton"; exit 1; } else -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator @@ -74,6 +74,7 @@ strip -s storage/storage-daemon/storage-daemon \ crypto/fift \ crypto/tlbc \ crypto/func \ + tolk/tolk \ crypto/create-state \ validator-engine-console/validator-engine-console \ tonlib/tonlib-cli \ @@ -105,7 +106,7 @@ if [ "$with_artifacts" = true ]; then mkdir artifacts mv build/tonlib/libtonlibjson.so.0.5 build/tonlib/libtonlibjson.so cp build/storage/storage-daemon/storage-daemon build/storage/storage-daemon/storage-daemon-cli \ - build/crypto/fift build/crypto/tlbc build/crypto/func build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ + build/crypto/fift build/crypto/tlbc build/crypto/func build/tolk/tolk build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ build/validator-engine-console/validator-engine-console build/tonlib/tonlib-cli \ build/tonlib/libtonlibjson.so build/http/http-proxy build/rldp-http-proxy/rldp-http-proxy \ build/dht-server/dht-server build/lite-client/lite-client build/validator-engine/validator-engine \ diff --git a/assembly/native/build-windows-2019.bat b/assembly/native/build-windows-2019.bat index f728b88f8..fdfb6bcf6 100644 --- a/assembly/native/build-windows-2019.bat +++ b/assembly/native/build-windows-2019.bat @@ -155,7 +155,7 @@ IF %errorlevel% NEQ 0 ( ) IF "%1"=="-t" ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server 
http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator ^ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont test-net ^ @@ -166,7 +166,7 @@ IF %errorlevel% NEQ 0 ( exit /b %errorlevel% ) ) else ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator IF %errorlevel% NEQ 0 ( @@ -204,6 +204,7 @@ build\blockchain-explorer\blockchain-explorer.exe ^ build\crypto\fift.exe ^ build\crypto\tlbc.exe ^ build\crypto\func.exe ^ +build\tolk\tolk.exe ^ build\crypto\create-state.exe ^ build\validator-engine-console\validator-engine-console.exe ^ build\tonlib\tonlib-cli.exe ^ diff --git a/assembly/native/build-windows.bat b/assembly/native/build-windows.bat index aa0fd69ad..e1ce9e473 100644 --- a/assembly/native/build-windows.bat +++ b/assembly/native/build-windows.bat @@ -156,7 +156,7 @@ IF %errorlevel% NEQ 0 ( ) IF "%1"=="-t" ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator ^ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont test-net ^ @@ -167,7 +167,7 @@ IF %errorlevel% NEQ 0 ( exit /b %errorlevel% ) ) else ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client 
pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator IF %errorlevel% NEQ 0 ( @@ -205,6 +205,7 @@ build\blockchain-explorer\blockchain-explorer.exe ^ build\crypto\fift.exe ^ build\crypto\tlbc.exe ^ build\crypto\func.exe ^ +build\tolk\tolk.exe ^ build\crypto\create-state.exe ^ build\validator-engine-console\validator-engine-console.exe ^ build\tonlib\tonlib-cli.exe ^ diff --git a/assembly/nix/build-linux-arm64-nix.sh b/assembly/nix/build-linux-arm64-nix.sh index 2c7df521f..565b1d253 100644 --- a/assembly/nix/build-linux-arm64-nix.sh +++ b/assembly/nix/build-linux-arm64-nix.sh @@ -43,6 +43,7 @@ sudo strip -s storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/assembly/nix/build-linux-x86-64-nix.sh b/assembly/nix/build-linux-x86-64-nix.sh index ae478ec2d..e6a3aef07 100644 --- a/assembly/nix/build-linux-x86-64-nix.sh +++ b/assembly/nix/build-linux-x86-64-nix.sh @@ -43,6 +43,7 @@ sudo strip -s storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/assembly/nix/build-macos-nix.sh b/assembly/nix/build-macos-nix.sh index c92eddb28..0ada59a4a 100644 --- a/assembly/nix/build-macos-nix.sh +++ b/assembly/nix/build-macos-nix.sh @@ -43,6 +43,7 @@ sudo strip -xSX storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/crypto/fift/lib/Asm.fif b/crypto/fift/lib/Asm.fif index 92ceab6db..964db4417 100644 --- a/crypto/fift/lib/Asm.fif +++ b/crypto/fift/lib/Asm.fif @@ -1589,6 +1589,9 @@ forget @proclist forget @proccnt { }END> b> } : }END>c { }END>c s +// This is the way how FunC assigns method_id for reserved functions. +// Note, that Tolk entrypoints have other names (`onInternalMessage`, etc.), +// but method_id is assigned not by Fift, but by Tolk code generation. 
0 constant recv_internal -1 constant recv_external -2 constant run_ticktock diff --git a/crypto/fift/utils.cpp b/crypto/fift/utils.cpp index f37766a72..6057b2dc0 100644 --- a/crypto/fift/utils.cpp +++ b/crypto/fift/utils.cpp @@ -114,7 +114,7 @@ class MemoryFileLoader : public fift::FileLoader { std::map> files_; }; -td::Result create_source_lookup(std::string main, bool need_preamble = true, bool need_asm = true, +td::Result create_source_lookup(std::string&& main, bool need_preamble = true, bool need_asm = true, bool need_ton_util = true, bool need_lisp = true, bool need_w3_code = true, bool need_fift_ext = true, bool need_disasm = true, std::string dir = "") { @@ -189,7 +189,7 @@ td::Result run_fift(fift::SourceLookup source_lookup, std::o } // namespace td::Result mem_run_fift(std::string source, std::vector args, std::string fift_dir) { std::stringstream ss; - TRY_RESULT(source_lookup, create_source_lookup(source, true, true, true, true, true, true, true, fift_dir)); + TRY_RESULT(source_lookup, create_source_lookup(std::move(source), true, true, true, true, true, true, true, fift_dir)); TRY_RESULT_ASSIGN(source_lookup, run_fift(std::move(source_lookup), &ss, true, std::move(args))); FiftOutput res; res.source_lookup = std::move(source_lookup); @@ -207,19 +207,43 @@ td::Result mem_run_fift(SourceLookup source_lookup, std::vector create_mem_source_lookup(std::string main, std::string fift_dir, bool need_preamble, bool need_asm, bool need_ton_util, bool need_lisp, bool need_w3_code) { - return create_source_lookup(main, need_preamble, need_asm, need_ton_util, need_lisp, need_w3_code, false, false, + return create_source_lookup(std::move(main), need_preamble, need_asm, need_ton_util, need_lisp, need_w3_code, false, false, fift_dir); } -td::Result> compile_asm(td::Slice asm_code, std::string fift_dir, bool is_raw) { +td::Result> compile_asm(td::Slice asm_code) { std::stringstream ss; - TRY_RESULT(source_lookup, - create_source_lookup(PSTRING() << "\"Asm.fif\" 
include\n " << (is_raw ? "<{" : "") << asm_code << "\n" - << (is_raw ? "}>c" : "") << " boc>B \"res\" B>file", - true, true, true, false, false, false, false, fift_dir)); + std::string sb; + sb.reserve(asm_code.size() + 100); + sb.append("\"Asm.fif\" include\n <{\n"); + sb.append(asm_code.data(), asm_code.size()); + sb.append("\n}>c boc>B \"res\" B>file"); + + TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false)); TRY_RESULT(res, run_fift(std::move(source_lookup), &ss)); TRY_RESULT(boc, res.read_file("res")); return vm::std_boc_deserialize(std::move(boc.data)); } +td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir) { + std::string main_fif; + main_fif.reserve(program_code.size() + 100); + main_fif.append(program_code.data(), program_code.size()); + main_fif.append(R"( dup hashB B>X $>B "hex" B>file)"); // write codeHashHex to a file + main_fif.append(R"( boc>B B>base64 $>B "boc" B>file)"); // write codeBoc64 to a file + + std::stringstream fift_output_stream; + TRY_RESULT(source_lookup, create_source_lookup(std::move(main_fif), true, true, false, false, false, false, false, fift_dir)); + TRY_RESULT(res, run_fift(std::move(source_lookup), &fift_output_stream)); + + TRY_RESULT(boc, res.read_file("boc")); + TRY_RESULT(hex, res.read_file("hex")); + + return CompiledProgramOutput{ + std::move(program_code), + std::move(boc.data), + std::move(hex.data), + }; +} + } // namespace fift diff --git a/crypto/fift/utils.h b/crypto/fift/utils.h index dd434fe01..fab92c542 100644 --- a/crypto/fift/utils.h +++ b/crypto/fift/utils.h @@ -26,11 +26,21 @@ struct FiftOutput { SourceLookup source_lookup; std::string output; }; + +// given a valid Fift code PROGRAM{ ... 
}END>c, compile_asm_program() returns this output +// now it's used primarily for wasm output (see tolk-js, for example) +struct CompiledProgramOutput { + std::string fiftCode; + std::string codeBoc64; + std::string codeHashHex; +}; + td::Result create_mem_source_lookup(std::string main, std::string fift_dir = "", bool need_preamble = true, bool need_asm = true, bool need_ton_util = true, bool need_lisp = true, bool need_w3_code = true); td::Result mem_run_fift(std::string source, std::vector args = {}, std::string fift_dir = ""); td::Result mem_run_fift(SourceLookup source_lookup, std::vector args); -td::Result> compile_asm(td::Slice asm_code, std::string fift_dir = "", bool is_raw = true); +td::Result> compile_asm(td::Slice asm_code); +td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir); } // namespace fift diff --git a/crypto/funcfiftlib/funcfiftlib.cpp b/crypto/funcfiftlib/funcfiftlib.cpp index 0bef9eac7..403c075dd 100644 --- a/crypto/funcfiftlib/funcfiftlib.cpp +++ b/crypto/funcfiftlib/funcfiftlib.cpp @@ -37,10 +37,10 @@ td::Result compile_internal(char *config_json) { TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) - auto &obj = input_json.get_object(); + td::JsonObject& config = input_json.get_object(); - TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false)); - TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false)); + TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optLevel", false)); + TRY_RESULT(sources_obj, td::get_json_object_field(config, "sources", td::JsonValue::Type::Array, false)); auto &sources_arr = sources_obj.get_array(); @@ -52,29 +52,25 @@ td::Result compile_internal(char *config_json) { funC::opt_level = std::max(0, opt_level); funC::program_envelope = true; + funC::asm_preamble = true; funC::verbosity = 0; funC::indent = 1; std::ostringstream outs, errs; - auto compile_res = funC::func_proceed(sources, 
outs, errs); - - if (compile_res != 0) { - return td::Status::Error(std::string("Func compilation error: ") + errs.str()); + int funC_res = funC::func_proceed(sources, outs, errs); + if (funC_res != 0) { + return td::Status::Error("FunC compilation error: " + errs.str()); } - TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false)); - TRY_RESULT(boc, vm::std_boc_serialize(code_cell)); + TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); td::JsonBuilder result_json; - auto result_obj = result_json.enter_object(); - result_obj("status", "ok"); - result_obj("codeBoc", td::base64_encode(boc)); - result_obj("fiftCode", outs.str()); - result_obj("codeHashHex", code_cell->get_hash().to_hex()); - result_obj.leave(); - - outs.clear(); - errs.clear(); + auto obj = result_json.enter_object(); + obj("status", "ok"); + obj("fiftCode", std::move(fift_res.fiftCode)); + obj("codeBoc", std::move(fift_res.codeBoc64)); + obj("codeHashHex", std::move(fift_res.codeHashHex)); + obj.leave(); return result_json.string_builder().as_cslice().str(); } diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk new file mode 100644 index 000000000..dec12e233 --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -0,0 +1,747 @@ +// Standard library for Tolk (LGPL licence). +// It contains common functions that are available out of the box, the user doesn't have to import anything. +// More specific functions are required to be imported explicitly, like "@stdlib/tvm-dicts". +tolk 0.6 + +/** + Tuple manipulation primitives. + Elements of a tuple can be of arbitrary type. + Note that atomic type `tuple` can't be cast to composite tuple type (e.g. `[int, cell]`) and vice versa. + */ + +/// Creates a tuple with zero elements. +@pure +fun createEmptyTuple(): tuple + asm "NIL"; + +/// Appends a value to tuple, resulting in `Tuple t' = (x1, ..., xn, value)`. +/// If its size exceeds 255, throws a type check exception. 
+@pure +fun tuplePush(mutate self: tuple, value: X): void + asm "TPUSH"; + +/// Returns the first element of a non-empty tuple. +@pure +fun tupleFirst(t: tuple): X + asm "FIRST"; + +/// Returns the [`index`]-th element of a tuple. +@pure +fun tupleAt(t: tuple, index: int): X + builtin; + +/// Returns the size of a tuple (elements count in it). +@pure +fun tupleSize(t: tuple): int + asm "TLEN"; + +/// Returns the last element of a non-empty tuple. +@pure +fun tupleLast(t: tuple): int + asm "LAST"; + + +/** + Mathematical primitives. + */ + +/// Computes the minimum of two integers. +@pure +fun min(x: int, y: int): int + asm "MIN"; + +/// Computes the maximum of two integers. +@pure +fun max(x: int, y: int): int + asm "MAX"; + +/// Sorts two integers. +@pure +fun minMax(x: int, y: int): (int, int) + asm "MINMAX"; + +/// Computes the absolute value of an integer. +@pure +fun abs(x: int): int + asm "ABS"; + +/// Returns the sign of an integer: `-1` if x < 0, `0` if x == 0, `1` if x > 0. +@pure +fun sign(x: int): int + asm "SGN"; + +/// Computes the quotient and remainder of [x] / [y]. Example: divMod(112,3) = (37,1) +@pure +fun divMod(x: int, y: int): (int, int) + asm "DIVMOD"; + +/// Computes the remainder and quotient of [x] / [y]. Example: modDiv(112,3) = (1,37) +@pure +fun modDiv(x: int, y: int): (int, int) + asm(-> 1 0) "DIVMOD"; + +/// Computes multiply-then-divide: floor([x] * [y] / [z]). +/// The intermediate result is stored in a 513-bit integer to prevent precision loss. +@pure +fun mulDivFloor(x: int, y: int, z: int): int + builtin; + +/// Similar to `mulDivFloor`, but rounds the result: round([x] * [y] / [z]). +@pure +fun mulDivRound(x: int, y: int, z: int): int + builtin; + +/// Similar to `mulDivFloor`, but ceils the result: ceil([x] * [y] / [z]). +@pure +fun mulDivCeil(x: int, y: int, z: int): int + builtin; + +/// Computes the quotient and remainder of ([x] * [y] / [z]). 
Example: mulDivMod(112,3,10) = (33,6) +@pure +fun mulDivMod(x: int, y: int, z: int): (int, int) + builtin; + + +/** + Global getters of environment and contract state. + */ + +const MASTERCHAIN = -1; +const BASECHAIN = 0; + +/// Returns current Unix timestamp (in seconds). +@pure +fun now(): int + asm "NOW"; + +/// Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`. +/// If necessary, it can be parsed further using primitives such as [parseStandardAddress]. +@pure +fun getMyAddress(): slice + asm "MYADDR"; + +/// Returns the balance (in nanotoncoins) of the smart contract at the start of Computation Phase. +/// Note that RAW primitives such as [sendMessage] do not update this field. +@pure +fun getMyOriginalBalance(): int + asm "BALANCE" "FIRST"; + +/// Same as [getMyOriginalBalance], but returns a tuple: +/// `int` — balance in nanotoncoins; +/// `cell` — a dictionary with 32-bit keys representing the balance of "extra currencies". +@pure +fun getMyOriginalBalanceWithExtraCurrencies(): [int, cell] + asm "BALANCE"; + +/// Returns the logical time of the current transaction. +@pure +fun getLogicalTime(): int + asm "LTIME"; + +/// Returns the starting logical time of the current block. +@pure +fun getCurrentBlockLogicalTime(): int + asm "BLOCKLT"; + +/// Returns the value of the global configuration parameter with integer index [x] as a `cell` or `null` value. +@pure +fun getBlockchainConfigParam(x: int): cell + asm "CONFIGOPTPARAM"; + +/// Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. +@pure +fun getContractData(): cell + asm "c4 PUSH"; + +/// Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive. 
+fun setContractData(c: cell): void + asm "c4 POP"; + +/// Retrieves code of smart-contract from c7 +@pure +fun getContractCode(): cell + asm "MYCODE"; + +/// Creates an output action that would change this smart contract code to that given by cell [newCode]. +/// Notice that this change will take effect only after the successful termination of the current run of the smart contract. +fun setContractCodePostponed(newCode: cell): void + asm "SETCODE"; + +/// Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) +/// so that the current execution is considered “successful” with the saved values even if an exception +/// in Computation Phase is thrown later. +fun commitContractDataAndActions(): void + asm "COMMIT"; + + +/** + Signature checks, hashing, cryptography. + */ + +/// Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`. +/// Useful for signing and checking signatures of arbitrary entities represented by a tree of cells. +@pure +fun cellHash(c: cell): int + asm "HASHCU"; + +/// Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`. +/// The result is the same as if an ordinary cell containing only data and references from `s` had been created +/// and its hash computed by [cellHash]. +@pure +fun sliceHash(s: slice): int + asm "HASHSU"; + +/// Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight, +/// throws a cell underflow exception. The hash value is returned as a 256-bit unsigned integer `x`. +@pure +fun stringHash(s: slice): int + asm "SHA256U"; + +/// Checks the Ed25519-`signature` of a `hash` (a 256-bit unsigned integer, usually computed as the hash of some data) +/// using [publicKey] (also represented by a 256-bit unsigned integer). +/// The signature must contain at least 512 data bits; only the first 512 bits are used. +/// The result is `−1` if the signature is valid, `0` otherwise. 
+/// Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`. +/// That is, if [hash] is computed as the hash of some data, these data are hashed twice, +/// the second hashing occurring inside `CHKSIGNS`. +@pure +fun isSignatureValid(hash: int, signature: slice, publicKey: int): int + asm "CHKSIGNU"; + +/// Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `publicKey`, +/// similarly to [isSignatureValid]. +/// If the bit length of [data] is not divisible by eight, throws a cell underflow exception. +/// The verification of Ed25519 signatures is the standard one, +/// with sha256 used to reduce [data] to the 256-bit number that is actually signed. +@pure +fun isSliceSignatureValid(data: slice, signature: slice, publicKey: int): int + asm "CHKSIGNS"; + +/// Generates a new pseudo-random unsigned 256-bit integer x. +fun random(): int + asm "RANDU256"; + +/// Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). +/// More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. +fun randomRange(range: int): int + asm "RAND"; + +/// Returns the current random seed as an unsigned 256-bit integer. +@pure +fun randomGetSeed(): int + asm "RANDSEED"; + +/// Sets the random seed to unsigned 256-bit seed. +fun randomSetSeed(seed: int): void + asm "SETRAND"; + +/// Initializes (mixes) random seed with unsigned 256-bit integer x. +fun randomizeBy(x: int): void + asm "ADDRAND"; + +/// Initializes random seed using current time. Don't forget to call this before calling `random`! +fun randomizeByLogicalTime(): void + asm "LTIME" "ADDRAND"; + + +/** + Size computation primitives. + They may be useful for computing storage fees of user-provided data. + */ + +/// Returns `(x, y, z, -1)` or `(null, null, null, 0)`. 
+/// Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z` +/// in the DAG rooted at `cell` [c], effectively returning the total storage used by this DAG taking into account +/// the identification of equal cells. +/// The values of `x`, `y`, and `z` are computed by a depth-first traversal of this DAG, +/// with a hash table of visited cell hashes used to prevent visits of already-visited cells. +/// The total count of visited cells `x` cannot exceed non-negative [maxCells]; +/// otherwise the computation is aborted before visiting the `(maxCells + 1)`-st cell and +/// a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. +@pure +fun calculateCellSize(c: cell, maxCells: int): (int, int, int, int) + asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +/// Similar to [calculateCellSize], but accepting a `slice` [s] instead of a `cell`. +/// The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; +/// however, the data bits and the cell references of [s] are accounted for in `y` and `z`. +@pure +fun calculateSliceSize(s: slice, maxCells: int): (int, int, int, int) + asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +/// A non-quiet version of [calculateCellSize] that throws a cell overflow exception (`8`) on failure. +fun calculateCellSizeStrict(c: cell, maxCells: int): (int, int, int) + asm "CDATASIZE"; + +/// A non-quiet version of [calculateSliceSize] that throws a cell overflow exception (`8`) on failure. +fun calculateSliceSizeStrict(s: slice, maxCells: int): (int, int, int) + asm "SDATASIZE"; + +/// Returns the depth of `cell` [c]. +/// If [c] has no references, then return `0`; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. +/// If [c] is a `null` instead of a cell, returns zero. +@pure +fun getCellDepth(c: cell): int + asm "CDEPTH"; + +/// Returns the depth of `slice` [s]. 
+/// If [s] has no references, then returns `0`; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. +@pure +fun getSliceDepth(s: slice): int + asm "SDEPTH"; + +/// Returns the depth of `builder` [b]. +/// If no cell references are stored in [b], then returns 0; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. +@pure +fun getBuilderDepth(b: builder): int + asm "BDEPTH"; + + +/** + Debug primitives. + Only works for local TVM execution with debug level verbosity. + */ + +/// Dump a variable [x] to the debug log. +fun debugPrint(x: X): void + builtin; + +/// Dump a string [x] to the debug log. +fun debugPrintString(x: X): void + builtin; + +/// Dumps the stack (at most the top 255 values) and shows the total stack depth. +fun debugDumpStack(): void + builtin; + + +/** + Slice primitives: parsing cells. + When you _load_ some data, you mutate the slice (shifting an internal pointer on the stack). + When you _preload_ some data, you just get the result without mutating the slice. + */ + +/// Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, +/// or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) +/// which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. +@pure +fun beginParse(c: cell): slice + asm "CTOS"; + +/// Checks if slice is empty. If not, throws an exception. +fun assertEndOfSlice(self: slice): void + asm "ENDS"; + +/// Loads the next reference from the slice. +@pure +fun loadRef(mutate self: slice): cell + asm( -> 1 0) "LDREF"; + +/// Preloads the next reference from the slice. +@pure +fun preloadRef(self: slice): cell + asm "PLDREF"; + +/// Loads a signed [len]-bit integer from a slice. +@pure +fun loadInt(mutate self: slice, len: int): int + builtin; + +/// Loads an unsigned [len]-bit integer from a slice. 
+@pure +fun loadUint(mutate self: slice, len: int): int + builtin; + +/// Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. +@pure +fun loadBits(mutate self: slice, len: int): slice + builtin; + +/// Preloads a signed [len]-bit integer from a slice. +@pure +fun preloadInt(self: slice, len: int): int + builtin; + +/// Preloads an unsigned [len]-bit integer from a slice. +@pure +fun preloadUint(self: slice, len: int): int + builtin; + +/// Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice. +@pure +fun preloadBits(self: slice, len: int): slice + builtin; + +/// Loads serialized amount of Toncoins (any unsigned integer up to `2^120 - 1`). +@pure +fun loadCoins(mutate self: slice): int + asm( -> 1 0) "LDGRAMS"; + +/// Loads bool (-1 or 0) from a slice +@pure +fun loadBool(mutate self: slice): int + asm( -> 1 0) "1 LDI"; + +/// Shifts a slice pointer to [len] bits forward, mutating the slice. +@pure +fun skipBits(mutate self: slice, len: int): self + asm "SDSKIPFIRST"; + +/// Returns the first `0 ≤ len ≤ 1023` bits of a slice. +@pure +fun getFirstBits(self: slice, len: int): slice + asm "SDCUTFIRST"; + +/// Returns all but the last `0 ≤ len ≤ 1023` bits of a slice. +@pure +fun removeLastBits(mutate self: slice, len: int): self + asm "SDSKIPLAST"; + +/// Returns the last `0 ≤ len ≤ 1023` bits of a slice. +@pure +fun getLastBits(self: slice, len: int): slice + asm "SDCUTLAST"; + +/// Loads a dictionary (TL HashMapE structure, represented as TVM cell) from a slice. +/// Returns `null` if `nothing` constructor is used. +@pure +fun loadDict(mutate self: slice): cell + asm( -> 1 0) "LDDICT"; + +/// Preloads a dictionary (cell) from a slice. +@pure +fun preloadDict(self: slice): cell + asm "PLDDICT"; + +/// Loads a dictionary as [loadDict], but returns only the remainder of the slice. +@pure +fun skipDict(mutate self: slice): self + asm "SKIPDICT"; + +/// Loads (Maybe ^Cell) from a slice. 
+/// In other words, loads 1 bit: if it's true, loads the first ref, otherwise returns `null`. +@pure +fun loadMaybeRef(mutate self: slice): cell + asm( -> 1 0) "LDOPTREF"; + +/// Preloads (Maybe ^Cell) from a slice. +@pure +fun preloadMaybeRef(self: slice): cell + asm "PLDOPTREF"; + +/// Loads (Maybe ^Cell), but returns only the remainder of the slice. +@pure +fun skipMaybeRef(mutate self: slice): self + asm "SKIPOPTREF"; + +/** + Builder primitives: constructing cells. + When you _store_ some data, you mutate the builder (shifting an internal pointer on the stack). + All the primitives below first check whether there is enough space in the `builder`, + and only then check the range of the value being serialized. + */ + +/// Creates a new empty builder. +@pure +fun beginCell(): builder + asm "NEWC"; + +/// Converts a builder into an ordinary `cell`. +@pure +fun endCell(self: builder): cell + asm "ENDC"; + +/// Stores a reference to a cell into a builder. +@pure +fun storeRef(mutate self: builder, c: cell): self + asm(c self) "STREF"; + +/// Stores a signed [len]-bit integer into a builder (`0 ≤ len ≤ 257`). +@pure +fun storeInt(mutate self: builder, x: int, len: int): self + builtin; + +/// Stores an unsigned [len]-bit integer into a builder (`0 ≤ len ≤ 256`). +@pure +fun storeUint(mutate self: builder, x: int, len: int): self + builtin; + +/// Stores a slice into a builder. +@pure +fun storeSlice(mutate self: builder, s: slice): self + asm "STSLICER"; + +/// Stores amount of Toncoins into a builder. +@pure +fun storeCoins(mutate self: builder, x: int): self + asm "STGRAMS"; + +/// Stores bool (-1 or 0) into a builder. +/// Attention: true value is `-1`, not 1! If you pass `1` here, TVM will throw an exception. +@pure +fun storeBool(mutate self: builder, x: int): self + asm(x self) "1 STI"; + +/// Stores dictionary (represented by TVM `cell` or `null`) into a builder. 
+/// In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. +@pure +fun storeDict(mutate self: builder, c: cell): self + asm(c self) "STDICT"; + +/// Stores (Maybe ^Cell) into a builder. +/// In other words, if [c] is `null`, store '0' bit; otherwise, store '1' and a ref to [c]. +@pure +fun storeMaybeRef(mutate self: builder, c: cell): self + asm(c self) "STOPTREF"; + +/// Concatenates two builders. +@pure +fun storeBuilder(mutate self: builder, from: builder): self + asm "STBR"; + +/// Stores a slice representing TL addr_none$00 (two `0` bits). +@pure +fun storeAddressNone(mutate self: builder): self + asm "b{00} STSLICECONST"; + + +/** + Slice and builder size primitives. + */ + +/// Returns the number of references in a slice. +@pure +fun getRemainingRefsCount(self: slice): int + asm "SREFS"; + +/// Returns the number of data bits in a slice. +@pure +fun getRemainingBitsCount(self: slice): int + asm "SBITS"; + +/// Returns both the number of data bits and the number of references in a slice. +@pure +fun getRemainingBitsAndRefsCount(self: slice): (int, int) + asm "SBITREFS"; + +/// Checks whether a slice is empty (i.e., contains no bits of data and no cell references). +@pure +fun isEndOfSlice(self: slice): int + asm "SEMPTY"; + +/// Checks whether a slice has no bits of data. +@pure +fun isEndOfSliceBits(self: slice): int + asm "SDEMPTY"; + +/// Checks whether a slice has no references. +@pure +fun isEndOfSliceRefs(self: slice): int + asm "SREMPTY"; + +/// Checks whether data parts of two slices coincide. +@pure +fun isSliceBitsEqual(self: slice, b: slice): int + asm "SDEQ"; + +/// Returns the number of cell references already stored in a builder. +@pure +fun getBuilderRefsCount(self: builder): int + asm "BREFS"; + +/// Returns the number of data bits already stored in a builder. +@pure +fun getBuilderBitsCount(self: builder): int + asm "BBITS"; + + +/** + Address manipulation primitives.
+ The address manipulation primitives listed below serialize and deserialize values according to the following TL-B scheme: + ```TL-B + addr_none$00 = MsgAddressExt; + addr_extern$01 len:(## 8) external_address:(bits len) + = MsgAddressExt; + anycast_info$_ depth:(#<= 30) { depth >= 1 } + rewrite_pfx:(bits depth) = Anycast; + addr_std$10 anycast:(Maybe Anycast) + workchain_id:int8 address:bits256 = MsgAddressInt; + addr_var$11 anycast:(Maybe Anycast) addr_len:(## 9) + workchain_id:int32 address:(bits addr_len) = MsgAddressInt; + _ _:MsgAddressInt = MsgAddress; + _ _:MsgAddressExt = MsgAddress; + + int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool + src:MsgAddress dest:MsgAddressInt + value:CurrencyCollection ihr_fee:Grams fwd_fee:Grams + created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ext_out_msg_info$11 src:MsgAddress dest:MsgAddressExt + created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ``` + A deserialized `MsgAddress` is represented by a tuple `t` as follows: + + - `addr_none` is represented by `t = (0)`, + i.e., a tuple containing exactly one integer equal to zero. + - `addr_extern` is represented by `t = (1, s)`, + where slice `s` contains the field `external_address`. In other words, + `t` is a pair (a tuple consisting of two entries), containing an integer equal to one and slice `s`. + - `addr_std` is represented by `t = (2, u, x, s)`, + where `u` is either a `null` (if `anycast` is absent) or a slice `s'` containing `rewrite_pfx` (if anycast is present). + Next, integer `x` is the `workchain_id`, and slice `s` contains the address. + - `addr_var` is represented by `t = (3, u, x, s)`, + where `u`, `x`, and `s` have the same meaning as for `addr_std`. + */ + +/// Loads from slice [s] the only prefix that is a valid `MsgAddress`, +/// and returns both this prefix `s'` and the remainder `s''` of [s] as slices.
+@pure +fun loadAddress(mutate self: slice): slice + asm( -> 1 0) "LDMSGADDR"; + +/// Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. +/// If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. +@pure +fun parseAddress(s: slice): tuple + asm "PARSEMSGADDR"; + +/// Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), +/// applies rewriting from the anycast (if present) to the same-length prefix of the address, +/// and returns both the workchain and the 256-bit address as integers. +/// If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, +/// throws a cell deserialization exception. +@pure +fun parseStandardAddress(s: slice): (int, int) + asm "REWRITESTDADDR"; + +/// Creates a slice representing TL addr_none$00 (two `0` bits). +@pure +fun createAddressNone(): slice + asm "b{00} PUSHSLICE"; + +/// Returns if a slice pointer contains an empty address (`-1` for true, `0` for false, as always). +/// In other words, a slice starts with two `0` bits (TL addr_none$00). +@pure +fun addressIsNone(s: slice): int + asm "2 PLDU" "0 EQINT"; + + +/** + Reserving Toncoins on balance and its flags. + */ + +/// mode = 0: Reserve exact amount of nanotoncoins +const RESERVE_MODE_EXACT_AMOUNT = 0; +/// +1: Actually reserves all but amount, meaning `currentContractBalance - amount` +const RESERVE_MODE_ALL_BUT_AMOUNT = 1; +/// +2: Actually set `min(amount, currentContractBalance)` (without this mode, if amount is greater, the action will fail) +const RESERVE_MODE_AT_MOST = 2; +/// +4: [amount] is increased by the _original_ balance of the current account (before the compute phase). +const RESERVE_MODE_INCREASE_BY_ORIGINAL_BALANCE = 4; +/// +8: Actually sets `amount = -amount` before performing any further actions. +const RESERVE_MODE_NEGATE_AMOUNT = 8; +/// +16: If this action fails, the transaction will be bounced. 
+const RESERVE_MODE_BOUNCE_ON_ACTION_FAIL = 16; + +/// Creates an output action which would reserve Toncoins on balance. +/// For [reserveMode] consider constants above. +fun reserveToncoinsOnBalance(nanoTonCoins: int, reserveMode: int): void + asm "RAWRESERVE"; + +/// Similar to [reserveToncoinsOnBalance], but also accepts a dictionary [extraAmount] (represented by a cell or null) +/// with extra currencies. In this way currencies other than Toncoin can be reserved. +fun reserveExtraCurrenciesOnBalance(nanoTonCoins: int, extraAmount: cell, reserveMode: int): void + asm "RAWRESERVEX"; + + +/** + Messages sending and parsing primitives. + Working with messages is low-level right now, but still, every contract should do that. + + `Message` structure, its header and so on are specified in TL-B scheme, particularly: + int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool ... = CommonMsgInfo; + */ + +/// 0b011000 tag - 0, ihr_disabled - 1, bounce - 1, bounced - 0, src = addr_none$00 +const BOUNCEABLE = 0x18; +/// 0b010000 tag - 0, ihr_disabled - 1, bounce - 0, bounced - 0, src = addr_none$00 +const NON_BOUNCEABLE = 0x10; + +/// Load msgFlags from incoming message body (4 bits). +@pure +fun loadMessageFlags(mutate self: slice): int + asm( -> 1 0) "4 LDU"; + +/// Having msgFlags (4 bits), check that a message is bounced. +/// Effectively, it's `msgFlags & 1` (the lowest bit present). +@pure +fun isMessageBounced(msgFlags: int): int + asm "1 PUSHINT" "AND"; + +/// Skip 0xFFFFFFFF prefix (when a message is bounced). +@pure +fun skipBouncedPrefix(mutate self: slice): self + asm "32 PUSHINT" "SDSKIPFIRST"; + +/// The guideline recommends to start the body of an internal message with uint32 `op` and uint64 `queryId`.
+@pure +fun loadMessageOp(mutate self: slice): int + asm( -> 1 0) "32 LDU"; + +@pure +fun skipMessageOp(mutate self: slice): self + asm "32 PUSHINT" "SDSKIPFIRST"; + +@pure +fun storeMessageOp(mutate self: builder, op: int): self + asm(op self) "32 STU"; + +/// The guideline recommends that uint64 `queryId` should follow uint32 `op`. +@pure +fun loadMessageQueryId(mutate self: slice): int + asm( -> 1 0) "64 LDU"; + +@pure +fun skipMessageQueryId(mutate self: slice): self + asm "64 PUSHINT" "SDSKIPFIRST"; + +@pure +fun storeMessageQueryId(mutate self: builder, queryId: int): self + asm(queryId self) "64 STU"; + +/// SEND MODES - https://docs.ton.org/tvm.pdf page 137, SENDRAWMSG + +/// mode = 0 is used for ordinary messages; the gas fees are deducted from the sending amount; action phase should NOT be ignored. +const SEND_MODE_REGULAR = 0; +/// +1 means that the sender wants to pay transfer fees separately. +const SEND_MODE_PAY_FEES_SEPARATELY = 1; +/// +2 means that any errors arising while processing this message during the action phase should be ignored. +const SEND_MODE_IGNORE_ERRORS = 2; +/// +16 means that in the case of action fail the transaction is bounced. No effect if SEND_MODE_IGNORE_ERRORS (+2) is used. TVM UPGRADE 2023-07. https://docs.ton.org/learn/tvm-instructions/tvm-upgrade-2023-07#sending-messages +const SEND_MODE_BOUNCE_ON_ACTION_FAIL = 16; +/// mode = 32 means that the current account must be destroyed if its resulting balance is zero. +const SEND_MODE_DESTROY = 32; +/// mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message. +const SEND_MODE_CARRY_ALL_REMAINING_MESSAGE_VALUE = 64; +/// mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message). +const SEND_MODE_CARRY_ALL_BALANCE = 128; +/// do not create an action, only estimate fee. TVM UPGRADE 2023-07.
https://docs.ton.org/learn/tvm-instructions/tvm-upgrade-2023-07#sending-messages +const SEND_MODE_ESTIMATE_FEE_ONLY = 1024; +/// Other modes affect the fee calculation as follows: +/// +64 substitutes the entire balance of the incoming message as an outcoming value (slightly inaccurate, gas expenses that cannot be estimated before the computation is completed are not taken into account). +/// +128 substitutes the value of the entire balance of the contract before the start of the computation phase (slightly inaccurate, since gas expenses that cannot be estimated before the completion of the computation phase are not taken into account). + +/// Sends a raw message — a correctly serialized TL object `Message X`. +/// For `mode`, see constants above (except SEND_MODE_ESTIMATE_FEE_ONLY). +/// This function is still available, but deprecated: consider using [sendMessage]. +@deprecated +fun sendRawMessage(msg: cell, mode: int): void + asm "SENDRAWMSG"; + +/// Creates an output action and returns a fee for creating a message. +/// Mode has the same effect as in the case of SENDRAWMSG. +/// For mode including SEND_MODE_ESTIMATE_FEE_ONLY it just returns estimated fee without sending a message. +fun sendMessage(msg: cell, mode: int): int + asm "SENDMSG"; diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk new file mode 100644 index 000000000..1dc6f3f89 --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -0,0 +1,63 @@ +// A part of standard library for Tolk +tolk 0.6 + +/** + Gas and payment related primitives. + */ + +/// Returns amount of gas (in gas units) consumed in current Computation Phase. +fun getGasConsumedAtTheMoment(): int + asm "GASCONSUMED"; + +/// This function is required to be called when you process an external message (from an outer world) +/// and "accept" it to blockchain. +/// Without calling this function, an external message would be discarded. 
+/// As an effect, the current smart contract agrees to buy some gas to finish the current transaction. +/// For more details, check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). +fun acceptExternalMessage(): void + asm "ACCEPT"; + +/// When processing an internal message, by default, the limit of gas consumption is determined by incoming message. +/// Functions [setGasLimit] and [setGasLimitToMaximum] allow you to change this behavior. +/// Sets current gas limit `gl` to its maximal allowed value `gm`, and resets the gas credit `gc` to zero, +/// decreasing the value of `gr` by `gc` in the process. +fun setGasLimitToMaximum(): void + asm "ACCEPT"; + +/// When processing an internal message, by default, the limit of gas consumption is determined by incoming message. +/// Functions [setGasLimit] and [setGasLimitToMaximum] allow you to change this behavior. +/// Sets current gas limit `gl` to the minimum of [limit] and `gm`, and resets the gas credit `gc` to zero. +/// If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, +/// an (unhandled) out of gas exception is thrown before setting new gas limits. +fun setGasLimit(limit: int): void + asm "SETGASLIMIT"; + +/// Calculates fee (amount in nanotoncoins to be paid) for a transaction which consumed [gasUsed] gas units. +fun calculateGasFee(workchain: int, gasUsed: int): int + asm(gasUsed workchain) "GETGASFEE"; + +/// Same as [calculateGasFee], but without flat price (you are supposed to read https://docs.ton.org/develop/howto/fees-low-level) +fun calculateGasFeeWithoutFlatPrice(workchain: int, gasUsed: int): int + asm(gasUsed workchain) "GETGASFEESIMPLE"; + +/// Calculates amount of nanotoncoins you should pay for storing a contract of provided size for [seconds]. +/// [bits] and [cells] represent contract state (code + data).
+fun calculateStorageFee(workchain: int, seconds: int, bits: int, cells: int): int + asm(cells bits seconds workchain) "GETSTORAGEFEE"; + +/// Calculates amount of nanotoncoins you should pay to send a message of specified size. +fun calculateMessageFee(workchain: int, bits: int, cells: int): int + asm(cells bits workchain) "GETFORWARDFEE"; + +/// Same as [calculateMessageFee], but without lump price (you are supposed to read https://docs.ton.org/develop/howto/fees-low-level) +fun calculateMessageFeeWithoutLumpPrice(workchain: int, bits: int, cells: int): int + asm(cells bits workchain) "GETFORWARDFEESIMPLE"; + +/// Calculates fee that was paid by the sender of an incoming internal message. +fun calculateOriginalMessageFee(workchain: int, incomingFwdFee: int): int + asm(incomingFwdFee workchain) "GETORIGINALFWDFEE"; + +/// Returns the amount of nanotoncoins the current contract owes for storage. ("due" and "debt" are synonyms) +/// If it has no debt, `0` is returned. +fun getMyStorageDuePayment(): int + asm "DUEPAYMENT"; diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk new file mode 100644 index 000000000..f7a721918 --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -0,0 +1,38 @@ +// A part of standard library for Tolk +tolk 0.6 + +/** + Lisp-style lists are nested 2-element tuples: `(1, (2, (3, null)))` represents list `[1, 2, 3]`. + Elements of a list can be of different types. + Empty list is conventionally represented as TVM `null` value. + */ + +@pure +fun createEmptyList(): tuple + asm "PUSHNULL"; + +/// Adds an element to the beginning of lisp-style list. +/// Note, that it does not mutate the list: instead, it returns a new one (it's a lisp pattern). +@pure +fun listPrepend(head: X, tail: tuple): tuple + asm "CONS"; + +/// Extracts the head and the tail of lisp-style list.
+@pure +fun listSplit(list: tuple): (X, tuple) + asm "UNCONS"; + +/// Extracts the head of lisp-style list, leaving its tail in [self]. +@pure +fun listNext(mutate self: tuple): X + asm( -> 1 0) "UNCONS"; + +/// Returns the head of lisp-style list. +@pure +fun listGetHead(list: tuple): X + asm "CAR"; + +/// Returns the tail of lisp-style list. +@pure +fun listGetTail(list: tuple): tuple + asm "CDR"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk new file mode 100644 index 000000000..9fba24d90 --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -0,0 +1,311 @@ +// A part of standard library for Tolk +tolk 0.6 + +/** + Dictionaries are represented as `cell` data type (cells can store anything, dicts in particular). + Currently, they have very low-level API very close to TVM internals. + Most of the functions are duplicated for three common cases: + - iDict* - dicts with signed integer keys + - uDict* - dicts with unsigned integer keys + - sDict* - dicts with arbitrary slice keys + When accessing a dict element, you should not only provide a key, but also provide keyLen, + since, for optimization, key length is not stored in the dictionary itself. + */ + +/// Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL +@pure +fun createEmptyDict(): cell + asm "NEWDICT"; + +/// Checks whether a dictionary is empty.
+@pure +fun dictIsEmpty(self: cell): int + asm "DICTEMPTY"; + + +@pure +fun iDictGet(self: cell, keyLen: int, key: int): (slice, int) + asm(key self keyLen) "DICTIGET" "NULLSWAPIFNOT"; + +@pure +fun uDictGet(self: cell, keyLen: int, key: int): (slice, int) + asm(key self keyLen) "DICTUGET" "NULLSWAPIFNOT"; + +@pure +fun sDictGet(self: cell, keyLen: int, key: slice): (slice, int) + asm(key self keyLen) "DICTGET" "NULLSWAPIFNOT"; + + +@pure +fun iDictSet(mutate self: cell, keyLen: int, key: int, value: slice): void + asm(value key self keyLen) "DICTISET"; + +@pure +fun uDictSet(mutate self: cell, keyLen: int, key: int, value: slice): void + asm(value key self keyLen) "DICTUSET"; + +@pure +fun sDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): void + asm(value key self keyLen) "DICTSET"; + + +@pure +fun iDictSetRef(mutate self: cell, keyLen: int, key: int, value: cell): void + asm(value key self keyLen) "DICTISETREF"; + +@pure +fun uDictSetRef(mutate self: cell, keyLen: int, key: int, value: cell): void + asm(value key self keyLen) "DICTUSETREF"; + +@pure +fun sDictSetRef(mutate self: cell, keyLen: int, key: slice, value: cell): void + asm(value key self keyLen) "DICTSETREF"; + + +@pure +fun iDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): int + asm(value key self keyLen) "DICTIADD"; + +@pure +fun uDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): int + asm(value key self keyLen) "DICTUADD"; + + +@pure +fun iDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): int + asm(value key self keyLen) "DICTIREPLACE"; + +@pure +fun uDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): int + asm(value key self keyLen) "DICTUREPLACE"; + + +@pure +fun iDictGetRef(self: cell, keyLen: int, key: int): (cell, int) + asm(key self keyLen) "DICTIGETREF" "NULLSWAPIFNOT"; + +@pure +fun uDictGetRef(self: cell, keyLen: int, key: int): (cell, int) + asm(key self keyLen) "DICTUGETREF" 
"NULLSWAPIFNOT"; + +@pure +fun sDictGetRef(self: cell, keyLen: int, key: slice): (cell, int) + asm(key self keyLen) "DICTGETREF" "NULLSWAPIFNOT"; + + +@pure +fun iDictGetRefOrNull(self: cell, keyLen: int, key: int): cell + asm(key self keyLen) "DICTIGETOPTREF"; + +@pure +fun uDictGetRefOrNull(self: cell, keyLen: int, key: int): cell + asm(key self keyLen) "DICTUGETOPTREF"; + +@pure +fun sDictGetRefOrNull(self: cell, keyLen: int, key: slice): cell + asm(key self keyLen) "DICTGETOPTREF"; + + +@pure +fun iDictDelete(mutate self: cell, keyLen: int, key: int): int + asm(key self keyLen) "DICTIDEL"; + +@pure +fun uDictDelete(mutate self: cell, keyLen: int, key: int): int + asm(key self keyLen) "DICTUDEL"; + +@pure +fun sDictDelete(mutate self: cell, keyLen: int, key: slice): int + asm(key self keyLen) "DICTDEL"; + + +@pure +fun iDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, int) + asm(value key self keyLen) "DICTISETGET" "NULLSWAPIFNOT"; + +@pure +fun uDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, int) + asm(value key self keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; + +@pure +fun sDictSetAndGet(mutate self: cell, keyLen: int, key: slice, value: slice): (slice, int) + asm(value key self keyLen) "DICTSETGET" "NULLSWAPIFNOT"; + + +@pure +fun iDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cell): cell + asm(value key self keyLen) "DICTISETGETOPTREF"; + +@pure +fun uDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cell): cell + asm(value key self keyLen) "DICTUSETGETOPTREF"; + + +@pure +fun iDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, int) + asm(key self keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; + +@pure +fun uDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, int) + asm(key self keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; + +@pure +fun sDictDeleteAndGet(mutate self: cell, keyLen: int, key: slice): (slice, int) + asm(key self keyLen) 
"DICTDELGET" "NULLSWAPIFNOT"; + + +@pure +fun iDictSetBuilder(mutate self: cell, keyLen: int, key: int, value: builder): void + asm(value key self keyLen) "DICTISETB"; + +@pure +fun uDictSetBuilder(mutate self: cell, keyLen: int, key: int, value: builder): void + asm(value key self keyLen) "DICTUSETB"; + +@pure +fun sDictSetBuilder(mutate self: cell, keyLen: int, key: slice, value: builder): void + asm(value key self keyLen) "DICTSETB"; + + +@pure +fun iDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): int + asm(value key self keyLen) "DICTIADDB"; + +@pure +fun uDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): int + asm(value key self keyLen) "DICTUADDB"; + +@pure +fun iDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): int + asm(value key self keyLen) "DICTIREPLACEB"; + +@pure +fun uDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): int + asm(value key self keyLen) "DICTUREPLACEB"; + + +@pure +fun iDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, int) + asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun uDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, int) + asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun sDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (slice, slice, int) + asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; + + +@pure +fun iDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, int) + asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun uDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, int) + asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun sDictDeleteLastAndGet(mutate self: cell, keyLen: int): (slice, slice, int) + asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; + + +@pure +fun iDictGetFirst(self: cell, keyLen: int): (int, slice, int) + asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; + +@pure +fun 
uDictGetFirst(self: cell, keyLen: int): (int, slice, int) + asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; + +@pure +fun sDictGetFirst(self: cell, keyLen: int): (slice, slice, int) + asm (-> 1 0 2) "DICTMIN" "NULLSWAPIFNOT2"; + +@pure +fun iDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, int) + asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, int) + asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; + +@pure +fun sDictGetFirstAsRef(self: cell, keyLen: int): (slice, cell, int) + asm (-> 1 0 2) "DICTMINREF" "NULLSWAPIFNOT2"; + + +@pure +fun iDictGetLast(self: cell, keyLen: int): (int, slice, int) + asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetLast(self: cell, keyLen: int): (int, slice, int) + asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; + +@pure +fun sDictGetLast(self: cell, keyLen: int): (slice, slice, int) + asm (-> 1 0 2) "DICTMAX" "NULLSWAPIFNOT2"; + +@pure +fun iDictGetLastAsRef(self: cell, keyLen: int): (int, cell, int) + asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetLastAsRef(self: cell, keyLen: int): (int, cell, int) + asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; + +@pure +fun sDictGetLastAsRef(self: cell, keyLen: int): (slice, cell, int) + asm (-> 1 0 2) "DICTMAXREF" "NULLSWAPIFNOT2"; + + +@pure +fun iDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; + +@pure +fun iDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; + + +@pure +fun iDictGetPrev(self: cell, keyLen: int, 
pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; + +@pure +fun iDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; + + +/** + Prefix dictionary primitives. + */ + +@pure +fun prefixDictGet(self: cell, keyLen: int, key: slice): (slice, slice, slice, int) + asm(key self keyLen) "PFXDICTGETQ" "NULLSWAPIFNOT2"; + +@pure +fun prefixDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): int + asm(value key self keyLen) "PFXDICTSET"; + +@pure +fun prefixDictDelete(mutate self: cell, keyLen: int, key: slice): int + asm(key self keyLen) "PFXDICTDEL"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk new file mode 100644 index 000000000..91b35f2bd --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk @@ -0,0 +1,25 @@ +// A part of standard library for Tolk +tolk 0.6 + +/// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. +/// The primitive returns the current value of `c3`. +@pure +fun getTvmRegisterC3(): continuation + asm "c3 PUSH"; + +/// Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time. +/// Note that after execution of this primitive the current code +/// (and the stack of recursive function calls) won't change, +/// but any other function call will use a function from the new code. 
+fun setTvmRegisterC3(c: continuation): void + asm "c3 POP"; + +/// Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist. +@pure +fun transformSliceToContinuation(s: slice): continuation + asm "BLESS"; + +/// Moves a variable or a value [x] to the top of the stack. +@pure +fun stackMoveToTop(mutate self: X): void + asm "NOP"; diff --git a/lite-client/lite-client.cpp b/lite-client/lite-client.cpp index 1a4201a7c..77c9a8c8b 100644 --- a/lite-client/lite-client.cpp +++ b/lite-client/lite-client.cpp @@ -926,7 +926,7 @@ bool TestNode::show_help(std::string command) { "saveaccount[code|data] []\tSaves into specified file the most recent state " "(StateInit) or just the code or data of specified account; is in " "[:] format\n" - "runmethod[full] [] ...\tRuns GET method of account " + "runmethod[full] [] ...\tRuns GET method of account " " " "with specified parameters\n" "dnsresolve [] []\tResolves a domain starting from root dns smart contract\n" diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk new file mode 100644 index 000000000..d46397c6a --- /dev/null +++ b/tolk-tester/tests/a10.tolk @@ -0,0 +1,130 @@ +import "@stdlib/tvm-lowlevel" + +fun pair_first(p: [X, Y]): X asm "FIRST"; + +fun one(dummy: tuple) { + return 1; +} + +fun main(a: int, x: int) { + var y: int = 0; + var z: int = 0; + while ((y = x * x) > a) { + x -= 1; + z = one(null); + } + return (y, z); +} + +fun throwIfLt10(x: int): void { + if (x > 10) { + return; + } + throw 234; + return; +} + +@method_id(88) +fun test88(x: int) { + try { + var x: void = throwIfLt10(x); + return 0; + } catch(code) { + return code; + } +} + +@method_id(89) +fun test89(last: int) { + var t: tuple = createEmptyTuple(); + t.tuplePush(1); + t.tuplePush(2); + t.tuplePush(3); + t.tuplePush(last); + return (t.tupleAt(0), t.tupleAt(t.tupleSize() - 1), t.tupleFirst(), t.tupleLast()); +} + +@pure fun get10() { return 10; } + +@method_id(91) +fun 
touchCodegen2() { + var f = get10(); + f.stackMoveToTop(); + return f; +} + +@method_id(92) +fun testDumpDontPolluteStack() { + var f = get10(); + f.debugPrint(); + debugPrint(10); + var s = "asdf"; + s.debugPrintString(); + debugDumpStack(); + debugPrintString("my"); + return (f, getRemainingBitsCount(s)); +} + +@method_id(93) +fun testStartBalanceCodegen1() { + var t = getMyOriginalBalanceWithExtraCurrencies(); + var first = t.pair_first(); + return first; +} + +@method_id(94) +fun testStartBalanceCodegen2() { + var first = getMyOriginalBalance(); + return first; +} + +/** + method_id | in | out +@testcase | 0 | 101 15 | 100 1 +@testcase | 0 | 101 14 | 100 1 +@testcase | 0 | 101 10 | 100 0 +@testcase | 0 | 100 10 | 100 0 +@testcase | 0 | 100 10 | 100 0 +@testcase | 88 | 5 | 234 +@testcase | 88 | 50 | 0 +@testcase | 89 | 4 | 1 4 1 4 +@testcase | 91 | | 10 +@testcase | 92 | | 10 32 + +@fif_codegen +""" + touchCodegen2 PROC:<{ + // + get10 CALLDICT // f + }> +""" + +@fif_codegen +""" + testDumpDontPolluteStack PROC:<{ + ... 
+ DUMPSTK + x{6d79} PUSHSLICE // f s _9 + STRDUMP DROP + SBITS // f _11 + }> +""" + +@fif_codegen +""" + testStartBalanceCodegen1 PROC:<{ + // + BALANCE // t + FIRST // first + }> +""" + +@fif_codegen +""" + testStartBalanceCodegen2 PROC:<{ + // + BALANCE + FIRST // first + }> +""" +*/ diff --git a/tolk-tester/tests/a6.tolk b/tolk-tester/tests/a6.tolk new file mode 100644 index 000000000..7f2c39461 --- /dev/null +++ b/tolk-tester/tests/a6.tolk @@ -0,0 +1,79 @@ +fun f(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { + // solve a 2x2 linear equation + var D: int = a*d - b*c;;;; var Dx: int = e*d-b*f ;;;; var Dy: int = a * f - e * c; + return (Dx/D,Dy/D); +};;;; + +fun calc_phi(): int { + var n = 1; + repeat (70) { n*=10; }; + var p= 1; + var `q`=1; + do { + (p,q)=(q,p+q); + } while (q <= n); //;; + return mulDivRound(p, n, q); +} + +fun calc_sqrt2(): int { + var n = 1; + repeat (70) { n *= 10; } + var p = 1; + var q = 1; + do { + var t = p + q; + (p, q) = (q, t + q); + } while (q <= n); + return mulDivRound(p, n, q); +} + +fun calc_root(m: auto): auto { + var base: int=1; + repeat(70) { base *= 10; } + var (a, b, c) = (1,0,-m); + var (p1, q1, p2, q2) = (1, 0, 0, 1); + do { + var k: int=-1; + var (a1, b1, c1) = (0, 0, 0); + do { + k+=1; + (a1, b1, c1) = (a, b, c); + c+=b; + c += b += a; + } while (c <= 0); + (a, b, c) = (-c1, -b1, -a1); + (p1, q1) = (k * p1+q1, p1); + (p2, q2) = (k * p2+q2, p2); + } while (p1 <= base); + return (p1, q1, p2, q2); +} + +fun ataninv(base: int, q: int): int { // computes base*atan(1/q) + base=base~/q; + q*=-q; + var sum: int = 0; + var n: int = 1; + do { + sum += base~/n; + base = base~/q; + n += 2; + } while (base != 0); + return sum; +} + +fun calc_pi(): int { + var base: int = 64; + repeat (70) { base *= 10; } + return (ataninv(base << 2, 5) - ataninv(base, 239))~>>4; +} + +fun main(): int { + return calc_pi(); +} + +/** + method_id | in | out +@testcase | 0 | | 
31415926535897932384626433832795028841971693993751058209749445923078164 + +@code_hash 84337043972311674339187056298873613816389434478842780265748859098303774481976 +*/ diff --git a/tolk-tester/tests/a6_1.tolk b/tolk-tester/tests/a6_1.tolk new file mode 100644 index 000000000..4995c42d3 --- /dev/null +++ b/tolk-tester/tests/a6_1.tolk @@ -0,0 +1,22 @@ +fun main(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { + var D: int = a * d - b * c; + var Dx: int = e * d - b * f; + var Dy: int = a * f - e * c; + return (Dx / D, Dy / D); +} + +@method_id(101) +fun testDivMod(x: int, y: int) { + return [divMod(x, y), modDiv(x, y), mulDivMod(x, y, 10)]; +} + +/** + method_id | in | out +@testcase | 0 | 1 1 1 -1 10 6 | 8 2 +@testcase | 0 | 817 -31 624 -241 132272 272276 | 132 -788 +@testcase | 0 | -886 562 498 -212 -36452 -68958 | -505 -861 +@testcase | 0 | 448 -433 -444 792 150012 -356232 | -218 -572 +@testcase | 0 | -40 -821 433 -734 -721629 -741724 | -206 889 +@testcase | 0 | -261 -98 -494 868 -166153 733738 | 263 995 +@testcase | 101 | 112 3 | [ 37 1 1 37 33 6 ] +*/ diff --git a/tolk-tester/tests/a6_5.tolk b/tolk-tester/tests/a6_5.tolk new file mode 100644 index 000000000..8b300c0c9 --- /dev/null +++ b/tolk-tester/tests/a6_5.tolk @@ -0,0 +1,26 @@ +@deprecated +fun twice(f: auto, x: auto): auto { + return f (f (x)); +} + +fun sqr(x: int) { + return x * x; +} + +fun main(x: int): int { + var f = sqr; + return twice(f, x) * f(x); +} + +@method_id(4) +fun pow6(x: int): int { + return twice(sqr, x) * sqr(x); +} + +/** + method_id | in | out +@testcase | 0 | 3 | 729 +@testcase | 0 | 10 | 1000000 +@testcase | 4 | 3 | 729 +@testcase | 4 | 10 | 1000000 +*/ diff --git a/tolk-tester/tests/a7.tolk b/tolk-tester/tests/a7.tolk new file mode 100644 index 000000000..1c0ae2eb3 --- /dev/null +++ b/tolk-tester/tests/a7.tolk @@ -0,0 +1,24 @@ +fun main() { } +@method_id(1) +fun steps(x: int): int { + var n = 0; + while (x > 1) { + n += 1; + if (x & 1) { + x = 3 * x + 1; + } else { + x 
>>= 1; + } + } + return n; +} + +/** + method_id | in | out +@testcase | 1 | 1 | 0 +@testcase | 1 | 2 | 1 +@testcase | 1 | 5 | 5 +@testcase | 1 | 19 | 20 +@testcase | 1 | 27 | 111 +@testcase | 1 | 100 | 25 +*/ diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk new file mode 100644 index 000000000..5cfa2f3d8 --- /dev/null +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -0,0 +1,111 @@ +fun unsafe_tuple(x: X): tuple + asm "NOP"; + +fun inc(x: int, y: int): (int, int) { + return (x + y, y * 10); +} +fun `~inc`(mutate self: int, y: int): int { + val (newX, newY) = inc(self, y); + self = newX; + return newY; +} + +@method_id(11) +fun test_return(x: int): (int, int, int, int, int, int, int) { + return (x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); +} + +@method_id(12) +fun test_assign(x: int): (int, int, int, int, int, int, int) { + var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x.`~inc`(x / 20), x, x=x*2, x, x+=1, x); + return (x1, x2, x3, x4, x5, x6, x7); +} + +@method_id(13) +fun test_tuple(x: int): tuple { + var t: tuple = unsafe_tuple([x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]); + return t; +} + +@method_id(14) +fun test_tuple_assign(x: int): (int, int, int, int, int, int, int) { + var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]; + return (x1, x2, x3, x4, x5, x6, x7); +} + +fun foo1(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) { + return (x1, x2, x3, x4, x5, x6, x7); +} + +@method_id(15) +fun test_call_1(x: int): (int, int, int, int, int, int, int) { + return foo1(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); +} + +fun foo2(x1: int, x2: int, x3456: (int, int, int, int), x7: int): (int, int, int, int, int, int, int) { + var (x3: int, x4: int, x5: int, x6: int) = x3456; + return (x1, x2, x3, x4, x5, x6, x7); +} + +@method_id(16) +fun 
test_call_2(x: int): (int, int, int, int, int, int, int) { + return foo2(x, x.`~inc`(x / 20), (x, x = x * 2, x, x += 1), x); +} + +fun asm_func(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) +asm + (x4 x5 x6 x7 x1 x2 x3->0 1 2 3 4 5 6) "NOP"; + +@method_id(17) +fun test_call_asm_old(x: int): (int, int, int, int, int, int, int) { + return asm_func(x, x += 1, x, x, x.`~inc`(x / 20), x, x = x * 2); +} + +@method_id(18) +fun test_call_asm_new(x: int): (int, int, int, int, int, int, int) { + return asm_func(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); +} + +global xx: int; +@method_id(19) +fun test_global(x: int): (int, int, int, int, int, int, int) { + xx = x; + return (xx, xx.`~inc`(xx / 20), xx, xx = xx * 2, xx, xx += 1, xx); +} + +@method_id(20) +fun test_if_else(x: int): (int, int, int, int, int) { + if (x > 10) { + return (x.`~inc`(8), x + 1, x = 1, x <<= 3, x); + } else { + xx = 9; + return (x, x.`~inc`(-4), x.`~inc`(-1), x >= 1, x = x + xx); + } +} + +fun main() { +} + +/** + method_id | in | out +@testcase | 11 | 100 | 100 50 105 210 210 211 211 +@testcase | 12 | 100 | 100 50 105 210 210 211 211 +@testcase | 13 | 100 | [ 100 50 105 210 210 211 211 ] +@testcase | 14 | 100 | 100 50 105 210 210 211 211 +@testcase | 15 | 100 | 100 50 105 210 210 211 211 +@testcase | 16 | 100 | 100 50 105 210 210 211 211 +@testcase | 17 | 100 | 101 50 106 212 100 101 101 +@testcase | 18 | 100 | 210 210 211 211 100 50 105 +@testcase | 19 | 100 | 100 50 105 210 210 211 211 +@testcase | 20 | 80 | 80 89 1 8 8 +@testcase | 20 | 9 | 9 -40 -10 -1 13 + +@fif_codegen +""" + ~inc PROC:<{ + // self y + inc CALLDICT // self newY + }> +""" +@code_hash 97139400653362069936987769894397430077752335662822462908581556703209313861576 +*/ diff --git a/tolk-tester/tests/asm_arg_order.tolk b/tolk-tester/tests/asm_arg_order.tolk new file mode 100644 index 000000000..b96e09ecb --- /dev/null +++ b/tolk-tester/tests/asm_arg_order.tolk @@ -0,0 +1,139 
@@ +@pure +fun empty_tuple2(): tuple +asm "NIL"; +@pure +fun tpush2(mutate self: tuple, x: X): void +asm "TPUSH"; + +@pure +fun asm_func_1(x: int, y: int, z: int): tuple +asm "3 TUPLE"; +@pure +fun asm_func_2(x: int, y: int, z: int): tuple +asm (z y x -> 0) "3 TUPLE"; +@pure +fun asm_func_3(x: int, y: int, z: int): tuple +asm (y z x -> 0) "3 TUPLE"; +@pure +fun asm_func_4(a: int, b: (int, (int, int)), c: int): tuple +asm (b a c -> 0) "5 TUPLE"; + +@pure +fun asm_func_modify(mutate self: tuple, b: int, c: int): void +asm (c b self) "SWAP TPUSH SWAP TPUSH"; + +global t: tuple; + +fun foo(x: int): int { + t.tpush2(x); + return x * 10; +} + +@method_id(11) +fun test_old_1(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_1(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(12) +fun test_old_2(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_2(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(13) +fun test_old_3(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_3(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(14) +fun test_old_4(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = empty_tuple2(); + // This actually computes left-to-right even without compute-asm-ltr + t2 = asm_func_4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); + return (t, t2); +} + +@method_id(15) +fun test_old_modify(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = empty_tuple2(); + t2.asm_func_modify(foo(22), foo(33)); + return (t, t2); +} + +@method_id(16) +fun test_old_dot(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = foo(11).asm_func_3(foo(22), foo(33)); + return (t, t2); +} + +@method_id(21) +fun test_new_1(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_1(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(22) +fun test_new_2(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_2(foo(11), foo(22), foo(33)); + 
return (t, t2); +} + +@method_id(23) +fun test_new_3(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_3(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(24) +fun test_new_4(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); + return (t, t2); +} + +@method_id(25) +fun test_new_modify(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = empty_tuple2(); + t2.asm_func_modify(foo(22), foo(33)); + return (t, t2); +} + +@method_id(26) +fun test_new_dot(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = foo(11).asm_func_3(foo(22), foo(33)); + return (t, t2); +} + +fun main() { +} + +/** + method_id | in | out +@testcase | 11 | | [ 11 22 33 ] [ 110 220 330 ] +@testcase | 12 | | [ 11 22 33 ] [ 330 220 110 ] +@testcase | 13 | | [ 11 22 33 ] [ 220 330 110 ] +@testcase | 14 | | [ 11 22 33 44 55 ] [ 220 330 440 110 550 ] +@testcase | 15 | | [ 22 33 ] [ 220 330 ] +@testcase | 16 | | [ 11 22 33 ] [ 220 330 110 ] +@testcase | 21 | | [ 11 22 33 ] [ 110 220 330 ] +@testcase | 22 | | [ 11 22 33 ] [ 330 220 110 ] +@testcase | 23 | | [ 11 22 33 ] [ 220 330 110 ] +@testcase | 24 | | [ 11 22 33 44 55 ] [ 220 330 440 110 550 ] +@testcase | 25 | | [ 22 33 ] [ 220 330 ] +@testcase | 26 | | [ 11 22 33 ] [ 220 330 110 ] + +@code_hash 93068291567112337250118419287631047120002003622184251973082208096953112184588 +*/ diff --git a/tolk-tester/tests/bit-operators.tolk b/tolk-tester/tests/bit-operators.tolk new file mode 100644 index 000000000..049406af9 --- /dev/null +++ b/tolk-tester/tests/bit-operators.tolk @@ -0,0 +1,53 @@ +fun lshift(): int { + return (1 << 0) == 1; +} + +fun rshift(): int { + return (1 >> 0) == 1; +} + +fun lshift_var(i: int): int { + return (1 << i) == 1; +} + +fun rshift_var(i: int): int { + return (1 >> i) == 1; +} + +fun main(x: int): int { + if (x == 0) { + return lshift(); + } else if (x == 1) { + return rshift(); + } else if (x == 2) { + return 
lshift_var(0); + } else if (x == 3) { + return rshift_var(0); + } else if (x == 4) { + return lshift_var(1); + } else { + return rshift_var(1); + } +} + +@method_id(11) +fun is_claimed(index: int): int { + var claim_bit_index: int = index % 256; + var mask: int = 1 << claim_bit_index; + return (255 & mask) == mask; +} + + +/** + method_id | in | out +@testcase | 0 | 0 | -1 +@testcase | 0 | 1 | -1 +@testcase | 0 | 2 | -1 +@testcase | 0 | 3 | -1 +@testcase | 0 | 4 | 0 +@testcase | 0 | 5 | 0 +@testcase | 11 | 0 | -1 +@testcase | 11 | 1 | -1 +@testcase | 11 | 256 | -1 +@testcase | 11 | 8 | 0 +*/ diff --git a/tolk-tester/tests/c2.tolk b/tolk-tester/tests/c2.tolk new file mode 100644 index 000000000..ec8d32da4 --- /dev/null +++ b/tolk-tester/tests/c2.tolk @@ -0,0 +1,27 @@ +global op: (int, int) -> int; + +fun check_assoc(a: int, b: int, c: int): int { + return op(op(a, b), c) == op(a, op(b, c)); +} + +fun unnamed_args(_: int, _: slice, _: auto): auto { + return true; +} + +fun main(x: int, y: int, z: int): int { + op = `_+_`; + return check_assoc(x, y, z); +} + +@method_id(101) +fun test101(x: int, z: int): auto { + return unnamed_args(x, "asdf", z); +} + +/** + method_id | in | out +@testcase | 0 | 2 3 9 | -1 +@testcase | 0 | 11 22 44 | -1 +@testcase | 0 | -1 -10 -20 | -1 +@testcase | 101 | 1 10 | -1 +*/ diff --git a/tolk-tester/tests/c2_1.tolk b/tolk-tester/tests/c2_1.tolk new file mode 100644 index 000000000..4e52b9eeb --- /dev/null +++ b/tolk-tester/tests/c2_1.tolk @@ -0,0 +1,14 @@ +fun check_assoc(op: auto, a: int, b: int, c: int) { + return op(op(a, b), c) == op(a, op(b, c)); +} + +fun main(x: int, y: int, z: int): int { + return check_assoc(`_+_`, x, y, z); +} + +/** + method_id | in | out +@testcase | 0 | 2 3 9 | -1 +@testcase | 0 | 11 22 44 | -1 +@testcase | 0 | -1 -10 -20 | -1 +*/ diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk new file mode 100644 index 000000000..e1d28b8b1 --- /dev/null +++ 
b/tolk-tester/tests/cells-slices.tolk @@ -0,0 +1,231 @@ +fun store_u32(mutate self: builder, value: int): self { + return self.storeUint(value, 32); +} + +fun load_u32(mutate self: slice): int { + return self.loadUint(32); +} + +fun myLoadInt(mutate self: slice, len: int): int + asm(-> 1 0) "LDIX"; +fun myStoreInt(mutate self: builder, x: int, len: int): self + asm(x self len) "STIX"; + +@method_id(101) +fun test1(): [int,int,int,int,int] { + var b: builder = beginCell().storeUint(1, 32); + b = b.storeUint(2, 32); + b.storeUint(3, 32); + b = b.store_u32(4); + b.store_u32(5); + + var cs: slice = b.endCell().beginParse(); + var one: int = cs.loadUint(32); + var (two: int, three: int) = (cs.loadUint(32), cs.load_u32()); + var four: int = cs.load_u32(); + var five: int = cs.load_u32(); + + return [one,two,three,four,five]; +} + +@method_id(102) +fun test2(): [int,int,int] { + var b: builder = beginCell().myStoreInt(1, 32); + b = b.myStoreInt(2, 32); + b.myStoreInt(3, 32); + + var cs: slice = b.endCell().beginParse(); + var one: int = cs.myLoadInt(32); + var (two: int, three: int) = (cs.myLoadInt(32), cs.myLoadInt(32)); + + return [one,two,three]; +} + +@method_id(103) +fun test3(ret: int): int { + val same: int = beginCell().storeUint(ret,32).endCell().beginParse().loadUint(32); + return same; +} + +@method_id(104) +fun test4(): [int,int] { + var b: builder = beginCell().myStoreInt(1, 32); + b = b.storeInt(2, 32).storeInt(3, 32); + + var cs: slice = b.endCell().beginParse(); + var (one, _, three) = (cs.getFirstBits(32).loadUint(32), cs.skipBits(64), cs.load_u32()); + + return [one,three]; +} + +@method_id(105) +fun test5(): [int,int] { + var cref: cell = endCell(beginCell().store_u32(105)); + var c: cell = beginCell().storeRef(cref).storeRef(cref).store_u32(1).endCell(); + + var cs: slice = beginParse(c); + var sto5x2: int = cs.loadRef().beginParse().load_u32() + cs.loadRef().beginParse().loadUint(32); + return [sto5x2, cs.load_u32()]; +} + +@method_id(106) +fun 
test6() { + return beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32); +} + +@method_id(107) +fun test7() { + // since .store() methods now mutate, this piece of code works not as earlier (mutates uri_builder) + var uri_builder = beginCell(); + var uri_slice = uri_builder.storeSlice(".json").endCell().beginParse(); + var image_slice = uri_builder.storeSlice(".png").endCell().beginParse(); + return (uri_builder.getBuilderBitsCount(), uri_slice.getRemainingBitsCount(), image_slice.getRemainingBitsCount()); +} + +@method_id(108) +fun test8() { + var uri_builder = beginCell(); + var fresh = uri_builder; + var uri_slice = fresh.storeSlice(".json").endCell().beginParse(); + var fresh redef = uri_builder; + var image_slice = fresh.storeSlice(".png").endCell().beginParse(); + return (uri_builder.getBuilderBitsCount(), uri_slice.getRemainingBitsCount(), image_slice.getRemainingBitsCount()); +} + + +fun sumNumbersInSlice(mutate self: slice): int { + var result = 0; + while (!self.isEndOfSliceBits()) { + result += self.loadUint(32); + } + return result; +} + +@method_id(110) +fun test10() { + var ref = beginCell().storeInt(100, 32).endCell(); + var s: slice = beginCell().storeInt(1, 32).storeInt(2, 32).storeRef(ref).endCell().beginParse(); + var result = (getRemainingBitsCount(s), s.sumNumbersInSlice(), getRemainingBitsCount(s), isEndOfSlice(s), isEndOfSliceBits(s), isEndOfSliceRefs(s)); + var ref2: cell = s.loadRef(); + var s2: slice = ref2.beginParse(); + s.assertEndOfSlice(); + return (result, s2.loadInt(32), s2.isEndOfSlice()); +} + +@method_id(111) +fun test11() { + var s: slice = beginCell().storeInt(1, 32).storeInt(2, 32).storeInt(3, 32).storeInt(4, 32).storeInt(5, 32).storeInt(6, 32).storeInt(7, 32).endCell().beginParse(); + var size1 = getRemainingBitsCount(s); + s.skipBits(32); + var s1: slice = s.getFirstBits(64); + var n1 = s1.loadInt(32); + var size2 = getRemainingBitsCount(s); + s.loadInt(32); + var size3 = getRemainingBitsCount(s); + 
s.removeLastBits(32); + var size4 = getRemainingBitsCount(s); + var n2 = s.loadInt(32); + var size5 = getRemainingBitsCount(s); + return (n1, n2, size1, size2, size3, size4, size5); +} + +@method_id(112) +fun test12() { + var (result1, result2) = (0, 0); + try { + beginCell().storeRef(beginCell().endCell()).endCell().beginParse().assertEndOfSlice(); + result1 = 100; + } catch (code) { + result1 = code; + } + try { + beginCell().endCell().beginParse().assertEndOfSlice(); + result2 = 100; + } catch (code) { + result2 = code; + } + return (result1, result2); +} + +@method_id(113) +fun test13() { + var ref2 = beginCell().storeInt(1, 32).endCell(); + var ref1 = beginCell().storeInt(1, 32).storeRef(ref2).endCell(); + var c = beginCell().storeInt(444, 32).storeRef(ref1).storeRef(ref1).storeRef(ref1).storeRef(ref2).storeInt(4, 32).endCell(); + var (n_cells1, n_bits1, n_refs1) = c.calculateCellSizeStrict(10); + var s = c.beginParse(); + s.loadRef(); + s.loadRef(); + var n = s.loadInt(32); + var (n_cells2, n_bits2, n_refs2) = s.calculateSliceSizeStrict(10); + return ([n_cells1, n_bits1, n_refs1], [n_cells2, n_bits2, n_refs2], n); +} + +@method_id(114) +fun test110(x: int) { + var s = beginCell().storeBool(x < 0).storeBool(0).storeBool(x).endCell().beginParse(); + return (s.loadBool(), s.loadBool(), s.loadBool()); +} + +@method_id(115) +fun test111() { + var s = beginCell().storeMessageOp(123).storeMessageQueryId(456) + .storeAddressNone().storeAddressNone() + .storeUint(0, 32) + .storeUint(123, 32).storeUint(456, 64).storeUint(789, 64) + .endCell().beginParse(); + var op1 = s.loadUint(32); + var q1 = s.loadUint(64); + if (s.addressIsNone()) { + s.skipBits(2); + } + if (s.loadBool() == 0) { + assert(s.loadBool() == 0) throw 444; + s.skipBouncedPrefix(); + } + var op2 = s.loadMessageOp(); + var q2 = s.loadMessageQueryId(); + s.skipBits(64); + s.assertEndOfSlice(); + assert(isMessageBounced(0x001)) throw 444; + return (op1, q1, op2, q2); +} + +fun main(): int { + return 0; +} + 
+/** +@testcase | 101 | | [ 1 2 3 4 5 ] +@testcase | 102 | | [ 1 2 3 ] +@testcase | 103 | 103 | 103 +@testcase | 104 | | [ 1 3 ] +@testcase | 105 | | [ 210 1 ] +@testcase | 107 | | 72 40 72 +@testcase | 108 | | 0 40 32 +@testcase | 110 | | 64 3 0 0 -1 0 100 -1 +@testcase | 111 | | 2 3 224 192 160 128 96 +@testcase | 112 | | 9 100 +@testcase | 113 | | [ 3 128 5 ] [ 2 96 3 ] 444 +@testcase | 114 | -1 | -1 0 -1 +@testcase | 114 | 0 | 0 0 0 +@testcase | 115 | | 123 456 123 456 + +Note that since 'compute-asm-ltr' became on by default, chaining methods codegen is not quite optimal. +@fif_codegen +""" + test6 PROC:<{ + // + NEWC // _1 + 1 PUSHINT // _1 _2=1 + SWAP // _2=1 _1 + 32 STU // _0 + 2 PUSHINT // _0 _6=2 + SWAP // _6=2 _0 + 32 STU // _0 + 3 PUSHINT // _0 _10=3 + SWAP // _10=3 _0 + 32 STU // _0 + }> +""" + */ diff --git a/tolk-tester/tests/co1.tolk b/tolk-tester/tests/co1.tolk new file mode 100644 index 000000000..f124e1de8 --- /dev/null +++ b/tolk-tester/tests/co1.tolk @@ -0,0 +1,72 @@ +const int1 = 1; +const int2 = 2; + +const int101: int = 101; +const int111: int = 111; + +const int1r = int1; + +const str1 = "const1"; +const str2 = "aabbcc"s; + +const str2r: slice = str2; + +const str1int = 0x636f6e737431; +const str2int = 0xAABBCC; + +const nibbles: int = 4; + +fun iget1(): int { return int1; } +fun iget2(): int { return int2; } +fun iget3(): int { return int1+int2; } + +fun iget1r(): int { return int1r; } + +fun sget1(): slice { return str1; } +fun sget2(): slice { return str2; } +fun sget2r(): slice { return str2r; } + +const int240: int = ((int1+int2)*10)<<3; + +fun iget240(): int { return int240; } + +@pure +fun newc(): builder +asm "NEWC"; +@pure +fun endcs(b: builder): slice +asm "ENDC" "CTOS"; +@pure +fun sdeq(s1: slice, s2: slice): int +asm "SDEQ"; +@pure +fun stslicer(b: builder, s: slice): builder +asm "STSLICER"; + +fun main() { + var i1: int = iget1(); + var i2: int = iget2(); + var i3: int = iget3(); + + assert(i1 == 1) throw int101; + assert(i2 
== 2) throw 102; + assert(i3 == 3) throw 103; + + var s1: slice = sget1(); + var s2: slice = sget2(); + var s3: slice = newc().stslicer(str1).stslicer(str2r).endcs(); + + assert(sdeq(s1, newc().storeUint(str1int, 12 * nibbles).endcs())) throw int111; + assert(sdeq(s2, newc().storeUint(str2int, 6 * nibbles).endcs())) throw 112; + assert(sdeq(s3, newc().storeUint(0x636f6e737431AABBCC, 18 * nibbles).endcs())) throw 113; + + var i4: int = iget240(); + assert(i4 == 240) throw ((104)); + return 0; +} + +/** +@testcase | 0 | | 0 + +@code_hash 61273295789179921867241079778489100375537711211918844448475493726205774530743 +*/ diff --git a/tolk-tester/tests/code_after_ifelse.tolk b/tolk-tester/tests/code_after_ifelse.tolk new file mode 100644 index 000000000..6a16262f8 --- /dev/null +++ b/tolk-tester/tests/code_after_ifelse.tolk @@ -0,0 +1,41 @@ +fun elseif(cond: int) { + if (cond > 0) { + throw(cond); + } +} + +@inline +@method_id(101) +fun foo(x: int): int { + if (x==1) { + return 111; + } else { + x *= 2; + } + return x + 1; +} + +fun main(x: int): (int, int) { + return (foo(x), 222); +} + +@method_id(102) +fun test2(x: int) { + try { + if (x < 0) { return -1; } + elseif (x); + } catch(excNo) { + return excNo * 1000; + } + return 0; +} + +/** + method_id | in | out +@testcase | 0 | 1 | 111 222 +@testcase | 0 | 3 | 7 222 +@testcase | 101 | 1 | 111 +@testcase | 101 | 3 | 7 +@testcase | 102 | -5 | -1 +@testcase | 102 | 5 | 5000 +*/ diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk new file mode 100644 index 000000000..02379540c --- /dev/null +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -0,0 +1,96 @@ +@method_id(101) +fun test1(): int { + var x = false; + if (x == true) { + x= 100500; + } + return x; +} + +fun main(s: int) { + var (z, t) = (17, s); + while (z > 0) { + t = s; + z -= 1; + } + return ~ t; +} + +/** + method_id | in | out +@testcase | 0 | 1 | -2 +@testcase | 0 | 5 | -6 +@testcase | 101 | | 0 + +Below, I just 
give examples of @fif_codegen tag: +* a pattern can be single-line (after the tag), or multi-line, surrounded with """ +* there may be multiple @fif_codegen, they all will be checked +* indentation (spaces) is intentionally not checked +* "..." means any number of any lines +* lines not divided with "..." are expected to be consecutive in fif output +* //comments can be omitted, but if present, they are also expected to be equal +* there is also a tag @fif_codegen_avoid to check a pattern does not occur + +@fif_codegen +""" +main PROC:<{ + // s + 17 PUSHINT // s _3=17 + OVER // s z=17 t + WHILE:<{ + ... + }>DO<{ // s z t + ... + s1 s(-1) PUXC // s t z + ... + 2 1 BLKDROP2 + ... +}> +""" + +@fif_codegen +""" +main PROC:<{ + ... + WHILE:<{ + ... + }>DO<{ + ... + }> + }END>c +""" + +@fif_codegen +""" + OVER + 0 GTINT // s z t _5 +""" + +@fif_codegen +""" + "Asm.fif" include + ... + PROGRAM{ + ... + }END>c +""" + +@fif_codegen +""" +test1 PROC:<{ +// +FALSE +}> +""" + +@fif_codegen NOT // _8 +@fif_codegen main PROC:<{ + +@fif_codegen_avoid PROCINLINE +@fif_codegen_avoid END c +@fif_codegen_avoid +""" +multiline +can also be +""" +*/ diff --git a/tolk-tester/tests/comments.tolk b/tolk-tester/tests/comments.tolk new file mode 100644 index 000000000..cd2877470 --- /dev/null +++ b/tolk-tester/tests/comments.tolk @@ -0,0 +1,31 @@ + +fun main(): int + +// inside a comment, /* doesn't start a new one +/* but if // is inside, a comment may end at this line*/ { + var cc = "a string may contain /* or // or /*, not parsed"; + // return 1; + return get10() + /* + traditional comment /* may not be nested + // line comment + // ends */1 + + 1; + /* moreover, different comment styles + may be used for opening and closing + */ +} + +/*** + first line + //two-lined*/ + +@method_id(10) +fun get10(): int { + return 10; +} + + +/** +@testcase | 0 | | 12 +@testcase | 10 | | 10 +*/ diff --git a/tolk-tester/tests/dicts-demo.tolk b/tolk-tester/tests/dicts-demo.tolk new file mode 100644 index
000000000..291bd2ea4 --- /dev/null +++ b/tolk-tester/tests/dicts-demo.tolk @@ -0,0 +1,105 @@ +import "@stdlib/tvm-dicts" + +fun addIntToIDict(mutate self: cell, key: int, number: int): void { + return self.iDictSetBuilder(32, key, beginCell().storeInt(number, 32)); +} + +fun calculateDictLen(d: cell) { + var len = 0; + var (k, v, f) = d.uDictGetFirst(32); + while (f) { + len += 1; + (k, v, f) = d.uDictGetNext(32, k); + } + return len; +} + +fun loadTwoDigitNumberFromSlice(mutate self: slice): int { + var n1 = self.loadInt(8); + var n2 = self.loadInt(8); + return (n1 - 48) * 10 + (n2 - 48); +} + + +@method_id(101) +fun test101(getK1: int, getK2: int, getK3: int) { + var dict = createEmptyDict(); + dict.uDictSetBuilder(32, 1, beginCell().storeUint(1, 32)); + var (old1: slice, found1) = dict.uDictSetAndGet(32, getK1, beginCell().storeUint(2, 32).endCell().beginParse()); + var (old2: slice, found2) = dict.uDictSetAndGet(32, getK2, beginCell().storeUint(3, 32).endCell().beginParse()); + var (cur3: slice, found3) = dict.uDictGet(32, getK3); + return ( + found1 ? old1.loadUint(32) : -1, + found2 ? old2.loadUint(32) : -1, + found3 ? 
cur3.loadUint(32) : -1 + ); +} + +@method_id(102) +fun test102() { + var dict = createEmptyDict(); + dict.addIntToIDict(2, 102); + dict.addIntToIDict(1, 101); + dict.addIntToIDict(4, 104); + dict.addIntToIDict(3, 103); + var deleted = createEmptyTuple(); + var shouldBreak = false; + while (!shouldBreak) { + var (kDel, kVal, wasDel) = dict.iDictDeleteLastAndGet(32); + if (wasDel) { + deleted.tuplePush([kDel, kVal.loadInt(32)]); + } else { + shouldBreak = true; + } + } + return deleted; +} + +@method_id(103) +fun test103() { + var dict = createEmptyDict(); + dict.uDictSetBuilderIfNotExists(32, 1,beginCell().storeInt(1, 32)); + dict.uDictSetBuilderIfNotExists(32, 1,beginCell().storeInt(1, 32)); + var len1 = calculateDictLen(dict); + dict.uDictSetBuilderIfExists(32, 2,beginCell().storeInt(1, 32)); + dict.uDictSetBuilderIfExists(32, 2,beginCell().storeInt(1, 32)); + var len2 = calculateDictLen(dict); + dict.uDictSetBuilder(32, 3,beginCell().storeInt(1, 32)); + dict.uDictSetBuilderIfExists(32, 3,beginCell().storeInt(1, 32)); + var len3 = calculateDictLen(dict); + var (delK1, _, _) = dict.uDictDeleteFirstAndGet(32); + var (delK2, _, _) = dict.uDictDeleteFirstAndGet(32); + var (delK3, _, _) = dict.uDictDeleteFirstAndGet(32); + return (len1, len2, len3, delK1, delK2, delK3); +} + +@method_id(104) +fun test104() { + var dict = createEmptyDict(); + dict.sDictSetBuilder(32, "7800", beginCell().storeUint(5 + 48, 8).storeUint(6 + 48, 8)); + dict.sDictSet(32, "key1", "12"); + var (old1, _) = dict.sDictSetAndGet(32, "key1", "34"); + var (old2, _) = dict.sDictDeleteAndGet(32, "key1"); + var (restK, restV, _) = dict.sDictGetFirst(32); + var (restK1, restV1, _) = dict.sDictDeleteLastAndGet(32); + assert (restK.isSliceBitsEqual(restK1)) throw 123; + assert (restV.isSliceBitsEqual(restV1)) throw 123; + return ( + old1.loadTwoDigitNumberFromSlice(), + old2.loadTwoDigitNumberFromSlice(), + restV.loadTwoDigitNumberFromSlice(), + restK.loadTwoDigitNumberFromSlice(), + 
restK.loadTwoDigitNumberFromSlice() + ); +} + +fun main() {} + +/** +@testcase | 101 | 1 1 1 | 1 2 3 +@testcase | 101 | 1 2 1 | 1 -1 2 +@testcase | 101 | 1 2 3 | 1 -1 -1 +@testcase | 102 | | [ [ 4 104 ] [ 3 103 ] [ 2 102 ] [ 1 101 ] ] +@testcase | 103 | | 1 1 2 1 3 (null) +@testcase | 104 | | 12 34 56 78 0 + */ diff --git a/tolk-tester/tests/if_stmt.tolk b/tolk-tester/tests/if_stmt.tolk new file mode 100644 index 000000000..2c51ac515 --- /dev/null +++ b/tolk-tester/tests/if_stmt.tolk @@ -0,0 +1,66 @@ +@method_id(101) +fun test1(x: int): int { + if (x > 200) { + return 200; + } else if (x > 100) { + return 100; + } else if (!(x <= 50)) { + if (!(x > 90)) { + return x; + } else { + return 90; + } + } else { + return 0; + } +} + +@method_id(102) +fun test2(x: int) { + if (x == 20) { return 20; } + if (x != 50) { return 50; } + if (x == 0) { return 0; } + return -1; +} + +@method_id(103) +fun test3(x: int) { + if (!(x != 20)) { return 20; } + if (!(x == 50)) { return 50; } + if (!x) { return 0; } + return -1; +} + +fun main() { + +} + +/** +@testcase | 101 | 0 | 0 +@testcase | 101 | 1000 | 200 +@testcase | 101 | 150 | 100 +@testcase | 101 | -1 | 0 +@testcase | 101 | 87 | 87 +@testcase | 101 | 94 | 90 +@testcase | 102 | 20 | 20 +@testcase | 102 | 40 | 50 +@testcase | 102 | 50 | -1 +@testcase | 103 | 20 | 20 +@testcase | 103 | 40 | 50 +@testcase | 103 | 50 | -1 + +@fif_codegen +""" + test3 PROC:<{ + // x + DUP // x x + 20 NEQINT // x _2 + IFNOTJMP:<{ // x + DROP // + 20 PUSHINT // _3=20 + }> // x + DUP // x x + 50 EQINT // x _5 + IFNOTJMP:<{ // x +""" +*/ diff --git a/tolk-tester/tests/imports/invalid-no-import.tolk b/tolk-tester/tests/imports/invalid-no-import.tolk new file mode 100644 index 000000000..6c4ab6ce6 --- /dev/null +++ b/tolk-tester/tests/imports/invalid-no-import.tolk @@ -0,0 +1,4 @@ +fun demoOfInvalid(): (int) { + var f = someAdd; + return f(1, 2); +} diff --git a/tolk-tester/tests/imports/some-math.tolk b/tolk-tester/tests/imports/some-math.tolk new file 
mode 100644 index 000000000..dc0c9c9b7 --- /dev/null +++ b/tolk-tester/tests/imports/some-math.tolk @@ -0,0 +1,3 @@ +fun someAdd(a: int, b: int): int { + return a + b + 0; +} diff --git a/tolk-tester/tests/imports/use-dicts-err.tolk b/tolk-tester/tests/imports/use-dicts-err.tolk new file mode 100644 index 000000000..c5ba89d22 --- /dev/null +++ b/tolk-tester/tests/imports/use-dicts-err.tolk @@ -0,0 +1,21 @@ +fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { + var dict: cell = createEmptyDict(); + dict.idict_set_builder(32, 3, begin_cell().store_int(30, 32)); + dict.idict_set_builder(32, 4, begin_cell().store_int(40, 32)); + dict.idict_set_builder(32, 5, begin_cell().store_int(valueAt5, 32)); + return dict; +} + +fun lookupIdxByValue(idict32: cell, value: int): int { + var cur_key = -1; + do { + var (cur_key redef, cs: slice, found: int) = idict32.idictGetNext(32, cur_key); + // one-line condition (via &) doesn't work, since right side is calculated immediately + if (found) { + if (cs.loadInt(32) == value) { + return cur_key; + } + } + } while (found); + return -1; +} diff --git a/tolk-tester/tests/imports/use-dicts.tolk b/tolk-tester/tests/imports/use-dicts.tolk new file mode 100644 index 000000000..26a9a9ccd --- /dev/null +++ b/tolk-tester/tests/imports/use-dicts.tolk @@ -0,0 +1,23 @@ +import "@stdlib/tvm-dicts" + +fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { + var dict: cell = createEmptyDict(); + dict.iDictSetBuilder(32, 3, beginCell().storeInt(30, 32)); + dict.iDictSetBuilder(32, 4, beginCell().storeInt(40, 32)); + dict.iDictSetBuilder(32, 5, beginCell().storeInt(valueAt5, 32)); + return dict; +} + +fun lookupIdxByValue(idict32: cell, value: int): int { + var cur_key = -1; + do { + var (cur_key redef, cs: slice, found: int) = idict32.iDictGetNext(32, cur_key); + // one-line condition (via &) doesn't work, since right side is calculated immediately + if (found) { + if (cs.loadInt(32) == value) { + return cur_key; + } + } + } while (found); + return -1; 
+} diff --git a/tolk-tester/tests/inline_big.tolk b/tolk-tester/tests/inline_big.tolk new file mode 100644 index 000000000..be014eb5d --- /dev/null +++ b/tolk-tester/tests/inline_big.tolk @@ -0,0 +1,62 @@ +@inline +fun foo(x: int): int { + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + return x; +} + +fun main(x: int): int { + return foo(x) * 10 + 5; +} +/** + method_id | in | out +@testcase | 0 | 9 | 9111111111111111111111111111111111111111111111111115 +*/ diff --git a/tolk-tester/tests/inline_if.tolk b/tolk-tester/tests/inline_if.tolk new file mode 100644 index 000000000..9f1fa8c12 --- /dev/null +++ b/tolk-tester/tests/inline_if.tolk @@ -0,0 +1,28 @@ +fun foo1(x: int): int { + if (x == 1) { + return 1; + } + return 2; +} +@inline +fun foo2(x: int): int { + if (x == 1) { + return 11; + } + return 22; +} +@inline_ref +fun foo3(x: int): int { + if (x == 1) { + return 111; + } + return 222; +} +fun main(x: int): (int, int, int) { + return (foo1(x)+1, foo2(x)+1, foo3(x)+1); +} +/** + method_id | in | out +@testcase | 0 | 1 | 2 12 112 +@testcase | 0 | 2 | 3 23 223 +*/ diff --git a/tolk-tester/tests/inline_loops.tolk 
b/tolk-tester/tests/inline_loops.tolk new file mode 100644 index 000000000..eba595a5e --- /dev/null +++ b/tolk-tester/tests/inline_loops.tolk @@ -0,0 +1,48 @@ +global g: int; + +@inline +fun foo_repeat() { + g = 1; + repeat(5) { + g *= 2; + } +} + +@inline +fun foo_until(): int { + g = 1; + var i: int = 0; + do { + g *= 2; + i += 1; + } while (i < 8); + return i; +} + +@inline +fun foo_while(): int { + g = 1; + var i: int = 0; + while (i < 10) { + g *= 2; + i += 1; + } + return i; +} + +fun main() { + foo_repeat(); + var x: int = g; + foo_until(); + var y: int = g; + foo_while(); + var z: int = g; + return (x, y, z); +} + +/** + method_id | in | out +@testcase | 0 | | 32 256 1024 + +@code_hash 102749806552989901976653997041637095139193406161777448419603700344770997608788 +*/ diff --git a/tolk-tester/tests/invalid-bitwise-1.tolk b/tolk-tester/tests/invalid-bitwise-1.tolk new file mode 100644 index 000000000..f939d60db --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-1.tolk @@ -0,0 +1,9 @@ +fun main(flags: int): int { + return flags&0xFF!=0; +} + +/** +@compilation_should_fail +@stderr & has lower precedence than != +@stderr Use parenthesis +*/ diff --git a/tolk-tester/tests/invalid-bitwise-2.tolk b/tolk-tester/tests/invalid-bitwise-2.tolk new file mode 100644 index 000000000..e6fcd1e54 --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-2.tolk @@ -0,0 +1,8 @@ +fun justTrue(): int { return true; } + +const a = justTrue() | 1 < 9; + +/** +@compilation_should_fail +@stderr | has lower precedence than < +*/ diff --git a/tolk-tester/tests/invalid-bitwise-3.tolk b/tolk-tester/tests/invalid-bitwise-3.tolk new file mode 100644 index 000000000..ee43860bf --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-3.tolk @@ -0,0 +1,8 @@ +fun justTrue(): int { return true; } + +const a = justTrue() | (1 < 9) | justTrue() != true; + +/** +@compilation_should_fail +@stderr | has lower precedence than != +*/ diff --git a/tolk-tester/tests/invalid-bitwise-4.tolk 
b/tolk-tester/tests/invalid-bitwise-4.tolk new file mode 100644 index 000000000..563ed535d --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-4.tolk @@ -0,0 +1,6 @@ +const a = (1) <=> (0) ^ 8; + +/** +@compilation_should_fail +@stderr ^ has lower precedence than <=> +*/ diff --git a/tolk-tester/tests/invalid-bitwise-5.tolk b/tolk-tester/tests/invalid-bitwise-5.tolk new file mode 100644 index 000000000..1030ed8d5 --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-5.tolk @@ -0,0 +1,11 @@ +const MAX_SLIPAGE = 100; + +fun main(jetton_amount: int, msg_value: int, slippage: int) { + if ((0 == jetton_amount) | (msg_value == 0) | true | false | slippage > MAX_SLIPAGE) { + } +} + +/** +@compilation_should_fail +@stderr | has lower precedence than > +*/ diff --git a/tolk-tester/tests/invalid-bitwise-6.tolk b/tolk-tester/tests/invalid-bitwise-6.tolk new file mode 100644 index 000000000..9c4dc67e4 --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-6.tolk @@ -0,0 +1,9 @@ +fun main() { + if ((1==1)|(2==2)&(3==3)) { + } +} + +/** +@compilation_should_fail +@stderr mixing | with & without parenthesis +*/ diff --git a/tolk-tester/tests/invalid-bitwise-7.tolk b/tolk-tester/tests/invalid-bitwise-7.tolk new file mode 100644 index 000000000..39fba401c --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-7.tolk @@ -0,0 +1,8 @@ +fun main() { + var c = x && y || x && y; +} + +/** +@compilation_should_fail +@stderr mixing && with || without parenthesis +*/ diff --git a/tolk-tester/tests/invalid-builtin-1.tolk b/tolk-tester/tests/invalid-builtin-1.tolk new file mode 100644 index 000000000..6a7f1ca7f --- /dev/null +++ b/tolk-tester/tests/invalid-builtin-1.tolk @@ -0,0 +1,10 @@ +fun moddiv2(x: int, y: int): (int, int) builtin; + +/** +@compilation_should_fail +@stderr +""" +`builtin` used for non-builtin function +fun moddiv2 +""" +*/ diff --git a/tolk-tester/tests/invalid-call-1.tolk b/tolk-tester/tests/invalid-call-1.tolk new file mode 100644 index 000000000..1c32422ee --- 
/dev/null +++ b/tolk-tester/tests/invalid-call-1.tolk @@ -0,0 +1,9 @@ +fun main() { + return true(); +} + +/** +@compilation_should_fail +The message is weird now, but later I'll rework error messages anyway. +@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??3 with int + */ diff --git a/tolk-tester/tests/invalid-call-2.tolk b/tolk-tester/tests/invalid-call-2.tolk new file mode 100644 index 000000000..5a8c9fa5d --- /dev/null +++ b/tolk-tester/tests/invalid-call-2.tolk @@ -0,0 +1,14 @@ +fun add1(x: int) { + return x + 1; +} + +fun main() { + val adder_fn = add1; + var x = 10; + return adder_fn(mutate x); +} + +/** +@compilation_should_fail +@stderr `mutate` used for non-mutate argument + */ diff --git a/tolk-tester/tests/invalid-call-3.tolk b/tolk-tester/tests/invalid-call-3.tolk new file mode 100644 index 000000000..ac98df704 --- /dev/null +++ b/tolk-tester/tests/invalid-call-3.tolk @@ -0,0 +1,12 @@ +fun with2Params(x: int, y: int) { + +} + +fun main() { + return with2Params(1); +} + +/** +@compilation_should_fail +@stderr too few arguments in call to `with2Params`, expected 2, have 1 + */ diff --git a/tolk-tester/tests/invalid-call-4.tolk b/tolk-tester/tests/invalid-call-4.tolk new file mode 100644 index 000000000..c8f7dcebf --- /dev/null +++ b/tolk-tester/tests/invalid-call-4.tolk @@ -0,0 +1,13 @@ +fun methodWith1Param(self: int, param: int) { + +} + +fun main() { + val x = 10; + x.methodWith1Param(2, "asdf"); +} + +/** +@compilation_should_fail +@stderr too many arguments in call to `methodWith1Param`, expected 1, have 2 + */ diff --git a/tolk-tester/tests/invalid-call-5.tolk b/tolk-tester/tests/invalid-call-5.tolk new file mode 100644 index 000000000..89ab026a9 --- /dev/null +++ b/tolk-tester/tests/invalid-call-5.tolk @@ -0,0 +1,13 @@ +fun inc(x: int) { + return x + 1; +} + +fun main() { + return inc(_); +} + +/** +@compilation_should_fail +@stderr rvalue expected +@stderr inc(_) + */ diff --git 
a/tolk-tester/tests/invalid-call-6.tolk b/tolk-tester/tests/invalid-call-6.tolk new file mode 100644 index 000000000..cbf598066 --- /dev/null +++ b/tolk-tester/tests/invalid-call-6.tolk @@ -0,0 +1,12 @@ +fun nothing() { +} + +fun main() { + val x = 0; + return x.nothing(); +} + +/** +@compilation_should_fail +@stderr `nothing` has no parameters and can not be called as method + */ diff --git a/tolk-tester/tests/invalid-call-7.tolk b/tolk-tester/tests/invalid-call-7.tolk new file mode 100644 index 000000000..4ad038c9e --- /dev/null +++ b/tolk-tester/tests/invalid-call-7.tolk @@ -0,0 +1,14 @@ +fun main() { + beginCell() + .storeAddressNone() + .storeUint(3, 32) + .storeUnexisting() + .storeInt(1, 32) + .endCell(); +} + +/** +@compilation_should_fail +@stderr undefined symbol `storeUnexisting` +@stderr .storeUnexisting() + */ diff --git a/tolk-tester/tests/invalid-call-8.tolk b/tolk-tester/tests/invalid-call-8.tolk new file mode 100644 index 000000000..c613d7d9c --- /dev/null +++ b/tolk-tester/tests/invalid-call-8.tolk @@ -0,0 +1,8 @@ +fun main() { + var incoming_ton: int = get_incoming_value().3(); +} + +/** +@compilation_should_fail +@stderr expected method name, got `3` + */ diff --git a/tolk-tester/tests/invalid-catch-1.tolk b/tolk-tester/tests/invalid-catch-1.tolk new file mode 100644 index 000000000..756722bb8 --- /dev/null +++ b/tolk-tester/tests/invalid-catch-1.tolk @@ -0,0 +1,12 @@ +fun main() { + try { + + } catch(int, arg) {} + return 0; +} + +/** +@compilation_should_fail +@stderr expected identifier, got `int` +@stderr catch(int + */ diff --git a/tolk-tester/tests/invalid-catch-2.tolk b/tolk-tester/tests/invalid-catch-2.tolk new file mode 100644 index 000000000..a02761463 --- /dev/null +++ b/tolk-tester/tests/invalid-catch-2.tolk @@ -0,0 +1,9 @@ +fun main() { + try {} + catch(err, arg, more) {} +} + +/** +@compilation_should_fail +@stderr expected `)`, got `,` + */ diff --git a/tolk-tester/tests/invalid-cmt-nested.tolk 
b/tolk-tester/tests/invalid-cmt-nested.tolk new file mode 100644 index 000000000..807e7be88 --- /dev/null +++ b/tolk-tester/tests/invalid-cmt-nested.tolk @@ -0,0 +1,11 @@ +/* +in tolk we decided to drop nested comments support +/* +not nested + */ +*/ + +/** +@compilation_should_fail +@stderr error: expected fun or get, got `*` +*/ diff --git a/tolk-tester/tests/invalid-cmt-old.tolk b/tolk-tester/tests/invalid-cmt-old.tolk new file mode 100644 index 000000000..58927d3a0 --- /dev/null +++ b/tolk-tester/tests/invalid-cmt-old.tolk @@ -0,0 +1,8 @@ +fun main(): int { + ;; here is not a comment +} + +/** +@compilation_should_fail +@stderr error: expected `;`, got `is` + */ diff --git a/tolk-tester/tests/invalid-cyclic-1.tolk b/tolk-tester/tests/invalid-cyclic-1.tolk new file mode 100644 index 000000000..c46b1640e --- /dev/null +++ b/tolk-tester/tests/invalid-cyclic-1.tolk @@ -0,0 +1,8 @@ +const ONE = TWO - 1; +const TWO = ONE + 1; + +/** +@compilation_should_fail +@stderr const ONE +@stderr undefined symbol `TWO` + */ diff --git a/tolk-tester/tests/invalid-declaration-1.tolk b/tolk-tester/tests/invalid-declaration-1.tolk new file mode 100644 index 000000000..ea27e723b --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-1.tolk @@ -0,0 +1,6 @@ +const a = 10, b = 20; + +/** +@compilation_should_fail +@stderr multiple declarations are not allowed + */ diff --git a/tolk-tester/tests/invalid-declaration-10.tolk b/tolk-tester/tests/invalid-declaration-10.tolk new file mode 100644 index 000000000..7ccb182d5 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-10.tolk @@ -0,0 +1,8 @@ +get fun onInternalMessage() { + return 0; +} + +/** +@compilation_should_fail +@stderr invalid declaration of a reserved function + */ diff --git a/tolk-tester/tests/invalid-declaration-2.tolk b/tolk-tester/tests/invalid-declaration-2.tolk new file mode 100644 index 000000000..700632517 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-2.tolk @@ -0,0 +1,8 @@ +fun main(int): int { 
+ +} + +/** +@compilation_should_fail +@stderr expected parameter name, got `int` +*/ diff --git a/tolk-tester/tests/invalid-declaration-3.tolk b/tolk-tester/tests/invalid-declaration-3.tolk new file mode 100644 index 000000000..3edc09fda --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-3.tolk @@ -0,0 +1,8 @@ +int main() { + +} + +/** +@compilation_should_fail +@stderr expected fun or get, got `int` +*/ diff --git a/tolk-tester/tests/invalid-declaration-4.tolk b/tolk-tester/tests/invalid-declaration-4.tolk new file mode 100644 index 000000000..183dda96f --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-4.tolk @@ -0,0 +1,8 @@ +fun main() { + int x = 0; +} + +/** +@compilation_should_fail +@stderr probably, you use FunC-like declarations; valid syntax is `var x: int = ...` +*/ diff --git a/tolk-tester/tests/invalid-declaration-5.tolk b/tolk-tester/tests/invalid-declaration-5.tolk new file mode 100644 index 000000000..bf23d8570 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-5.tolk @@ -0,0 +1,6 @@ +enum MyKind { } + +/** +@compilation_should_fail +@stderr `enum` is not supported yet +*/ diff --git a/tolk-tester/tests/invalid-declaration-6.tolk b/tolk-tester/tests/invalid-declaration-6.tolk new file mode 100644 index 000000000..42cb7b953 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-6.tolk @@ -0,0 +1,8 @@ +get seqno(self: int) { + return 0; +} + +/** +@compilation_should_fail +@stderr get methods can't have `mutate` and `self` params + */ diff --git a/tolk-tester/tests/invalid-declaration-7.tolk b/tolk-tester/tests/invalid-declaration-7.tolk new file mode 100644 index 000000000..8d188ea08 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-7.tolk @@ -0,0 +1,8 @@ +fun main() { + var a = 10, b = 20; +} + +/** +@compilation_should_fail +@stderr multiple declarations are not allowed + */ diff --git a/tolk-tester/tests/invalid-declaration-8.tolk b/tolk-tester/tests/invalid-declaration-8.tolk new file mode 100644 index 
000000000..06cb9a985 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-8.tolk @@ -0,0 +1,8 @@ +fun someDemo() { + return 0; +} + +/** +@compilation_should_fail +@stderr the contract has no entrypoint + */ diff --git a/tolk-tester/tests/invalid-declaration-9.tolk b/tolk-tester/tests/invalid-declaration-9.tolk new file mode 100644 index 000000000..8cb71c73a --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-9.tolk @@ -0,0 +1,9 @@ +fun recv_internal() { + return 0; +} + +/** +@compilation_should_fail +@stderr this is a reserved FunC/Fift identifier +@stderr you need `onInternalMessage` + */ diff --git a/tolk-tester/tests/invalid-get-method-1.tolk b/tolk-tester/tests/invalid-get-method-1.tolk new file mode 100644 index 000000000..263370d47 --- /dev/null +++ b/tolk-tester/tests/invalid-get-method-1.tolk @@ -0,0 +1,9 @@ +@method_id(123) +get fun hello(x: int, y: int): (int, int) { + return (x, y); +} + +/** +@compilation_should_fail +@stderr @method_id can be specified only for regular functions +*/ diff --git a/tolk-tester/tests/invalid-get-method-2.tolk b/tolk-tester/tests/invalid-get-method-2.tolk new file mode 100644 index 000000000..7c7a14136 --- /dev/null +++ b/tolk-tester/tests/invalid-get-method-2.tolk @@ -0,0 +1,17 @@ +@pure +get fun secret(): int { + return 0; +} +@pure +get fun balanced(): int { + return 1; +} + +fun main(): int { + return secret() + balanced(); +} + +/** +@compilation_should_fail +@stderr GET methods hash collision: `secret` and `balanced` produce the same hash +*/ diff --git a/tolk-tester/tests/invalid-import.tolk b/tolk-tester/tests/invalid-import.tolk new file mode 100644 index 000000000..416764b62 --- /dev/null +++ b/tolk-tester/tests/invalid-import.tolk @@ -0,0 +1,11 @@ +// line1 +/* */ import "unexisting.tolk"; +// line3 + +/** +@compilation_should_fail +On Linux/Mac, `realpath()` returns an error, and the error message is `cannot find file` +On Windows, it fails after, on reading, with a message "cannot open file" 
+@stderr invalid-import.tolk:2:7: error: Failed to import: cannot +@stderr import "unexisting.tolk"; + */ diff --git a/tolk-tester/tests/invalid-mutate-1.tolk b/tolk-tester/tests/invalid-mutate-1.tolk new file mode 100644 index 000000000..237940fc9 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-1.tolk @@ -0,0 +1,11 @@ +fun f(x: int) {} + +fun cantAssignToVal() { + val x = 10; + f(x += 1); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `x` + */ diff --git a/tolk-tester/tests/invalid-mutate-10.tolk b/tolk-tester/tests/invalid-mutate-10.tolk new file mode 100644 index 000000000..8cd37c517 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-10.tolk @@ -0,0 +1,16 @@ +fun increment(mutate x: int) { + x = x + 1; +} + +fun cantCallMutatingAsAMember() { + var x = 0; + x.increment(); + return x; +} + +/** +@compilation_should_fail +@stderr function `increment` mutates parameter `x` +@stderr consider calling `increment(mutate x)`, not `x.increment`() +@stderr alternatively, rename parameter to `self` to make it a method + */ diff --git a/tolk-tester/tests/invalid-mutate-11.tolk b/tolk-tester/tests/invalid-mutate-11.tolk new file mode 100644 index 000000000..9f2c2601e --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-11.tolk @@ -0,0 +1,8 @@ +fun load32(self: slice): int { + return self.loadUint(32); +} + +/** +@compilation_should_fail +@stderr modifying `self` (call a mutating method), which is immutable by default + */ diff --git a/tolk-tester/tests/invalid-mutate-12.tolk b/tolk-tester/tests/invalid-mutate-12.tolk new file mode 100644 index 000000000..c8c8c68ec --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-12.tolk @@ -0,0 +1,14 @@ +fun increment(mutate x: int) { + +} + +fun main() { + var x = 0; + var inc = increment; + inc(x); +} + +/** +@compilation_should_fail +@stderr saving `increment` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly + */ diff --git 
a/tolk-tester/tests/invalid-mutate-13.tolk b/tolk-tester/tests/invalid-mutate-13.tolk new file mode 100644 index 000000000..ad861fd88 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-13.tolk @@ -0,0 +1,8 @@ +fun onInternalMessage(mutate in_msg_body: slice) { + +} + +/** +@compilation_should_fail +@stderr invalid declaration of a reserved function + */ diff --git a/tolk-tester/tests/invalid-mutate-14.tolk b/tolk-tester/tests/invalid-mutate-14.tolk new file mode 100644 index 000000000..2ba645d13 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-14.tolk @@ -0,0 +1,8 @@ +fun main(cs: slice) { + return loadInt(cs, 32); +} + +/** +@compilation_should_fail +@stderr `loadInt` is a mutating method; consider calling `cs.loadInt()`, not `loadInt(cs)` + */ diff --git a/tolk-tester/tests/invalid-mutate-15.tolk b/tolk-tester/tests/invalid-mutate-15.tolk new file mode 100644 index 000000000..f6874fb8c --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-15.tolk @@ -0,0 +1,12 @@ +fun asdf(mutate cs: slice) {} + +fun main(cs: slice) { + cs.asdf(); +} + +/** +@compilation_should_fail +@stderr function `asdf` mutates parameter `cs` +@stderr consider calling `asdf(mutate cs)`, not `cs.asdf`() +@stderr alternatively, rename parameter to `self` to make it a method + */ diff --git a/tolk-tester/tests/invalid-mutate-2.tolk b/tolk-tester/tests/invalid-mutate-2.tolk new file mode 100644 index 000000000..7501fdaf5 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-2.tolk @@ -0,0 +1,10 @@ +fun cantAssignToVal() { + val x = 10; + var y = 20; + [y, x] = [30, 40]; +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `x` + */ diff --git a/tolk-tester/tests/invalid-mutate-3.tolk b/tolk-tester/tests/invalid-mutate-3.tolk new file mode 100644 index 000000000..c49973f71 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-3.tolk @@ -0,0 +1,11 @@ +const op_increase = 0x123; + +fun cantAssignToConst() { + var x = 10; + (x, op_increase) = (20, 30); +} + +/** 
+@compilation_should_fail +@stderr modifying an immutable variable `op_increase` + */ diff --git a/tolk-tester/tests/invalid-mutate-4.tolk b/tolk-tester/tests/invalid-mutate-4.tolk new file mode 100644 index 000000000..f25a707cb --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-4.tolk @@ -0,0 +1,14 @@ + +fun inc(mutate x: int) { + x += 1; +} + +fun cantPassToMutatingFunction() { + val myVal = 10; + inc(mutate myVal); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `myVal` + */ diff --git a/tolk-tester/tests/invalid-mutate-5.tolk b/tolk-tester/tests/invalid-mutate-5.tolk new file mode 100644 index 000000000..fd8d11924 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-5.tolk @@ -0,0 +1,14 @@ +fun cantCallMutatingMethod(c: cell) { + val s: slice = c.beginParse(); + if (1) { + var s: slice = c.beginParse(); + s.loadRef(); // this is ok, 's' is another variable + } + val i = s.loadUint(32); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `s` (call a mutating method) +@stderr s.loadUint + */ diff --git a/tolk-tester/tests/invalid-mutate-6.tolk b/tolk-tester/tests/invalid-mutate-6.tolk new file mode 100644 index 000000000..bb577ae47 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-6.tolk @@ -0,0 +1,16 @@ +const op_increase = 0x123; + +fun inc(mutate x: int): int { + x += 10; + return x + 1; +} + +fun cantCallMutatingFunctionWithImmutable() { + return inc(mutate op_increase); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `op_increase` (call a mutating function) +@stderr inc(mutate op_increase) + */ diff --git a/tolk-tester/tests/invalid-mutate-7.tolk b/tolk-tester/tests/invalid-mutate-7.tolk new file mode 100644 index 000000000..5b6b6afe4 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-7.tolk @@ -0,0 +1,15 @@ +fun incBoth(mutate x: int, mutate y: int) { + x += 10; + y += 10; +} + +fun cantCallMutatingFunctionWithRvalue() { + var x = 10; + incBoth(mutate x, mutate 30); +} 
+ +/** +@compilation_should_fail +@stderr lvalue expected (call a mutating function) +@stderr incBoth(mutate x, mutate 30) + */ diff --git a/tolk-tester/tests/invalid-mutate-8.tolk b/tolk-tester/tests/invalid-mutate-8.tolk new file mode 100644 index 000000000..0dd7c5687 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-8.tolk @@ -0,0 +1,10 @@ +fun cantRedefImmutable() { + val x = 10; + var (y: int, x redef) = (20, 30); + return (y, x); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `x` (left side of assignment) + */ diff --git a/tolk-tester/tests/invalid-mutate-9.tolk b/tolk-tester/tests/invalid-mutate-9.tolk new file mode 100644 index 000000000..7e79052e4 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-9.tolk @@ -0,0 +1,9 @@ +fun increment(self: int) { + self = self + 1; +} + +/** +@compilation_should_fail +@stderr modifying `self` (left side of assignment), which is immutable by default +@stderr probably, you want to declare `mutate self` + */ diff --git a/tolk-tester/tests/invalid-no-import-1.tolk b/tolk-tester/tests/invalid-no-import-1.tolk new file mode 100644 index 000000000..89f879a36 --- /dev/null +++ b/tolk-tester/tests/invalid-no-import-1.tolk @@ -0,0 +1,8 @@ +import "imports/some-math.tolk"; +import "imports/invalid-no-import.tolk"; + +/** +@compilation_should_fail +@stderr imports/invalid-no-import.tolk:2:13 +@stderr Using a non-imported symbol `someAdd` + */ diff --git a/tolk-tester/tests/invalid-no-import-2.tolk b/tolk-tester/tests/invalid-no-import-2.tolk new file mode 100644 index 000000000..d78346b90 --- /dev/null +++ b/tolk-tester/tests/invalid-no-import-2.tolk @@ -0,0 +1,9 @@ +import "@stdlib/tvm-dicts" +import "imports/use-dicts-err.tolk" + +/** +@compilation_should_fail +@stderr imports/use-dicts-err.tolk:2:22 +@stderr Using a non-imported symbol `createEmptyDict` +@stderr Forgot to import "@stdlib/tvm-dicts"? 
+ */ diff --git a/tolk-tester/tests/invalid-nopar-1.tolk b/tolk-tester/tests/invalid-nopar-1.tolk new file mode 100644 index 000000000..a9a848654 --- /dev/null +++ b/tolk-tester/tests/invalid-nopar-1.tolk @@ -0,0 +1,12 @@ +fun eq(x: int): int { + return x; +} + +fun main(x: int): int { + return eq x; +} + +/** +@compilation_should_fail +@stderr expected `;`, got `x` + */ diff --git a/tolk-tester/tests/invalid-nopar-2.tolk b/tolk-tester/tests/invalid-nopar-2.tolk new file mode 100644 index 000000000..c7c136509 --- /dev/null +++ b/tolk-tester/tests/invalid-nopar-2.tolk @@ -0,0 +1,12 @@ + +fun main(x: int): int { + if x { + return 10; + } + return 0; +} + +/** +@compilation_should_fail +@stderr expected `(`, got `x` + */ diff --git a/tolk-tester/tests/invalid-nopar-3.tolk b/tolk-tester/tests/invalid-nopar-3.tolk new file mode 100644 index 000000000..8249ca284 --- /dev/null +++ b/tolk-tester/tests/invalid-nopar-3.tolk @@ -0,0 +1,12 @@ + +fun main(x: int): int { + if (x, 1) { + return 10; + } + return 0; +} + +/** +@compilation_should_fail +@stderr expected `)`, got `,` + */ diff --git a/tolk-tester/tests/invalid-nopar-4.tolk b/tolk-tester/tests/invalid-nopar-4.tolk new file mode 100644 index 000000000..6e833f995 --- /dev/null +++ b/tolk-tester/tests/invalid-nopar-4.tolk @@ -0,0 +1,8 @@ +fun load_u32(cs: slice): (slice, int) { + return cs.load_uint 32; +} + +/** +@compilation_should_fail +@stderr expected `(`, got `32` + */ diff --git a/tolk-tester/tests/invalid-pure-1.tolk b/tolk-tester/tests/invalid-pure-1.tolk new file mode 100644 index 000000000..5baa32922 --- /dev/null +++ b/tolk-tester/tests/invalid-pure-1.tolk @@ -0,0 +1,20 @@ + +@pure +fun f_pure(): int { + return f_impure(); +} + +fun f_impure(): int {} + +fun main(): int { + return f_pure(); +} + +/** +@compilation_should_fail +@stderr +""" +an impure operation in a pure function +return f_impure(); +""" +*/ diff --git a/tolk-tester/tests/invalid-pure-2.tolk b/tolk-tester/tests/invalid-pure-2.tolk new file 
mode 100644 index 000000000..213206834 --- /dev/null +++ b/tolk-tester/tests/invalid-pure-2.tolk @@ -0,0 +1,23 @@ +global g: int; + +@pure +fun f_pure(): builder { + var b: builder = beginCell(); + g = g + 1; + return b; +} + +fun main(): int { + g = 0; + f_pure(); + return g; +} + +/** +@compilation_should_fail +@stderr +""" +an impure operation in a pure function +g = g + 1; +""" +*/ diff --git a/tolk-tester/tests/invalid-pure-3.tolk b/tolk-tester/tests/invalid-pure-3.tolk new file mode 100644 index 000000000..f64b81ce7 --- /dev/null +++ b/tolk-tester/tests/invalid-pure-3.tolk @@ -0,0 +1,23 @@ +@pure +fun validate_input(input: cell): (int, int) { + var (x, y, z, correct) = calculateCellSize(input, 10); + assert(correct) throw 102; +} + +@pure +fun someF(): int { + var c: cell = beginCell().endCell(); + validate_input(c); + return 0; +} + +fun main() {} + +/** +@compilation_should_fail +@stderr +""" +an impure operation in a pure function +assert(correct) +""" +*/ diff --git a/tolk-tester/tests/invalid-redefinition-1.tolk b/tolk-tester/tests/invalid-redefinition-1.tolk new file mode 100644 index 000000000..5238a6801 --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-1.tolk @@ -0,0 +1,7 @@ +global mulDivMod: int; + +/** +@compilation_should_fail +@stderr global mulDivMod: int; +@stderr redefinition of built-in symbol + */ diff --git a/tolk-tester/tests/invalid-redefinition-2.tolk b/tolk-tester/tests/invalid-redefinition-2.tolk new file mode 100644 index 000000000..3a300dc2e --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-2.tolk @@ -0,0 +1,12 @@ +global hello: int; + +fun hello(): int { + +} + +/** +@compilation_should_fail +@stderr fun hello() +@stderr redefinition of symbol, previous was at +@stderr invalid-redefinition-2.tolk:1:1 + */ diff --git a/tolk-tester/tests/invalid-redefinition-3.tolk b/tolk-tester/tests/invalid-redefinition-3.tolk new file mode 100644 index 000000000..04ed9383d --- /dev/null +++ 
b/tolk-tester/tests/invalid-redefinition-3.tolk @@ -0,0 +1,8 @@ +fun main(): int { + var demo_10: int = demo_10; +} + +/** +@compilation_should_fail +@stderr undefined symbol `demo_10` + */ diff --git a/tolk-tester/tests/invalid-redefinition-4.tolk b/tolk-tester/tests/invalid-redefinition-4.tolk new file mode 100644 index 000000000..993a869b7 --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-4.tolk @@ -0,0 +1,9 @@ +fun main(): int { + var (a: int, b: int) = (10, 20); + var (a, b: int) = (10, 20); +} + +/** +@compilation_should_fail +@stderr redeclaration of local variable `a` + */ diff --git a/tolk-tester/tests/invalid-redefinition-5.tolk b/tolk-tester/tests/invalid-redefinition-5.tolk new file mode 100644 index 000000000..4a8f5ea1d --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-5.tolk @@ -0,0 +1,9 @@ +fun main(x: int): int { + var (a: int, b: int) = (10, 20); + var (a redef, x: int) = (10, 20); +} + +/** +@compilation_should_fail +@stderr redeclaration of local variable `x` + */ diff --git a/tolk-tester/tests/invalid-self-1.tolk b/tolk-tester/tests/invalid-self-1.tolk new file mode 100644 index 000000000..40b54f163 --- /dev/null +++ b/tolk-tester/tests/invalid-self-1.tolk @@ -0,0 +1,8 @@ +fun cantReturnFromSelf(mutate self: int): self { + return 2; +} + +/** +@compilation_should_fail +@stderr invalid return from `self` function + */ diff --git a/tolk-tester/tests/invalid-self-2.tolk b/tolk-tester/tests/invalid-self-2.tolk new file mode 100644 index 000000000..c4aa758b2 --- /dev/null +++ b/tolk-tester/tests/invalid-self-2.tolk @@ -0,0 +1,8 @@ +fun cantUseSelfAsType(mutate x: int) { + var y: self = x; +} + +/** +@compilation_should_fail +@stderr `self` type can be used only as a return type of a function (enforcing it to be chainable) + */ diff --git a/tolk-tester/tests/invalid-self-3.tolk b/tolk-tester/tests/invalid-self-3.tolk new file mode 100644 index 000000000..330ac2495 --- /dev/null +++ b/tolk-tester/tests/invalid-self-3.tolk @@ -0,0 
+1,10 @@ +fun cantReturnSelf(mutate x: int): int { + x += 1; + return self; +} + +/** +@compilation_should_fail +@stderr using `self` in a non-member function (it does not accept the first `self` parameter) +@stderr return self + */ diff --git a/tolk-tester/tests/invalid-self-4.tolk b/tolk-tester/tests/invalid-self-4.tolk new file mode 100644 index 000000000..f4856a465 --- /dev/null +++ b/tolk-tester/tests/invalid-self-4.tolk @@ -0,0 +1,9 @@ +fun cantReturnNothingFromSelf(mutate self: int): self { + self = self + 1; +} + +/** +@compilation_should_fail +@stderr missing return; forgot `return self`? +@stderr } + */ diff --git a/tolk-tester/tests/invalid-self-5.tolk b/tolk-tester/tests/invalid-self-5.tolk new file mode 100644 index 000000000..a007a93c2 --- /dev/null +++ b/tolk-tester/tests/invalid-self-5.tolk @@ -0,0 +1,15 @@ +fun increment(mutate self: int): self { + self = self + 1; + return self; +} + +fun cantReturnAnotherSelf(mutate self: int): self { + self = self + 1; + var x = 0; + return x.increment(); +} + +/** +@compilation_should_fail +@stderr invalid return from `self` function + */ diff --git a/tolk-tester/tests/invalid-self-6.tolk b/tolk-tester/tests/invalid-self-6.tolk new file mode 100644 index 000000000..588c70ab2 --- /dev/null +++ b/tolk-tester/tests/invalid-self-6.tolk @@ -0,0 +1,8 @@ +fun increment(x: int, self: int): int { + return x + self; +} + +/** +@compilation_should_fail +@stderr `self` can only be the first parameter + */ diff --git a/tolk-tester/tests/invalid-self-7.tolk b/tolk-tester/tests/invalid-self-7.tolk new file mode 100644 index 000000000..2fa2da492 --- /dev/null +++ b/tolk-tester/tests/invalid-self-7.tolk @@ -0,0 +1,8 @@ +fun increment(x: int): int { + return self + 1; +} + +/** +@compilation_should_fail +@stderr using `self` in a non-member function + */ diff --git a/tolk-tester/tests/invalid-shift-1.tolk b/tolk-tester/tests/invalid-shift-1.tolk new file mode 100644 index 000000000..5127ce05b --- /dev/null +++ 
b/tolk-tester/tests/invalid-shift-1.tolk @@ -0,0 +1,8 @@ +fun main(flags: int) { + return flags << 1 + 32; +} + +/** +@compilation_should_fail +@stderr << has lower precedence than + +*/ diff --git a/tolk-tester/tests/invalid-symbol-1.tolk b/tolk-tester/tests/invalid-symbol-1.tolk new file mode 100644 index 000000000..08a86f176 --- /dev/null +++ b/tolk-tester/tests/invalid-symbol-1.tolk @@ -0,0 +1,14 @@ +fun main(x: int): int { + if (x > 0) { + var y: int = 10; + } else { + var y: slice = "20"; + } + debugPrint(y); +} + +/** +@compilation_should_fail +@stderr debugPrint(y); +@stderr undefined symbol `y` + */ diff --git a/tolk-tester/tests/invalid-symbol-2.tolk b/tolk-tester/tests/invalid-symbol-2.tolk new file mode 100644 index 000000000..f55e15cec --- /dev/null +++ b/tolk-tester/tests/invalid-symbol-2.tolk @@ -0,0 +1,12 @@ +fun main(x: int): int { + try { + if (x > 10) { throw(44); } + } catch(code) {} + return code; +} + +/** +@compilation_should_fail +@stderr return code; +@stderr undefined symbol `code` + */ diff --git a/tolk-tester/tests/invalid-syntax-1.tolk b/tolk-tester/tests/invalid-syntax-1.tolk new file mode 100644 index 000000000..4ccc8f22d --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-1.tolk @@ -0,0 +1,15 @@ +fun main(x: int): int { + if (x > 0) { + return 1; + } + // 'elseif' doesn't exist anymore, it's treated as 'someFunction(arg)' + elseif(x < 0) { + return -1; + } + return x; +} + +/** +@compilation_should_fail +@stderr expected `;`, got `{` + */ diff --git a/tolk-tester/tests/invalid-syntax-2.tolk b/tolk-tester/tests/invalid-syntax-2.tolk new file mode 100644 index 000000000..1180dbbff --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-2.tolk @@ -0,0 +1,13 @@ +fun main(x: int) { + while (x > 0) { + if (x == 10) { + break; + } + x = x -1; + } +} + +/** +@compilation_should_fail +@stderr break/continue from loops are not supported yet + */ diff --git a/tolk-tester/tests/invalid-syntax-3.tolk b/tolk-tester/tests/invalid-syntax-3.tolk new 
file mode 100644 index 000000000..26ce82ac5 --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-3.tolk @@ -0,0 +1,8 @@ +fun main(x: int) { + return null(); +} + +/** +@compilation_should_fail +@stderr null is not a function: use `null`, not `null()` + */ diff --git a/tolk-tester/tests/invalid-syntax-4.tolk b/tolk-tester/tests/invalid-syntax-4.tolk new file mode 100644 index 000000000..044dd329a --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-4.tolk @@ -0,0 +1,8 @@ +fun main(x: int) { + assert(x > 0); +} + +/** +@compilation_should_fail +@stderr expected `throw excNo` after assert, got `;` + */ diff --git a/tolk-tester/tests/invalid-tolk-version.tolk b/tolk-tester/tests/invalid-tolk-version.tolk new file mode 100644 index 000000000..d66de9ff6 --- /dev/null +++ b/tolk-tester/tests/invalid-tolk-version.tolk @@ -0,0 +1,7 @@ +tolk asdf; + +/** +@compilation_should_fail +@stderr semver expected +@stderr tolk asdf; + */ diff --git a/tolk-tester/tests/invalid-typing-1.tolk b/tolk-tester/tests/invalid-typing-1.tolk new file mode 100644 index 000000000..a0fe296d8 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-1.tolk @@ -0,0 +1,10 @@ +fun main() { + var tri: [int, scli] = [10, null()]; + return; +} + +/** +@compilation_should_fail +@stderr .tolk:2 +@stderr expected <type>, got `scli` + */ diff --git a/tolk-tester/tests/invalid-typing-2.tolk b/tolk-tester/tests/invalid-typing-2.tolk new file mode 100644 index 000000000..d7c6745f5 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-2.tolk @@ -0,0 +1,9 @@ +fun main() { + var tri: (int, bool) = (10, false); + return; +} + +/** +@compilation_should_fail +@stderr bool type is not supported yet + */ diff --git a/tolk-tester/tests/invalid-typing-3.tolk b/tolk-tester/tests/invalid-typing-3.tolk new file mode 100644 index 000000000..fb4b0bc51 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-3.tolk @@ -0,0 +1,19 @@ +fun incInt(mutate self: int): self { + self += 1; + return self; +} + +fun appendBuilder(mutate self: builder): 
self { + self.storeUint(1, 32); + return self; +} + +fun cantMixDifferentThis() { + var x = 0; + return x.incInt().appendBuilder().incInt(); +} + +/** +@compilation_should_fail +@stderr cannot apply function appendBuilder : builder -> (builder, ()) to arguments of type int: cannot unify type int with builder + */ diff --git a/tolk-tester/tests/invalid-typing-4.tolk b/tolk-tester/tests/invalid-typing-4.tolk new file mode 100644 index 000000000..0e6553690 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-4.tolk @@ -0,0 +1,14 @@ +fun incNotChained(mutate self: int) { + self = self + 1; +} + +fun cantCallNotChainedMethodsInAChain(x: int) { + return x.incNotChained().incNotChained(); +} + +/** +The error is very weird, but nevertheless, the type system prevents such errors. + +@compilation_should_fail +@stderr cannot apply function incNotChained : int -> (int, ()) to arguments of type (): cannot unify type () with int + */ diff --git a/tolk-tester/tests/invalid-typing-5.tolk b/tolk-tester/tests/invalid-typing-5.tolk new file mode 100644 index 000000000..ba3450de2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-5.tolk @@ -0,0 +1,14 @@ +fun incNotChained(mutate self: int) { + self = self + 1; +} + +fun failWhenReturnANotChainedValue(x: int): int { + return x.incNotChained(); +} + +/** +The error is very weird, but nevertheless, the type system prevents such errors. 
+ +@compilation_should_fail +@stderr previous function return type int cannot be unified with return statement expression type (): cannot unify type () with int + */ diff --git a/tolk-tester/tests/invalid.tolk b/tolk-tester/tests/invalid.tolk new file mode 100644 index 000000000..217747748 --- /dev/null +++ b/tolk-tester/tests/invalid.tolk @@ -0,0 +1,8 @@ +fun main(s: auto) { + var (z, t) = ; + +/** +@compilation_should_fail +@stderr expected <expression>, got `;` +@stderr var (z, t) = ; +*/ diff --git a/tolk-tester/tests/known-bugs.tolk b/tolk-tester/tests/known-bugs.tolk new file mode 100644 index 000000000..4de6a3752 --- /dev/null +++ b/tolk-tester/tests/known-bugs.tolk @@ -0,0 +1,27 @@ +fun increment(mutate x: int): int { + x = x + 1; + return x; +} + +@method_id(101) +fun bugWithModifyingMethodInsideSameExpression() { + /* + The same bug existed in FunC: +#pragma allow-post-modification; +(int, int) ~increment(int x) { x = x + 5; return (x, x); } +int main() { int x = 0; x += x~increment(); return x; } + It's related to using a variable modified by ~method inside the same expression. 
+ */ + var x = 0; + x = x + increment(mutate x); + return x; +} + +fun main() { + +} + +/** +// correct: 2 +@testcase | 101 | | 1 + */ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk new file mode 100644 index 000000000..e9774f3f4 --- /dev/null +++ b/tolk-tester/tests/logical-operators.tolk @@ -0,0 +1,295 @@ +import "imports/use-dicts.tolk" + +fun simpleAllConst() { + return (!0, !!0 & !false, !!!0, !1, !!1, !-1, !!-1, (!5 == 0) == !0, !0 == true); +} + +fun compileTimeEval1(x: int) { + // todo now compiler doesn't understand that bool can't be equal to number other than 0/-1 + // (but understands that it can't be positive) + // that's why for now, the last condition is evaluated at runtime + return (!x, !x > 10, !x < 10, !!x == 5, !x == -10); +} + +@method_id(101) +fun withIfNot(x: int, y: int) { + if (!x) { return 10; } + else if (!y) { return 20; } + return x+y; +} + +@method_id(102) +fun withAndOr(x: int, y: int, z: int) { + var return_at_end = -1; + if (!x & !y) { + if (!z & !y) { return 10; } + else if (z | !!y) { return_at_end = 20; } + } else if (!!x & !!y & !z) { + if (!z & (x > 10)) { return_at_end = 30; } + if ((x != 11) & !z) { return 40; } + return_at_end = 50; + } else { + return_at_end = !x ? !y : !z | 1; + } + return return_at_end; +} + +@method_id(103) +fun someSum(upto: int) { + var x = 0; + var should_break = false; + while (!x & !should_break) { + if (upto < 10) { x = upto; should_break = true; } + else { upto = upto - 1; } + } + return x; +} + +@method_id(104) +fun testDict(last: int) { + // prepare dict: [3 => 30, 4 => 40, 5 => x] + var dict = prepareDict_3_30_4_40_5_x(!last ? 
100 : last); + return (lookupIdxByValue(dict, 30), lookupIdxByValue(dict, last), lookupIdxByValue(dict, 100)); +} + +@method_id(105) +fun testNotNull(x: int) { + return [x == null, null == x, !(x == null), null == null, +(null != null)]; +} + +@method_id(106) +fun testAndConstCodegen() { + return ( + [1 && 0, 0 && 1, 0 && 0, 1 && 1], + [4 && 3 && 0, 5 && 0 && 7 && 8, (7 && 0) && -19], + [4 && 3 && -1, 5 && -100 && 7 && 8, (7 && (1 + 2)) && -19], + [true && false, true && true] + ); +} + +@method_id(107) +fun testOrConstCodegen() { + return ( + [1 || 0, 0 || 1, 0 || 0, 1 || 1], + [0 || 0 || 0, 0 || (0 || 0), ((0 || 0) || 0) || 0], + [4 || 3 || -1, 0 || -100 || 0 || 0, (0 || (1 + -1)) || -19], + [true || false, false || false] + ); +} + +global eqCallsCnt: int; + +fun eq(x: int) { return x; } +fun eqCnt(x: int) { eqCallsCnt += 1; return x; } +fun isGt0(x: int) { return x > 0; } + +fun alwaysThrows(): int { throw 444 ; return 444; } + +@method_id(108) +fun testAndSimpleCodegen(a: int, b: int) { + return a && b; +} + +@method_id(109) +fun testOrSimpleCodegen(a: int, b: int) { + return a > 0 || b > 0; +} + +@method_id(110) +fun testLogicalOps1(x: int) { + eqCallsCnt = 0; + return ( + isGt0(x) || !isGt0(x) || alwaysThrows(), + x && eqCnt(x) && eqCnt(x - 1) && eqCnt(x - 2), + (400 == eq(x)) && alwaysThrows(), + (500 == eq(x)) || eqCnt(x) || false, + (500 == eq(x)) || eqCnt(x) || true, + eqCallsCnt + ); +} + +@method_id(111) +fun testLogicalOps2(first: int) { + var s = beginCell().storeInt(1, 32).storeInt(2, 32).storeInt(3, 32).storeInt(4, 32).storeInt(5, 32).endCell().beginParse(); + var sum = 0; + if (first && s.loadUint(32)) { + (2 == s.loadUint(32)) && (sum += s.loadUint(32)); + (3 == s.loadUint(32)) && (sum += s.loadUint(32)); + (5 == s.preloadUint(32)) && (sum += s.loadUint(32)); + } else { + (10 == s.loadUint(32)) || (20 == s.loadUint(32)) || (3 == s.loadUint(32)) || (4 == s.loadUint(32)); + sum += s.loadUint(32); + } + return (s.getRemainingBitsCount(), sum); +} + 
+fun main() { + +} + +/** +@testcase | 101 | 0 0 | 10 +@testcase | 101 | 5 0 | 20 +@testcase | 101 | 5 8 | 13 +@testcase | 102 | 0 0 0 | 10 +@testcase | 102 | 0 0 5 | 20 +@testcase | 102 | 1 2 0 | 40 +@testcase | 102 | 11 2 0 | 50 +@testcase | 102 | 1 0 0 | -1 +@testcase | 102 | 0 1 0 | 0 +@testcase | 102 | 1 0 1 | 1 +@testcase | 103 | 15 | 9 +@testcase | 103 | 6 | 6 +@testcase | 103 | -1 | -1 +@testcase | 104 | 50 | 3 5 -1 +@testcase | 104 | 100 | 3 5 5 +@testcase | 104 | 0 | 3 -1 5 +@testcase | 105 | 0 | [ 0 0 -1 -1 0 ] +@testcase | 105 | null | [ -1 -1 0 -1 0 ] +@testcase | 106 | | [ 0 0 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ 0 -1 ] +@testcase | 107 | | [ -1 -1 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ -1 0 ] +@testcase | 108 | 1 2 | -1 +@testcase | 108 | 1 0 | 0 +@testcase | 109 | -5 -4 | 0 +@testcase | 109 | -5 4 | -1 +@testcase | 109 | 1 99 | -1 +@testcase | 110 | 0 | -1 0 0 0 -1 2 +@testcase | 110 | 1 | -1 0 0 -1 -1 4 +@testcase | 110 | 2 | -1 0 0 -1 -1 5 +@testcase | 110 | 500 | -1 -1 0 -1 -1 3 +@testcase | 111 | 0 | 32 4 +@testcase | 111 | -1 | 0 8 + +@fif_codegen +""" + simpleAllConst PROC:<{ + // + -1 PUSHINT + 0 PUSHINT + -1 PUSHINT + 0 PUSHINT + -1 PUSHINT + 0 PUSHINT + -1 PUSHINT + TRUE + TRUE + }> +""" + +@fif_codegen +""" + compileTimeEval1 PROC:<{ + // x + DUP // x x + 0 EQINT // x _1 + FALSE // x _1 _4 + TRUE // x _1 _4 _7 + FALSE // x _1 _4 _7 _11 + s0 s4 XCHG // _11 _1 _4 _7 x + 0 EQINT // _11 _1 _4 _7 _12 + -10 EQINT // _11 _1 _4 _7 _14 + s3 s4 XCHG + s1 s3 s0 XCHG3 // _1 _4 _7 _11 _14 + }> +""" + +@fif_codegen +""" + withIfNot PROC:<{ + c2 SAVE + SAMEALTSAVE // x y + OVER // x y x + IFNOTJMP:<{ // x y + 2DROP // + 10 PUSHINT // _2=10 + }> // x y + DUP // x y y + IFNOTJMP:<{ // x y + 2DROP // + 20 PUSHINT // _3=20 + RETALT + }> // x y + ADD // _4 + }> +""" + +@fif_codegen +""" + testAndConstCodegen PROC:<{ + // + FALSE + 0 PUSHINT + DUP + TRUE + 4 TUPLE + FALSE + 0 PUSHINT + DUP + TRIPLE + TRUE + TRUE + TRUE + TRIPLE + FALSE + TRUE + PAIR + }> +""" + 
+@fif_codegen +""" + testOrConstCodegen PROC:<{ + // + -1 PUSHINT + TRUE + FALSE + s2 PUSH + 4 TUPLE + FALSE + FALSE + FALSE + TRIPLE + -1 PUSHINT + DUP + TRUE + TRIPLE + -1 PUSHINT + FALSE + PAIR + }> +""" + +Currently, && operator is implemented via ?: and is not optimal in primitive cases. +For example, `a && b` can be expressed without IFs. +These are moments of future optimizations. For now, it's more than enough. +@fif_codegen +""" + testAndSimpleCodegen PROC:<{ + // a b + SWAP // b a + IF:<{ // b + 0 NEQINT // _2 + }>ELSE<{ // b + DROP // + 0 PUSHINT // _2=0 + }> + }> +""" + +@fif_codegen +""" + testOrSimpleCodegen PROC:<{ + // a b + SWAP // b a + 0 GTINT // b _3 + IF:<{ // b + DROP // + -1 PUSHINT // _4=-1 + }>ELSE<{ // b + 0 GTINT // _7 + 0 NEQINT // _4 + }> + }> +""" + + */ diff --git a/tolk-tester/tests/method_id.tolk b/tolk-tester/tests/method_id.tolk new file mode 100644 index 000000000..c2d0b9aad --- /dev/null +++ b/tolk-tester/tests/method_id.tolk @@ -0,0 +1,15 @@ +@method_id(1) +fun foo1(): int { return 111; } +@method_id(3) +fun foo2(): int { return 222; } +@method_id(10) +fun foo3(): int { return 333; } +fun main(): int { return 999; } + +/** + method_id | in | out +@testcase | 1 | | 111 +@testcase | 3 | | 222 +@testcase | 10 | | 333 +@testcase | 0 | | 999 +*/ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk new file mode 100644 index 000000000..b9184ca9a --- /dev/null +++ b/tolk-tester/tests/mutate-methods.tolk @@ -0,0 +1,337 @@ +fun incrementInPlace(mutate self: int, byValue: int): void { + self = self + byValue; +} + +fun incrementTwoInPlace(mutate self: int, mutate y: int, byValue: int): int { + self.incrementInPlace(byValue); + y += byValue; + return self + y; +} + +@method_id(101) +fun testIncrement1() { + var x = 50; + var y = 30; + incrementInPlace(mutate x, 10); + incrementInPlace(mutate x, 10); + incrementInPlace(mutate y, 10); + y.incrementInPlace(10); + incrementInPlace(mutate y, 10); + return 
(x, y); +} + +@method_id(102) +fun testIncrement2() { + var x = 50; + var y = 30; + val sum1 = incrementTwoInPlace(mutate x, mutate y, 10); + val sum2 = x.incrementTwoInPlace(mutate y, 10); + return (x, y, sum1, sum2); +} + + +fun load_next(mutate cs: slice): int { + return loadInt(mutate cs, 32); +} + +fun myLoadInt(mutate self: slice, len: int): int + asm(-> 1 0) "LDIX"; +fun myStoreInt(mutate self: builder, x: int, len: int): self + asm(x self len) "STIX"; + +@inline_ref +fun unpack_utils_info(mutate utils_info_sl: slice): (int, int) { + return ( + utils_info_sl.myLoadInt(32), + utils_info_sl.myLoadInt(32) + ); +} + +@method_id(103) +fun testSlices1() { + var b: builder = beginCell().storeInt(1, 32).myStoreInt(2, 32); + b.myStoreInt(3, 32); + var c: cell = b.myStoreInt(4, 32).storeInt(5, 32).endCell(); + var cs = c.beginParse(); + var first = cs.preloadInt(32); + unpack_utils_info(mutate cs); + return (first, cs.myLoadInt(32), cs.loadInt(32)); +} + +fun load_decimal_symbol(mutate self: slice): int { + // load decimal from bits using utf-8 table + var n: int = self.loadUint(8); + n = n - 48; + assert(n >= 0) throw 400; + assert(n <= 9) throw 400; + return n; +} + +@method_id(104) +fun testSlices2() { + var cs = "123"; + return (cs.load_decimal_symbol(), cs.load_decimal_symbol(), cs.load_decimal_symbol()); +} + +global v1: int; +global v2: int; +global v3: int; + +@method_id(105) +fun testGlobals() { + v1 = 0; + v2 = 0; + v3 = 100; + v3 += incrementTwoInPlace(mutate v1, mutate v2, 5); + return (v1, v2, v3); +} + +fun withNameShadowing(mutate x: int, pivot: int, extra: int) { + x += pivot; + if (pivot < 100) { + var x = 100 + extra; + if (pivot < 50) { + var x = 50 + extra; + return x + extra; + } else { + x += extra; + return x + extra; + } + } else { + x += extra; + return -100 + extra; + } +} + +@method_id(106) +fun testNameShadowing() { + var x = 0; + var sum = 0; + sum += withNameShadowing(mutate x, 100, 10); + sum += withNameShadowing(mutate x, 50, 10); + sum 
+= withNameShadowing(mutate x, 0, 10); + return (x, sum); +} + +fun updateTwoItems(mutate self: (int, int), byValue: int) { + val (first, second) = self; + self = (first + byValue, second + byValue); +} + +@method_id(107) +fun testMutableTensor() { + var t = (40, 50); + t.updateTwoItems(10); + updateTwoItems(mutate t, 10); + return t; +} + +@pure +fun myStoreUint(mutate self: builder, x: int, len: int): self + asm(x self len) "STIX"; + +@pure +fun myStoreU32(mutate self: builder, x: int): self { + return self.storeUint(x, 32); +} + +fun getSumOfNumbersInCell(c: cell): int { + var sum = 0; + var s = c.beginParse(); + var n_numbers = s.getRemainingBitsCount() / 32; + repeat (n_numbers) { + sum += s.loadUint(32); + } + return sum; +} + +@method_id(110) +fun testStoreChaining() { + var b = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32); + b.storeUint(4, 32); + b.myStoreUint(5, 32).storeUint(6, 32); + storeUint(mutate b, 7, 32); + b = b.storeUint(8, 32); + b = b.storeUint(9, 32).storeUint(10, 32); + + return getBuilderBitsCount(b); +} + +@method_id(111) +fun testStoreChainingCustom() { + var b = beginCell().myStoreUint(1, 32).myStoreUint(2, 32).myStoreUint(3, 32); + b.myStoreUint(4, 32); + b.myStoreUint(5, 32).myStoreUint(6, 32); + myStoreUint(mutate b, 7, 32); + b = b.myStoreUint(8, 32); + b = b.myStoreUint(9, 32).myStoreUint(10, 32); + val sum1 = getSumOfNumbersInCell(b.endCell()); + + b = beginCell().myStoreU32(1).storeUint(2, 32).myStoreU32(3); + b.myStoreU32(4); + b.myStoreU32(5).myStoreU32(6); + myStoreU32(mutate b, 7); + b = b.myStoreU32(8); + b = b.storeUint(9, 32).myStoreU32(10); + val sum2 = getSumOfNumbersInCell(b.endCell()); + + return (sum1, sum2); +} + +fun myStoreU32_and_mutate_x(mutate self: builder, mutate x: int): void { + return myStoreUint(mutate self, x += 10, 32); +} + +@method_id(112) +fun testStoreAndMutateBoth() { + var x = 3; + var b: builder = beginCell().myStoreUint(1, 32); + b.myStoreU32_and_mutate_x(mutate x); + 
b.myStoreU32(3).myStoreU32_and_mutate_x(mutate x); + b.myStoreU32_and_mutate_x(mutate x); + + var cs: slice = b.endCell().beginParse(); + var (n1,n2,n3,n4,n5) = (cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32)); + assert(n5 == x) throw 100; + + return [n1,n2,n3,n4,n5]; +} + +global ccc: builder; + +@method_id(113) +fun testStoreChainingForGlobal() { + ccc = beginCell().storeUint(1, 32).myStoreUint(2, 32).myStoreU32(3); + ccc.storeUint(4, 32); + ccc.storeUint(5, 32).myStoreU32(6); + storeUint(mutate ccc, 7, 32); + ccc = ccc.myStoreU32(8); + ccc = ccc.storeUint(9, 32).myStoreUint(10, 32); + + return getBuilderBitsCount(ccc); +} + +fun alwaysThrows(): int { throw 123; return 123; } +fun loadIntFromCell(c: cell, len: int) { return c.beginParse().loadUint(len); } + +@method_id(114) +fun testLoadIntForTemporaryObject() { + val c0 = beginCell().storeUint(0, 32).endCell(); + val c4 = beginCell().storeUint(4, 32).endCell(); + return ( + beginCell().storeUint(1, 32).endCell().beginParse().loadUint(32), + beginCell().storeUint(2, 32).endCell().beginParse().loadUint(32), + c0.beginParse().loadUint(32) ? 
alwaysThrows() : loadIntFromCell(c4, 32) + ); +} + +@pure +fun myStoreUint_pure(mutate self: builder): void + asm "STIX"; + +fun myStoreUint_impure(mutate self: builder): void + asm "STIX"; + +fun testStoreUintPureUnusedResult() { + var b = beginCell(); + b.myStoreUint_pure(); + var s = b.endCell().beginParse(); + val ii = s.loadUint(32); + return 0; +} + +fun testStoreUintImpureUnusedResult() { + var b = beginCell(); + b.myStoreUint_impure(); + var s = b.endCell().beginParse(); + val ii = s.loadUint(32); + return 0; +} + +global counter: int; + +fun writeNext2(mutate self: builder): self { + return self.storeUint(counter += 1, 32).storeUint(counter += 1, 32); +} + +fun resetCounter(mutate self: builder): self { + counter = 0; + return self; +} + +@method_id(115) +fun testExplicitReturn() { + counter = 0; + return ( + beginCell().writeNext2().writeNext2().resetCounter().writeNext2().endCell().getSumOfNumbersInCell(), + counter + ); +} + + +fun main(){} + +/** +@testcase | 101 | | 70 60 +@testcase | 102 | | 70 50 100 120 +@testcase | 103 | | 1 3 4 +@testcase | 104 | | 1 2 3 +@testcase | 105 | | 5 5 110 +@testcase | 106 | | 160 110 +@testcase | 107 | | 60 70 +@testcase | 110 | | 320 +@testcase | 111 | | 55 55 +@testcase | 112 | | [ 1 13 3 23 33 ] +@testcase | 113 | | 320 +@testcase | 114 | | 1 2 4 +@testcase | 115 | | 13 2 + +@fif_codegen +""" + incrementInPlace PROC:<{ + // self byValue + ADD // self + }> +""" + +@fif_codegen +""" + testIncrement2 PROC:<{ + ... 
+ incrementTwoInPlace CALLDICT // x y sum1 + -ROT + 10 PUSHINT // sum1 x y _9=10 + incrementTwoInPlace CALLDICT // sum1 x y sum2 + s1 s3 s0 XCHG3 // x y sum1 sum2 + }> +""" + +@fif_codegen +""" + load_next PROC:<{ + // cs + 32 LDI // _1 cs + SWAP // cs _1 + }> +""" + +@fif_codegen +""" + testStoreUintPureUnusedResult PROC:<{ + // + 0 PUSHINT // _12=0 + }> +""" + +@fif_codegen +""" + testStoreUintImpureUnusedResult PROC:<{ + // + NEWC // b + STIX // _2 + DROP // + 0 PUSHINT // _12=0 + }> +""" + + */ diff --git a/tolk-tester/tests/no-spaces.tolk b/tolk-tester/tests/no-spaces.tolk new file mode 100644 index 000000000..0d4c3b678 --- /dev/null +++ b/tolk-tester/tests/no-spaces.tolk @@ -0,0 +1,115 @@ +const int10:int=10; + +fun just10(): int { return int10; } +fun eq(v: int): int { return`v`; } + +@method_id(101) fun `get_-1` (): int {return-1;} +@method_id(102) fun `get_--1` (): int {return--1;} +@method_id(103) fun `get_---1`(): int {return---1;} +@method_id(104) fun `get_+++1`(): int {return+++1;} +@method_id(105) fun `get_+-+1`(): int {return+-+1;} + +global `some()var`:int; + +@method_id(110) fun `some_math`(): int { + `some()var`=--6; + return 1*-2*-3*-4*just10()*-5+-`some()var`+--`some()var`---`some()var`; +} + +@method_id(111) fun `negative_nums`(a:int):int { + var m$0:int=1; + var m1:int=-(+0x1)*m$0; + return `a`*-1*-(1)*---(1)*+just10()+-`just10`()*m1*-m1+-eq(m1)----0x1; +} + +@method_id(112) fun `bitwise~ops`(flags:int):[int,int] { + return[ + (just10()-3==just10()-(4)--1)|((2==2)&(eq(eq(10)) -3==just10()--13)), + ((flags&0xFF)!=0) + ]; +} + +@method_id(113)fun`unary+bitwise-constant`():[int,int,int]{ + return [~-~~+-3, ~+3-~9, -(-~+-20-~ 10+3+~38&39)]; +} + +@method_id(114)fun`unary+bitwize-parametrized`(c3:int, c9:int, c20:int, c10:int, c38:int):[int,int,int]{ + return [~-~~+-c3, ~+c3-~`c9`, -(-~+-c20-~c10+c3+~c38&39)]; +} + +fun add3(a: int, b: int, c: int) { return a+b+c; } + +@method_id(115) fun unary_const_check(): [int,int] { + var fst1: int=-1; + var 
snd1: int=-1; + var trd1: int=+2; + var (fst2,snd2,trd2)=(-1,-1,+2); + return [add3(fst2,snd2,trd2),add3(fst1,snd1,trd1)]; +} + +fun `load:u32`(mutate self: slice): int { + return self.loadUint(32); +} + +@method_id(116) fun `call_~_via_backticks`():[int,int,int,int] { + var b:builder = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32).storeUint(4, 32); + var `cs`:slice = b.endCell().beginParse(); + val one:int=`cs`.`loadUint`(32); + val (two:int,three:int) = (`cs`.`loadUint`(32), cs.`load:u32`()); + val four:int = cs.`load:u32`(); + return [one,two,three,four]; +} + +fun`main`(){} + +/** + method_id | in | out +@testcase | 101 | | -1 +@testcase | 102 | | 1 +@testcase | 103 | | -1 +@testcase | 104 | | 1 +@testcase | 105 | | -1 +@testcase | 110 | | 1194 +@testcase | 111 | -1 | 22 +@testcase | 112 | 0 | [ -1 0 ] +@testcase | 113 | | [ -4 6 -4 ] +@testcase | 114 | 3 9 20 10 38 | [ -4 6 -4 ] +@testcase | 115 | | [ 0 0 ] +@testcase | 116 | | [ 1 2 3 4 ] + +@fif_codegen +""" + get_+-+1 PROC:<{ + // + -1 PUSHINT + }> +""" + +@fif_codegen +""" + unary+bitwise-constant PROC:<{ + // + -4 PUSHINT + 6 PUSHINT + -4 PUSHINT + TRIPLE + }> +""" + +@fif_codegen +""" + unary_const_check PROC:<{ + // + -1 PUSHINT // fst1=-1 + DUP // fst1=-1 snd1=-1 + 2 PUSHINT // fst1=-1 snd1=-1 trd1=2 + s1 s1 s0 PUSH3 // fst1=-1 snd1=-1 trd1=2 fst2=-1 snd2=-1 trd2=2 + add3 CALLDICT // fst1=-1 snd1=-1 trd1=2 _13 + 3 -ROLL // _13 fst1=-1 snd1=-1 trd1=2 + add3 CALLDICT // _13 _14 + PAIR // _12 + }> +""" + + */ + diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk new file mode 100644 index 000000000..cdfe5acf9 --- /dev/null +++ b/tolk-tester/tests/null-keyword.tolk @@ -0,0 +1,158 @@ +import "@stdlib/lisp-lists" + +@method_id(101) +fun test1() { + var numbers: tuple = createEmptyList(); + numbers = listPrepend(1, numbers); + numbers = listPrepend(2, numbers); + numbers = listPrepend(3, numbers); + numbers = listPrepend(4, numbers); + var (h, numbers redef) 
= listSplit(numbers); + h += listGetHead(numbers); + + var t = createEmptyTuple(); + do { + var num = numbers.listNext(); + t.tuplePush(num); + } while (numbers != null); + + return (h, numbers == null, t); +} + +@method_id(102) +fun test2(x: int) { + if (null != x) { + var y: int = null; + if (y != null) { return 10; } + return y; + } + try { + return x + 10; // will throw, since not a number + } catch { + return -1; + } + return 100; +} + +fun myIsNull(x: int): int { + return x == null ? -1 : x; +} + +@method_id(103) +fun test3(x: int) { + return myIsNull(x > 10 ? null : x); +} + +fun getUntypedNull() { + var untyped = null; + if (true) { + return untyped; + } + return untyped; +} + +@method_id(104) +fun test4() { + var (_, (_, untyped)) = (3, (createEmptyTuple, null)); + if (true) { + return untyped; + } + return untyped; +} + +@method_id(105) +fun test5() { + var n = getUntypedNull(); + return !(null == n) ? n.loadInt(32) : 100; +} + +@method_id(106) +fun test6(x: int) { + return x > null; // this compiles (for now), but fails at runtime +} + +@method_id(107) +fun test7() { + var b = beginCell().storeMaybeRef(null); + var s = b.endCell().beginParse(); + var c = s.loadMaybeRef(); + return (null == c) * 10 + (b != null); +} + +fun main() { + // now, the compiler doesn't optimize this at compile-time, fif codegen contains ifs + var i: int = null; + if (i == null) { + return 1; + } + return 10; +} + +/** +@testcase | 101 | | 7 -1 [ 3 2 1 ] +@testcase | 102 | 5 | (null) +@testcase | 102 | null | -1 +@testcase | 103 | 5 | 5 +@testcase | 103 | 15 | -1 +@testcase | 104 | | (null) +@testcase | 105 | | 100 +@testcase | 107 | | -11 +@fif_codegen +""" + test1 PROC:<{ + // + PUSHNULL // numbers + 1 PUSHINT // numbers _2=1 + SWAP // _2=1 numbers + CONS // numbers + 2 PUSHINT // numbers _4=2 + SWAP // _4=2 numbers + CONS // numbers + 3 PUSHINT // numbers _6=3 + SWAP // _6=3 numbers + CONS // numbers + 4 PUSHINT // numbers _8=4 + SWAP // _8=4 numbers + CONS // numbers + UNCONS 
// h numbers + DUP // h numbers numbers + CAR // h numbers _12 +""" + +@fif_codegen +""" + main PROC:<{ + // + PUSHNULL // i + ISNULL // _2 + IFJMP:<{ // + 1 PUSHINT // _3=1 + }> // + 10 PUSHINT // _4=10 + }> +""" + +@fif_codegen +""" + test6 PROC:<{ + // x + PUSHNULL // x _1 + GREATER // _2 + }> +""" + +@fif_codegen +""" + test7 PROC:<{ + ... + LDOPTREF // b _20 _19 + DROP // b c + ISNULL // b _13 + 10 MULCONST // b _15 + SWAP // _15 b + ISNULL // _15 _16 + 0 EQINT // _15 _17 + ADD // _18 + }> +""" +*/ diff --git a/tolk-tester/tests/op_priority.tolk b/tolk-tester/tests/op_priority.tolk new file mode 100644 index 000000000..e4f97b759 --- /dev/null +++ b/tolk-tester/tests/op_priority.tolk @@ -0,0 +1,121 @@ +fun justTrue(): int { return true; } + +fun unary_minus_1(a: int, b: int, c: int): int{return -(a+b) *c;} +fun unary_minus_2(a: int, b: int, c: int): int{return(-(a+b))*c;} +fun unary_minus_3(a: int, b: int, c: int): int{return-((a+b) *c);} + + +@method_id(101) +fun test1(x: int, y: int, z: int): int { + return (x > 0) & (y > 0) & (z > 0); +} + +@method_id(102) +fun test2(x: int, y: int, z: int): int { + return x > (0 & (y > 0) & (z > 0)); +} + +@method_id(103) +fun test3(x: int, y: int, z: int): int { + if ((x < 0) | (y < 0)) { + return z < 0; + } + return (x > 0) & (y > 0); +} + +@method_id(104) +fun test4(x: int, y: int, mode: int): int { + if (mode == 1) { + return (x == 10) | (y == 20); + } if (mode == 2) { + return (x == 10) | (y == 20); + } else { + return x == (10 | (y == 20)); + } +} + +@method_id(105) +fun test5(status: int): int { + return justTrue() & (status == 1) & ((justTrue() & status) == 1); +} + +@method_id(106) +fun test6(a: int, b: int, c: int): int { + return (unary_minus_1(a,b,c) == unary_minus_2(a,b,c)) & (unary_minus_1(a,b,c) == unary_minus_3(a,b,c)); +} + +@method_id(107) +fun test7(b: int): int { + var a = b == 3 ? 3 : b == 4 ? 4 : (b == 5) & 1 ? 5 : 100; + return a; +} + +@method_id(108) +fun test8(b: int): int { + var a = b == 3 ? 
3 : b == 4 ? 4 : b = 5 ? 5 : 100; + return a; +} + +fun `_ 0, 3 & (3 > 0), 3 & (`_<_`(3, 0)), + 3 & `_ + unary_minus_2 PROC:<{ + // a b c + -ROT // c a b + ADD // c _3 + NEGATE // c _4 + SWAP // _4 c + MUL // _5 + }> + unary_minus_3 PROC:<{ + // a b c + -ROT // c a b + ADD // c _3 + SWAP // _3 c + MUL // _4 + NEGATE // _5 + }> +""" + + */ diff --git a/tolk-tester/tests/parse-address.tolk b/tolk-tester/tests/parse-address.tolk new file mode 100644 index 000000000..385aa3b53 --- /dev/null +++ b/tolk-tester/tests/parse-address.tolk @@ -0,0 +1,113 @@ +const cc1 = "0:ca6e321c7cce9ecedf0a8ca2492ec8592494aa5fb5ce0387dff96ef6af982a3e"a; +const cc2 = "EQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff-W72r5gqPrHF"a; + +fun verifyAddr(addr: slice, workchain: int, number: int) { + assert (addr.getRemainingBitsCount() == 3 + 8 + 256) throw 112; + addr.skipBits(3); + assert (addr.loadUint(8) == workchain) throw 111; + assert (addr.loadUint(256) == number) throw 111; +} + +fun main() { + verifyAddr("Ef8zMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzM0vF"a, 255, 23158417847463239084714197001737581570653996933128112807891516801582625927987); + verifyAddr("EQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM9c"a, 0, 0); + verifyAddr("EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5"a, 0, 65607996509792174074532427555986248720836864382484024657400295821210434460432); + verifyAddr("UQCOgxbCOjOLH_cEuQdGgS23zBM5SrQQepMFedjK-oixYbis"a, 0, 64460038539088394980732229180523693489682583665805557562964506609821558550881); + verifyAddr("EQDa4VOnTYlLvDJ0gZjNYm5PXfSmmtL6Vs6A_CZEtXCNICq_"a, 0, 99002318936150612861744867526221033858534811876886359650897405270877291973920); + verifyAddr("Ef8BtXO9bcTMXjg9bgivKh4lhJmZWQPP6_rb9vfjlTP5FJtM"a, 255, 772910975127952880303441415761050161913031788763061162001556772893733681428); + verifyAddr("Ef89xh-uy860-mCcvS8zcAUs8bApmxLGygDLEKjUk5RL-311"a, 255, 27941138149036269893630478666581900122707382189183906805784676408403709676539); + 
verifyAddr("Ef_vA6yRfmt2P4UHnxlrQUZFcBnKux8mL2eMqBgpeMFPorr4"a, 255, 108109262375472472702582493362335418330829651067377177643099076957184687427490); + verifyAddr("Ef8o6AM9sUZ8rOqLFY8PYeaC3gbopZR1BMkE8fcD0r5NnmCi"a, 255, 18502444830824300068094395885436326119386947594392869497312068745716154912158); + verifyAddr("Ef_fvrd0hBoVJUxoi7wH173Zk8NPiyVvxh5IoYSjEYZbOhsu"a, 255, 101202732337223525952216789200341489000836292542250083765062769181728788863802); + verifyAddr("Ef9nzj6RBc4mQ6p3ng7mGJ7tp7MbzERhe7obkM9A0wnCCEcf"a, 255, 46952625717497919357580310066854892621799390294920450816077086267929711460872); + verifyAddr("Ef9rU-_AAnBkHB71TIC3QvUf5LcAsvj0B4IoYzAXLpEFd5CA"a, 255, 48545777798729612074233611768739897492467685225150339217043102685589809464695); + verifyAddr("Ef9LynHHKgBxY6-l-W_dWN-CtGT2_ji5rN3EzOI-p9zWEfq6"a, 255, 34281152017620085319078796986198022632548048219136747083019177301186013091345); + verifyAddr("Ef9hMd78gzSiVsK0zz0AHtEja8x1UoB_NDZMjn-l86NQK_2Y"a, 255, 43962460814164090767878334494257755557842170134382045184921495822637115592747); + verifyAddr("Ef80FNJ5NJO4-0QwlVAWckUZXdk-PfYDexDZ1-ju9SxhF0A6"a, 255, 23557057702048801338698514499604413540742716310574705490458593067566768087319); + verifyAddr("Ef_fdIbThooPs4_r2DE_Z6ZsWycJdHLnsuKAJHTcbaZaipez"a, 255, 101071650030310556115830521522496708686577365303530257137459798093298869361290); + verifyAddr("Ef_lva0qEiZhWrrZJl-IJxyCcTQmmTo71fIWyQ31HxJ8NurV"a, 255, 103914771557158282349484109182290824591675204108148026180964788916630125182006); + verifyAddr("Ef8sMGKypw006AeRYqimLjmY2Ufp-SHk8C0ZJBNgVBlzw_Nr"a, 255, 19987255184378161380023126214650814972824352533523055905552702178965809886147); + verifyAddr("EQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff+W72r5gqPrHF"a, 0, 91561894446285001782438967260723928368560331318344957259023550817453781559870); + verifyAddr("EQCaSCHVak-jIc9ANutTAfHpZNM3YdGky7yaDzsTrg0WhFlm"a, 0, 69783625181781015447914682554083924798054947959007050695795761257887453484676); + 
verifyAddr("EQBS9U3AfD15fGmOtRMXQAxcPVBwNuItfLcDni9fkbTyyNX0"a, 0, 37523067738561024305547433298623118197038688994386001017161816416175242146504); + verifyAddr("EQBiMNL9qNWMAkJHuM0BFneYcuHL17kzS4pswpaEO-NGWrFG"a, 0, 44412924025649114419413541526870954696667907029239618728289150652715284776538); + verifyAddr("EQAUzE-Nef80O9dLZy91HfPiOb6EEQ8YqyWKyIU-KeaYLNUi"a, 0, 9407242825041766837311851458322335726136775042891143504070507665010681354284); + verifyAddr("EQD-nhrinjv0B4LTgr0dRHTHwH1MOsgGhKBXJZd7vESMZUf1"a, 0, 115166810931401616117484448645661180241548402534908005320733783571353775148133); + verifyAddr("EQAVD3Fni9I6j8XeSIl-wAGBEhqhame6OtAY0GScKT0D9X6f"a, 0, 9525855215156855607080079714361451576383963668563135377495902959388099150837); + verifyAddr("EQC6ACq3VANZjqfRBy7JMHkpLwqQ9qyYJsCIGx1mYbQgxaKw"a, 0, 84130484652351964071210477536969520113177637645401392541565606610268614566085); + verifyAddr("EQCIJLNFIko5CvpKn9oAkrDgLocDOoD4vwmHxNx_fsG_LkwW"a, 0, 61579391178099797614367237687950512448308156724136883899001108680249616482094); + verifyAddr("EQCe4AYIBce1pAk2qJJPSs1OzyZRlKjkfq8zuC8D7erv6DUP"a, 0, 71861245445432818728925844931259040612664802586395398157190478191760507596776); + verifyAddr("EQCtrtTXEAoSpoERmiqOnICe9LHxn2N89N4BH9qdHlrG-U0i"a, 0, 78559023162479717496981724991265882229440558807791659796411897368395464230649); + verifyAddr("EQBBlraAps0OZaB9Q8ePQn2wVAaL1G411A-dNppyWe3X3GIT"a, 0, 29666621803903557832193058147214384979915773445007872807927344851911086823388); + verifyAddr("EQBiASqUqaVizrozLRbszkWC2kETbkhpO2qniDVDPPg2_0W8"a, 0, 44328719889509369519441680467651025944540360433148852643949783408843779749631); + verifyAddr("EQBu2Q1EO8gIoNA1qoGWnHUudKfmqlKEDTQE-DxN-_4sdg14"a, 0, 50137910719490808065414827264266674858051167131188257457782826342827836714102); + verifyAddr("EQA5bvxWd5-q2vJUVqR9AlbEIfdFysLR0PXGgVlBf8x5hWuF"a, 0, 25977927117604457079092522008276392864656238504700352770597256138254994667909); + verifyAddr("EQBguMSHjFv5bfoOdshr3ruS9ymSZzhRKMovoNrxGxZXvmee"a, 
0, 43748489720571123896506696370504498290006245978262404519821633796370658121662); + verifyAddr("EQAxL0oF1-zNgimPKthbDnYS4xj94rHtfNRN7_Pd1r2LNNv3"a, 0, 22246882279393590648219842750911786376759362211171398419754426796438233910068); + verifyAddr("EQANX1uRKGZfyPIwEaIXrR0ZOqadct5q10dvKxWIxx7SQqzW"a, 0, 6048549475100840191738010856156544571222758030966479209409932714701987172930); + verifyAddr("EQBitdFDoU5DWSjfKq7AsO29RIwAnBzzvcVVSn5ekQoB9Liv"a, 0, 44647902768175374073183447303109856895983123510911038181495138821771906122228); + verifyAddr("EQBgbux7VSjqJHP7ByRK1q4QuVZbpSCesNgvz5qad3lfXX_B"a, 0, 43618018778298854282398238948198420936771670943015013768514626213000552996701); + verifyAddr("EQDisBd8U7M3CEOZ8gcWCdetdmJi3AI31zIT5qBwOdmUbsxY"a, 0, 102533830955233207294921564956803510155400341370448300698800842506363763004526); + verifyAddr("EQAZpn_eynVlf7Ii2d6jP_p1URPrdF9F3S7DiudQyelkjzwE"a, 0, 11602000355550451044739442929923326898313570892134000961608306166632391730319); + verifyAddr("EQDE0HBgfkOiqHezLtExBGTvOs8eitthHQosBjW3BmDy1y2K"a, 0, 89021598108837008984355105304701054698583123510131754065320641619941010764503); + verifyAddr("EQDyT36zktBN9PVWvZ1joRxhIfEUgCPt4F2isa-enUA_d6CP"a, 0, 109600164736599393471831241268953938618560132398692391517933933264745646800759); + verifyAddr("EQDSMUGwt25IQd3_yHjI03F71G8Kp2GMaMEv2TiWoTKbsyRH"a, 0, 95072727086440754059372943502908629555499501854161516009430039520728770059187); + verifyAddr("EQAgK1EcrvEuL9sCtoj3cNhVNOuf3lo5GIPE2gn1fwZZYB3j"a, 0, 14550545393206146289454646242321274637527057595221202748348667645886114191712); + verifyAddr("EQCDKqL5w_6MD-Z7AOButu-uR-ZJTsgNU1fu464hn9grY81U"a, 0, 59328315557704100696483472039557119625141880163887490602190749720459366378339); + verifyAddr("EQB1aVMyFBhnlYXmQjsma0S63kvxKU7ccZKFNCFTwX7ASPv4"a, 0, 53106696421104300082516512931084483581353095629408473618166869610568148238408); + verifyAddr("EQBbjrXHoxDyh1ZYGBdBoQgLaScxW6pZR1hEhJC8BqF-5Kgq"a, 0, 
41412616102566803060532874463898939692666425753852274254609049615175463829220); + verifyAddr("EQC-QeZ13QP0lszxNKt380fCWuaV94vwC/bfuqmrlg1/fJPA"a, 0, 86055876869280374285292827775555707420719385459150221433115419095878595346300); + verifyAddr("EQAiUwpF27vXCngqNhf_TQ5E_06ah0G4zuSrnfU7CLLaht5H"a, 0, 15525356059048115813946213102829493539706126913595626308144289257869196581510); + verifyAddr("EQBqiVjmhe2iVGmgOSDO1FGjSiz_AMtb1w7lLEiP4XIF_SFy"a, 0, 48187833566271418625754761625661652107159264793429628379411792200127405491709); + verifyAddr("EQDmwvaK2d_SbaPdpOM60effPWeKsksgDVwFPEyxuftM396K"a, 0, 104376425077737068747642645125299653296942252727305637929378053253273342397663); + verifyAddr("EQDWtPZZgF7wvIMUHZQojuD3utiuivsW7WslRJ33dgv-5yc8"a, 0, 97114682311034709685427168495629428400170984047839002197324103884924936519399); + verifyAddr("EQAA7z0JI0JKqbN-1uENKz9JrxIO5ZRY-ehMeg9fPncx50Ck"a, 0, 422697701361909095759185681783393186844038628935759044330165207027374567911); + verifyAddr("EQBVUHRoCq6coQYUwOAhGSoAmQ6Mpm7dFlDYon6HMgWV8Ftr"a, 0, 38588743302295548905191533977469452945717219128199196974980570837505276220912); + verifyAddr("EQCTdvDCf0bA5dOPI1-44tB2ZfNcMGiklzvg27TovgDEqM6E"a, 0, 66700138358140658950710678965721715920748906761125730971082529064117803730088); + verifyAddr("EQBDBKE5WGKIlnoi3OOzw7vkKKIX55eWjPvgxJWwek8AyL2J"a, 0, 30313140970524770883308749215942283658935592719811899513010665548955593408712); + verifyAddr("EQAvCSyLCo21GrqLAifdov4WkOxuGQCjMRxgF1cXSaNzLHZe"a, 0, 21274912932379789207153885262858116665851037273450532982121514600400844714796); + verifyAddr("EQCsLpDeHB2qpRbmsCb_0xmsYVNx1NgeYrvHGT1TDrHkDgL4"a, 0, 77880084760844670718511036557364450189323549135231036747480176919181282894862); + verifyAddr("EQCTQ8kPwyX92r48gCIL_pLN_RcQT9ghZygnmDTYkOkuW_j5"a, 0, 66609755171046741472463433430016629628609840960137226492652665879987546041947); + verifyAddr("EQCTrFRSHt-tfk7WxK9ZHQmqLcgxXxTK7wGfCEbqgY2W9Mcx"a, 0, 
66794468397542182731534559853537484892417154018190804733043974345563210356468); + verifyAddr("EQCv28y49GdaLncoclv0ISdDlMUY_cxDPGNWFCPT8t4GuqUJ"a, 0, 79543100951881731989812212377176715376834409392116644269458867858071577560762); + verifyAddr("EQCVL-k6deDR56Z8pcb0Btg0lGfaivOGfdDCD1vvyRsyL9vS"a, 0, 67479265933941008511790471646564661743401752930295407567346938670637286896175); + verifyAddr("EQD6t2dXDjZxF1DqunKF-8dEWivJdliY_0FYiCXnthuqnDCa"a, 0, 113402258385556889021060606279033166272577193563727959698596277924908309916316); + verifyAddr("EQDE98XNzXiPq7VnbJJ2M4-Ht3tX_OWR0xUTTnDC8NObLmyU"a, 0, 89091094739778473356272490822716056624384395256388481953562551087642791090990); + verifyAddr("EQDfeRDE1TDhwt478CDR0Q7MDwqcTUhfjqyTT59mgoAaF6f7"a, 0, 101079669463449311486034260688909914923832300293253430457119371423825321269783); + verifyAddr("EQDijcEyUKa-QgCbeGlggQk1uBtt2ZRHyW4Y4gB4R6MN6RLW"a, 0, 102473162609487797404330889623966425536887610061087715571345738626121871855081); + verifyAddr("EQDOtFOt41skbjBkZF89oYXpoDECjlxIzD-ShWAOYyzuxqLA"a, 0, 93495056812773926196963707371665512785268729004579280701087533371037976424134); + verifyAddr("EQDuJKSFWU7AYqH6KLFfAbYvMuz346eWmJvG6_2NYE42_B4T"a, 0, 107715199938628393100813870735031557263256555616273999363057194279168054802172); + verifyAddr("EQDwGu4vFv1e3wn8min_iy7OPJXegOYTFQ5bZFZ5a5ZPiBpX"a, 0, 108602665568837301744601989570019709742180613578164394799026726718721456754568); + verifyAddr("EQC4G2ph6AS_mD_-cIv4aIYm1z5jAgCW_TTDEr72ygXOP2X-"a, 0, 83274003234732023403481554420859495155084746906198543572711543697320249249343); + verifyAddr("EQDpUkyAa6lZ12P3ZB2PL_rmWwI1I55BU4kxw_rssFL5dswA"a, 0, 105534303174146507629736518862713754948570412188900908177600861330298381728118); + verifyAddr("EQDoIA20MF1qEcSPtROdCu5ukGx9dVjgMeJh1oQ4A4cf_Jif"a, 0, 104993214557977037193613824776415934089204193426692473563548548423424814817276); + verifyAddr("EQDpUkyAa6lZ12P3ZB2PL_rmWwI1I55BU4kxw_rssFL5dswA"a, 0, 
105534303174146507629736518862713754948570412188900908177600861330298381728118); + verifyAddr("EQClLO4EnZ_rTyV1GVpWy53pLgWJRki5c4ZzuM_1O_ClBkO9"a, 0, 74711004027159342540251007601464500186374346239921204216319145006974068892934); + verifyAddr("EQDmkj65Ab_m0aZaW8IpKw4kYqIgITw_HRstYEkVQ6NIYCyW"a, 0, 104290347741656803921830951060768893809692975574470790497562993373950614128736); + verifyAddr("EQCqNTwAYUNhPFS0RgqZoTLGJcQQxbAJ7csUo4YO3_TONLab"a, 0, 76987241268612358571638783428744566580605181728938801022059780105627411729972); + verifyAddr("EQCL3DmCynaRK7-vsfeNmd4Jj-UxAIHPvA4qS2xwaL6UpLbF"a, 0, 63260589232981964910240894899061676480139492286430589202252472895352724165796); + verifyAddr("EQDbU1SVEjBE73oUqgAoM9gDcShUkM5EC2PgoCjuwVUKo-Ee"a, 0, 99203745911752606845646497420891218522647962685916739950275357890977532807843); + verifyAddr("EQD02VdcF4TDbCKLLhZJ39NQTu6aWq2LnLjp0oXqbNu_BANK"a, 0, 110748343802097970709980079967961144373090790244250392237586606542170934198020); + verifyAddr("EQBynBO23ywHy_CgarY9NK9FTz0yDsG82PtcbSTQgGoXwiuA"a, 0, 51839428943991432793039248316067731096592274748149794482308513726460953499586); + verifyAddr("UQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff-W72r5gqPuwA"a, 0, 91561894446285001782438967260723928368560331318344957259023550817453781559870); + verifyAddr("EQAUTbQiM522Y_XJ_T98QPhPhTmb4nV--VSPiha8kC6kRfPO"a, 0, 9183547432069678364603018431103042146626948674383548774683927217595824907333); + verifyAddr("EQBlqsm144Dq6SjbPI4jjZvA1hqTIP3CvHovbIfW_t-SCALE"a, 0, 45985353862647206060987594732861817093328871106941773337270673759241903247880); + verifyAddr("UQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff-W72r5gqPuwA"a, 0, 91561894446285001782438967260723928368560331318344957259023550817453781559870); + verifyAddr("kQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff-W72r5gqPgpP"a, 0, 91561894446285001782438967260723928368560331318344957259023550817453781559870); + verifyAddr("kf-Dfdg-YQXaR2Q97gZJ4fGBtmV1DHOU1y1RPyyZZtRy_Ikh"a, 255, 
59475331506450494976393625198911249698879029820580340449086829444312920781564); + verifyAddr("0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 0, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + verifyAddr("0:0000000000000000000000000000000000000000000000000000000000000000"a, 0, 0); + verifyAddr("0:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFfffffffffffffffffffffffffffff"a, 0, 115792089237316195423570985008687907853269984665640564039457584007913129639935); + verifyAddr("0:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"a, 0, 23158417847463239084714197001737581570653996933128112807891516801582625927987); + verifyAddr("0:0000000000000000000000000000000000000000000000000000000000000000"a, 0, 0); + verifyAddr("1:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 1, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + verifyAddr("9:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 9, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + verifyAddr("99:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 99, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + verifyAddr("-1:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 255, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + + return cc1.isSliceBitsEqual(cc2); +} + +/** +@testcase | 0 | | -1 + */ diff --git a/tolk-tester/tests/pure-functions.tolk b/tolk-tester/tests/pure-functions.tolk new file mode 100644 index 000000000..8598e85c2 --- /dev/null +++ b/tolk-tester/tests/pure-functions.tolk @@ -0,0 +1,46 @@ + +@pure +fun f_pure1(): int { + return f_pure2(); +} + +@pure +fun f_pure2(): int { + return 2; +} + +@pure +fun get_contract_data(): (int, int) { + var c: cell = getContractData(); + var cs: slice = c.beginParse(); + cs.loadBits(32); + var value: int = cs.loadUint(16); + 
return (1, value); +} + +fun save_contract_data(value: int) { + var b: builder = beginCell().storeInt(1, 32).storeUint(value, 16); + setContractData(b.endCell()); +} + +@pure +@method_id(101) +fun test1(): int { + return f_pure1(); +} + +@method_id(102) +fun test2(value: int): int { + save_contract_data(value); + var (_, restored: auto) = get_contract_data(); + return restored; +} + +fun main() { return; } + +/** + +@testcase | 101 | | 2 +@testcase | 102 | 44 | 44 + +*/ diff --git a/tolk-tester/tests/remove-unused-functions.tolk b/tolk-tester/tests/remove-unused-functions.tolk new file mode 100644 index 000000000..e5d8aabcb --- /dev/null +++ b/tolk-tester/tests/remove-unused-functions.tolk @@ -0,0 +1,48 @@ +fun unused1(): int { return 2; } +fun unused2(): int { return unused1(); } +fun unused3(x: int): int { return x * 2+unused2(); } + +fun used_from_noncall1(): int { return 10; } +fun used_as_noncall1(): int { return used_from_noncall1(); } + +const int20: int = 20; +fun used_from_noncall2(): int { return int20; } +fun used_as_noncall2(): int { return 0 * 0 + used_from_noncall2() + (0 << 0); } + +global unused_gv: int; +global used_gv: auto; + +fun receiveGetter(): (() -> int) { return used_as_noncall2; } + +@pure +fun usedButOptimizedOut(x: int): int { return x + 2; } + +fun main(): (int, int, int) { + used_gv = 1; + used_gv = used_gv + 2; + var getter1 = used_as_noncall1; + var getter2 = receiveGetter(); + usedButOptimizedOut(used_gv); + return (used_gv, getter1(), getter2()); +} + +/** +@experimental_options remove-unused-functions + +@testcase | 0 | | 3 10 20 + +@fif_codegen DECLPROC used_as_noncall1 +@fif_codegen DECLGLOBVAR used_gv + +@fif_codegen_avoid DECLPROC unused1 +@fif_codegen_avoid DECLPROC unused2 +@fif_codegen_avoid DECLPROC unused3 +@fif_codegen_avoid DECLGLOBVAR unused_gv + +Note, that `usedButOptimizedOut()` (a pure function which result is unused) +is currently codegenerated, since it's formally reachable. 
+This is because optimizing code is a moment of codegen for now (later than marking unused symbols). + +@fif_codegen DECLPROC usedButOptimizedOut +@fif_codegen_avoid usedButOptimizedOut CALLDICT +*/ diff --git a/tolk-tester/tests/s1.tolk b/tolk-tester/tests/s1.tolk new file mode 100644 index 000000000..c7c4f6946 --- /dev/null +++ b/tolk-tester/tests/s1.tolk @@ -0,0 +1,61 @@ +get ascii_slice(): slice { + return"string"; +} + +get raw_slice(): slice { + return "abcdef"s; +} + +get addr_slice(): slice { + return "Ef8zMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzM0vF"a; +} + +get string_hex(): int { + return "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"u; +} + +get fun string_minihash(): int { // 'get' and 'get fun' both possible + return "transfer(slice, int)"h; +} + +get fun string_maxihash(): int { + return "transfer(slice, int)"H; +} + +get fun string_crc32(): int { + return "transfer(slice, int)"c; +} + +@pure +fun newc(): builder +asm "NEWC"; +fun endcs(b: builder): slice +asm "ENDC" "CTOS"; +@pure +fun sdeq(s1: slice, s2: slice): int +asm "SDEQ"; + +fun main() { + var s_ascii: slice = ascii_slice(); + var s_raw: slice = raw_slice(); + var s_addr: slice = addr_slice(); + var i_hex: int = string_hex(); + var i_mini: int = string_minihash(); + var i_maxi: int = string_maxihash(); + var i_crc: int = string_crc32(); + assert(sdeq(s_ascii, newc().storeUint(0x737472696E67, 12 * 4).endcs())) throw 101; + assert(sdeq(s_raw, newc().storeUint(0xABCDEF, 6 * 4).endcs())) throw 102; + assert(sdeq(s_addr, newc().storeUint(4, 3).storeInt(-1, 8) + .storeUint(0x3333333333333333333333333333333333333333333333333333333333333333, 256).endcs()), 103); + assert(i_hex == 0x4142434445464748494A4B4C4D4E4F505152535455565758595A303132333435) throw 104; + assert(i_mini == 0x7a62e8a8) throw 105; + assert(i_maxi == 0x7a62e8a8ebac41bd6de16c65e7be363bc2d2cbc6a0873778dead4795c13db979) throw 106; + assert(i_crc == 2235694568) throw 107; + return 0; +} + +/** +@testcase | 0 | | 0 + +@code_hash 
13830542019509784148027107880226447201604257839069192762244575629978154217223 +*/ diff --git a/tolk-tester/tests/self-keyword.tolk b/tolk-tester/tests/self-keyword.tolk new file mode 100644 index 000000000..a339e7d01 --- /dev/null +++ b/tolk-tester/tests/self-keyword.tolk @@ -0,0 +1,213 @@ +fun incChained(mutate self: int): self { + self = self + 1; + return self; +} + +fun incChained2(mutate self: int): self { + return self.incChained(); +} + +fun incChained3(mutate self: int): self { + incChained(mutate self); + return self; +} + +fun incChained4(mutate self: int): self { + self.incChained(); + return self; +} + +@method_id(101) +fun testIncChainedCodegen(x: int) { + return x.incChained().incChained2().incChained3().incChained4(); +} + +@method_id(102) +fun testIncChained() { + var x: int = 10; + incChained(mutate x); + x.incChained(); + x.incChained2(); + x.incChained2().incChained(); + x = x.incChained(); + x = x.incChained2().incChained().incChained2(); + return x.incChained(); +} + +fun incChainedWithMiddleReturn(mutate self: int, maxValue: int): self { + if (self >= maxValue) { + return self; + } + self += 1; + return self; +} + +@method_id(103) +fun testIncChainedWithMiddleReturn(x: int) { + x.incChainedWithMiddleReturn(10).incChainedWithMiddleReturn(10); + x = x.incChainedWithMiddleReturn(10).incChainedWithMiddleReturn(10); + return x.incChainedWithMiddleReturn(10).incChainedWithMiddleReturn(999); +} + +fun incChainedMutatingBoth(mutate self: int, mutate y: int): self { + self += 1; + y += 1; + return self; +} + +global c104: int; + +@method_id(104) +fun testIncChainedMutatingBoth() { + var (x, y) = (0, 0); + c104 = 0; + x.incChainedMutatingBoth(mutate y).incChainedMutatingBoth(mutate y); + incChainedMutatingBoth(mutate x, mutate y); + x = x.incChainedMutatingBoth(mutate c104).incChainedMutatingBoth(mutate c104).incChainedMutatingBoth(mutate y); + return (x, y, c104); +} + +fun incTensorChained(mutate self: (int, int)): self { + val (f, s) = self; + self = 
(f + 1, s + 1); + return self; +} + +@method_id(105) +fun testIncTensorChained(f: int, s: int) { + var tens = (f, s); + tens.incTensorChained().incTensorChained(); + return tens.incTensorChained().incTensorChained(); +} + +fun incConditionalChainable(mutate self: int, mutate another: int, ifLessThan: int): self { + another += 1; + return self.incChained() < ifLessThan ? self.incChained().incChained() : self; +} + +@method_id(106) +fun testIncConditionalChainable(x: int) { + var y = 0; + x.incConditionalChainable(mutate y, 5).incConditionalChainable(mutate y, 5); + x = x.incConditionalChainable(mutate y, 5).incConditionalChainable(mutate y, 5); + return (x.incConditionalChainable(mutate y, 5), y); +} + +fun checkNotEq(self: int, throwIfEq: int): void { + if (self == throwIfEq) { + throw 100 + throwIfEq; + } +} + +@method_id(107) +fun testNotMutatingSelf(arg: int) { + try { + arg.checkNotEq(100); + arg.checkNotEq(101); + arg.checkNotEq(102); + return 0; + } catch (code) { + return code; + } +} + +global c108: int; + +fun checkNotEqChainable(self: int, throwIfEq: int): self { + c108 += 1; + if (self != throwIfEq) { + return self; + } + throw 100 + throwIfEq; + return self; +} + +@method_id(108) +fun testNotMutatingChainableSelf(arg: int) { + c108 = 0; + try { + arg.checkNotEqChainable(100).checkNotEqChainable(101).checkNotEqChainable(102); + arg = arg.checkNotEqChainable(100).checkNotEqChainable(101).checkNotEqChainable(102); + return (arg, c108); + } catch (code) { + return (code, c108); + } +} + +global onceFailed109: int; + +fun checkNotEqChainableMutateAnother(self: int, throwIfEq: int, mutate toInc: int): self { + if (onceFailed109) { return self; } + toInc += 1; + try { return self.checkNotEqChainable(throwIfEq); } + catch { onceFailed109 = 1; return self; } +} + +global c109: int; + +@method_id(109) +fun testNotMutatingChainableSelfMutateAnother(initial: int) { + val arg = initial; + var x = 0; + c108 = 0; + c109 = 0; + onceFailed109 = 0; + 
arg.checkNotEqChainableMutateAnother(100, mutate x) + .checkNotEqChainableMutateAnother(101, mutate c109) + .checkNotEqChainableMutateAnother(102, mutate x); + return (arg, c108, c109, x); +} + + +fun main() { } + +/** +@testcase | 101 | 5 | 9 +@testcase | 102 | | 20 +@testcase | 103 | 1 | 7 +@testcase | 103 | 100 | 101 +@testcase | 103 | 8 | 11 +@testcase | 104 | | 6 4 2 +@testcase | 105 | 1 2 | 5 6 +@testcase | 106 | -20 | -5 5 +@testcase | 106 | -1 | 8 5 +@testcase | 106 | 7 | 12 5 +@testcase | 107 | 200 | 0 +@testcase | 107 | 102 | 202 +@testcase | 108 | 200 | 200 6 +@testcase | 108 | 101 | 201 0 +@testcase | 109 | 200 | 200 3 1 2 +@testcase | 109 | 100 | 100 0 0 1 +@testcase | 109 | 102 | 102 2 1 2 + +@fif_codegen +""" + incChained PROC:<{ + // self + INC // self + }> + incChained2 PROC:<{ + // self + incChained CALLDICT // self + }> + incChained3 PROC:<{ + // self + incChained CALLDICT // self + }> + incChained4 PROC:<{ + // self + incChained CALLDICT // self + }> +""" + +@fif_codegen +""" + testIncChainedCodegen PROC:<{ + // x + incChained CALLDICT // x + incChained2 CALLDICT // x + incChained3 CALLDICT // x + incChained4 CALLDICT // x + }> +""" + */ diff --git a/tolk-tester/tests/special-fun-names.tolk b/tolk-tester/tests/special-fun-names.tolk new file mode 100644 index 000000000..8fae6d5db --- /dev/null +++ b/tolk-tester/tests/special-fun-names.tolk @@ -0,0 +1,24 @@ +fun onInternalMessage() { return 0; } +fun onExternalMessage() { return -1; } +fun onRunTickTock() { return -2; } +fun onSplitPrepare() { return -3; } +fun onSplitInstall() { return -4; } + +/** +@experimental_options remove-unused-functions + +@testcase | 0 | | 0 +@testcase | -1 | | -1 +@testcase | -2 | | -2 +@testcase | -3 | | -3 +@testcase | -4 | | -4 + +@fif_codegen +""" + 0 DECLMETHOD onInternalMessage + -1 DECLMETHOD onExternalMessage + -2 DECLMETHOD onRunTickTock + -3 DECLMETHOD onSplitPrepare + -4 DECLMETHOD onSplitInstall +""" + */ diff --git a/tolk-tester/tests/test-math.tolk 
b/tolk-tester/tests/test-math.tolk new file mode 100644 index 000000000..893035fde --- /dev/null +++ b/tolk-tester/tests/test-math.tolk @@ -0,0 +1,1287 @@ +// this is actually `mathlib.fc` transformed to Tolk + +import "@stdlib/tvm-lowlevel" + +/*--------------- MISSING OPERATIONS AND BUILT-INS ----------------*/ + +/// compute floor(log2(x))+1 +@pure +fun log2_floor_p1(x: int): int + asm "UBITSIZE"; + +@pure +fun mulrshiftr(x: int, y: int, s: int): int + asm "MULRSHIFTR"; + +@pure +fun mulrshiftr256(x: int, y: int): int + asm "256 MULRSHIFTR#"; + +@pure +fun mulrshift256mod(x: int, y: int): (int, int) + asm "256 MULRSHIFT#MOD"; + +@pure +fun mulrshiftr256mod(x: int, y: int): (int, int) + asm "256 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr255mod(x: int, y: int): (int, int) + asm "255 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr248mod(x: int, y: int): (int, int) + asm "248 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr5mod(x: int, y: int): (int, int) + asm "5 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr6mod(x: int, y: int): (int, int) + asm "6 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr7mod(x: int, y: int): (int, int) + asm "7 MULRSHIFTR#MOD"; + +@pure +fun lshift256divr(x: int, y: int): int + asm "256 LSHIFT#DIVR"; + +@pure +fun lshift256divmodr(x: int, y: int): (int, int) + asm "256 LSHIFT#DIVMODR"; + +@pure +fun lshift255divmodr(x: int, y: int): (int, int) + asm "255 LSHIFT#DIVMODR"; + +@pure +fun lshift2divmodr(x: int, y: int): (int, int) + asm "2 LSHIFT#DIVMODR"; + +@pure +fun lshift7divmodr(x: int, y: int): (int, int) + asm "7 LSHIFT#DIVMODR"; + +@pure +fun lshiftdivmodr(x: int, y: int, s: int): (int, int) + asm "LSHIFTDIVMODR"; + +@pure +fun rshiftr256mod(x: int): (int, int) + asm "256 RSHIFTR#MOD"; + +@pure +fun rshiftr248mod(x: int): (int, int) + asm "248 RSHIFTR#MOD"; + +@pure +fun rshiftr4mod(x: int): (int, int) + asm "4 RSHIFTR#MOD"; + +@pure +fun rshift3mod(x: int): (int, int) + asm "3 RSHIFT#MOD"; + +/// computes y - x (Tolk compiler does not try to use this by 
itself) +@pure +fun sub_rev(x: int, y: int): int + asm "SUBR"; + +@pure +fun nan(): int + asm "PUSHNAN"; + +@pure +fun is_nan(x: int): int + asm "ISNAN"; + +/*----------------------- SQUARE ROOTS ---------------------------*/ + +/// computes sqrt(a*b) exactly rounded to the nearest integer +/// for all 0 <= a, b <= 2^256-1 +/// may be used with b=1 or b=scale of fixed-point numbers +@pure +@inline_ref +fun geom_mean(a: int, b: int): int { + if (!min(a, b)) { + return 0; + } + var s: int = log2_floor_p1(a); // throws out of range error if a < 0 or b < 0 + var t: int = log2_floor_p1(b); + // NB: (a-b)/2+b == (a+b)/2, but without overflow for large a and b + var x: int = (s == t ? (a - b) / 2 + b : 1 << ((s + t) / 2)); + do { + // if always used with b=2^const, may be optimized to "const LSHIFTDIVC#" + // it is important to use `mulDivCeil` here, not `mulDivFloor` or `mulDivRound` + var q: int = (mulDivCeil(a, b, x) - x) / 2; + x += q; + } while (q); + return x; +} + +/// integer square root, computes round(sqrt(a)) for all a>=0. 
+/// note: `inline` is better than `inline_ref` for such simple functions +@pure +@inline +fun sqrt(a: int): int { + return geom_mean(a, 1); +} + +/// version for fixed248 = fixed-point numbers with scale 2^248 +/// fixed248 sqrt(fixed248 x) +@pure +@inline +fun fixed248_sqrt(x: int): int { + return geom_mean(x, 1 << 248); +} + +/// fixed255 sqrt(fixed255 x) +@pure +@inline +fun fixed255_sqrt(x: int): int { + return geom_mean(x, 1 << 255); +} + +/// fixed248 sqr(fixed248 x); +@pure +@inline +fun fixed248_sqr(x: int): int { + return mulDivRound(x, x, 1 << 248); +} + +/// fixed255 sqr(fixed255 x); +@pure +@inline +fun fixed255_sqr(x: int): int { + return mulDivRound(x, x, 1 << 255); +} + +const fixed248_One: int = (1 << 248); +const fixed255_One: int = (1 << 255); + +/*------------------- USEFUL CONSTANTS -------------------*/ + +/// store huge constants in inline_ref functions for reuse +/// (y,z) where y=round(log(2)*2^256), z=round((log(2)*2^256-y)*2^128) +/// then log(2) = y/2^256 + z/2^384 +@pure +@inline_ref +fun log2_xconst_f256(): (int, int) { + return (80260960185991308862233904206310070533990667611589946606122867505419956976172, -32272921378999278490133606779486332143); +} + +/// (y,z) where Pi = y/2^254 + z/2^382 +@pure +@inline_ref +fun Pi_xconst_f254(): (int, int) { + return (90942894222941581070058735694432465663348344332098107489693037779484723616546, 108051869516004014909778934258921521947); +} + +/// atan(1/16) as fixed260 +@pure +@inline_ref +fun Atan1_16_f260(): int { + return 115641670674223639132965820642403718536242645001775371762318060545014644837101; // true value is ...101.0089... +} + +/// atan(1/8) as fixed259 +@pure +@inline_ref +fun Atan1_8_f259(): int { + return 115194597005316551477397594802136977648153890007566736408151129975021336532841; // correction -0.1687... 
+} + +/// atan(1/32) as fixed261 +@pure +@inline_ref +fun Atan1_32_f261(): int { + return 115754418570128574501879331591757054405465733718902755858991306434399246026247; // correction 0.395... +} + +/// inline is better than inline_ref for such very small functions +@pure +@inline +fun log2_const_f256(): int { + var (c: int, _) = log2_xconst_f256(); + return c; +} + +@pure +@inline +fun fixed248_log2_const(): int { + return log2_const_f256() ~>> 8; +} + +@pure +@inline +fun Pi_const_f254(): int { + var (c: auto, _) = Pi_xconst_f254(); + return c; +} + +@pure +@inline +fun fixed248_Pi_const(): int { + return Pi_const_f254() ~>> 6; +} + +/*-------------- HYPERBOLIC TANGENT AND EXPONENT ------------------*/ + +/// hyperbolic tangent of small x via n+2 terms of Lambert's continued fraction +/// n=17: good for |x| < log(2)/4 = 0.173 +/// fixed258 tanh_f258(fixed258 x, int n) +@pure +@inline_ref +fun tanh_f258(x: int, n: int): int { + var x2: int = mulDivRound(x, x, 1 << 255); // x^2 as fixed261 + var a: int = (2 * n + 5) << 250; // a=2n+5 as fixed250 + var c = a; + var Two: int = (1 << 251); // 2. as fixed250 + repeat (n) { + a = (c -= Two) + mulDivRound(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (3 << 254) + mulDivRound(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 + // y = x/(1+a') = x - x*a'/(1+a') = x - x*x^2/(a+x^2) where a' = x^2/a + return x - (mulDivRound(x, x2, a + (x2 ~>> 7)) ~>> 7); +} + +/// fixed257 expm1_f257(fixed257 x) +/// computes exp(x)-1 for small x via 19 terms of Lambert's continued fraction for tanh(x/2) +/// good for |x| < log(2)/2 = 0.347 (n=17); consumes ~3500 gas +@pure +@inline_ref +fun expm1_f257(x: int): int { + // (almost) compute tanh(x/2) first; x/2 as fixed258 = x as fixed257 + var x2: int = mulDivRound(x, x, 1 << 255); // x^2 as fixed261 + var Two: int = (1 << 251); // 2. 
as fixed250 + var a: int = 39 << 250; // a=2n+5 as fixed250 + var c = a; + repeat (17) { + a = (c -= Two) + mulDivRound(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (3 << 254) + mulDivRound(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 + // now tanh(x/2) = x/(1+a') where a'=x^2/a ; apply exp(x)-1=2*tanh(x/2)/(1-tanh(x/2)) + var t: int = (x ~>> 4) - a; // t:=x-a as fixed254 + return x - mulDivRound(x2, t / 2, a + mulrshiftr256(x, t) ~/ 4) ~/ 4; // x - x^2 * (x-a) / (a + x*(x-a)) +} + +/// expm1_f257() may be used to implement specific fixed-point exponentials +/// example: +/// fixed248 exp(fixed248 x) +@pure +@inline_ref +fun fixed248_exp(x: int): int { + var (l2c, l2d) = log2_xconst_f256(); + // divide x by log(2) and convert to fixed257 + // (int q, x) = muldivmodr(x, 256, l2c); // unfortunately, no such built-in + var (q: int, x redef) = lshiftdivmodr(x, l2c, 8); + x = 2 * x - mulDivRound(q, l2d, 1 << 127); + var y: int = expm1_f257(x); + // result is (1 + y) * (2^q) --> ((1 << 257) + y) >> (9 - q) + return (y ~>> (9 - q)) - (-1 << (248 + q)); + // note that (y ~>> (9 - q)) + (1 << (248 + q)) leads to overflow when q=8 +} + +/// compute 2^x in fixed248 +/// fixed248 exp2(fixed248 x) +@pure +@inline_ref +fun fixed248_exp2(x: int): int { + // (int q, x) = divmodr(x, 1 << 248); // no such built-in + var (q: int, x redef) = rshiftr248mod(x); + x = mulDivRound(x, log2_const_f256(), 1 << 247); + var y: int = expm1_f257(x); + return (y ~>> (9 - q)) - (-1 << (248 + q)); +} + +/*-------------------- TRIGONOMETRIC FUNCTIONS ----------------------*/ + +/// fixed260 tan(fixed260 x); +/// computes tan(x) for small |x|> 10)) ~>> 9); +} + +/// fixed260 tan(fixed260 x); +@pure +@inline_ref +fun tan_f260(x: int): int { + return tan_f260_inlined(x); +} + +/// fixed258 tan(fixed258 x); +/// computes tan(x) for small |x|> 6)) ~>> 5); +} + +/// fixed258 tan(fixed258 x); +@pure +@inline_ref +fun tan_f258(x: int): int { + return tan_f258_inlined(x); +} + 
+/// (fixed259, fixed263) sincosm1(fixed259 x) +/// computes (sin(x), 1-cos(x)) for small |x|<2*atan(1/16) +@pure +@inline +fun sincosm1_f259_inlined(x: int): (int, int) { + var t: int = tan_f260_inlined(x); // t=tan(x/2) as fixed260 + var tt: int = mulrshiftr256(t, t); // t^2 as fixed264 + var y: int = tt ~/ 512 + (1 << 255); // 1+t^2 as fixed255 + // 2*t/(1+t^2) as fixed259 and 2*t^2/(1+t^2) as fixed263 + // return (mulDivRound(t, 1 << 255, y), mulDivRound(tt, 1 << 255, y)); + return (t - mulDivRound(t / 2, tt, y) ~/ 256, tt - mulDivRound(tt / 2, tt, y) ~/ 256); +} + +@pure +@inline_ref +fun sincosm1_f259(x: int): (int, int) { + return sincosm1_f259_inlined(x); +} + +/// computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small +/// this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) +/// (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) +@pure +@inline_ref +fun sincosn_f256(x: int, xe: int): (int, int) { + // var (q, x1) = muldivmodr(x, 8, Atan1_8_f259()); // no muldivmodr() builtin + var (q, x1) = lshift2divmodr(abs(x), Atan1_8_f259()); // reduce mod theta where theta=2*atan(1/8) + var (si, co) = sincosm1_f259(x1 * 2 + xe); + var (a, b, c) = (-1, 0, 1); + repeat (q) { + // (a+b*I) *= (8+I)^2 = 63+16*I + (a, b, c) = (63 * a - 16 * b, 16 * a + 63 * b, 65 * c); + } + // now a/c = cos(q*theta), b/c = sin(q*theta) exactly(!) 
+ // compute (a+b*I)*(1-co+si*I)/c + // (b, a) = (lshift256divr(b, c), lshift256divr(a, c)); + var (b redef, br: int) = lshift256divmodr(b, c); br = mulDivRound(br, 128, c); + var (a redef, ar: int) = lshift256divmodr(a, c); ar = mulDivRound(ar, 128, c); + return (sign(x) * (((mulrshiftr256(b, co) - br) ~/ 16 - mulrshiftr256(a, si)) ~/ 8 - b), + a - ((mulrshiftr256(a, co) - ar) ~/ 16 + mulrshiftr256(b, si)) ~/ 8); +} + +/// compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 +/// (fixed256, fixed257) sincosm1_f256(fixed256 x); +/// slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter +@pure +@inline_ref +fun sincosm1_f256(x: int): (int, int) { + var (si, co) = sincosm1_f259_inlined(x); // compute (sin,1-cos)(x/8) in (fixed259,fixed263) + var r: int = 7; + repeat (r / 2) { + // 1-cos(2*x) = 2*sin(x)^2, sin(2*x) = 2*sin(x)*cos(x) + (co, si) = (mulrshiftr256(si, si), si - (mulrshiftr256(si, co) ~>> r)); + r -= 2; + } + return (si, co); +} + +/// compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 +/// (int, int) tan_aux(fixed256 x); +@pure +@inline_ref +fun tan_aux_f256(x: int): (int, int) { + var t: int = tan_f258_inlined(x); // t=tan(x/4) as fixed258 + // t:=2*t/(1-t^2)=2*(t-t^3/(t^2-1)) + var tt: int = mulrshiftr256(t, t); // t^2 as fixed260 + t = mulDivRound(t, tt, tt ~/ 16 + (-1 << 256)) ~/ 16 - t; // now t=-tan(x/2) as fixed259 + return (t, mulrshiftr256(t, t) ~/ 4 + (-1 << 256)); // return (2*t, t^2-1) as fixed256 +} + +/// sincosm1_f256() and sincosn_f256() may be used to implement trigonometric functions for different fixed-point types +/// example: +/// (fixed248, fixed248) sincos(fixed248 x); +@pure +@inline_ref +fun fixed248_sincos(x: int): (int, int) { + var (Pic, Pid) = Pi_xconst_f254(); + // (int q, x) = muldivmodr(x, 128, Pic); // no muldivmodr() builtin + var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - mulDivRound(q, Pid, 1 << 127); 
+ var (si: int, co: int) = sincosm1_f256(x); // doesn't make sense to use more accurate sincosn_f256() + co = (1 << 248) - (co ~>> 9); + si = si ~>> 8; + repeat (q & 3) { + (si, co) = (co, -si); + } + return (si, co); +} + +/// fixed248 sin(fixed248 x); +/// inline is better than inline_ref for such simple functions +@pure +@inline +fun fixed248_sin(x: int): int { + var (si: int, _) = fixed248_sincos(x); + return si; +} + +/// fixed248 cos(fixed248 x); +@pure +@inline +fun fixed248_cos(x: int): int { + var (_, co: int) = fixed248_sincos(x); + return co; +} + +/// similarly, tan_aux_f256() may be used to implement tan() and cot() for specific fixed-point formats +/// fixed248 tan(fixed248 x); +/// not very accurate when |tan(x)| is very large (difficult to do better without floating-point numbers) +/// however, the relative accuracy is approximately 2^-247 in all cases, which is good enough for arguments given up to 2^-249 +@pure +@inline_ref +fun fixed248_tan(x: int): int { + var (Pic, Pid) = Pi_xconst_f254(); + // (int q, x) = muldivmodr(x, 128, Pic); // no muldivmodr() builtin + var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - mulDivRound(q, Pid, 1 << 127); + var (a, b) = tan_aux_f256(x); // now a/b = tan(x') + if (q & 1) { + (a, b) = (b, -a); + } + return mulDivRound(a, 1 << 248, b); // either -b/a or a/b as fixed248 +} + +/// fixed248 cot(fixed248 x); +@pure +@inline_ref +fun fixed248_cot(x: int): int { + var (Pic, Pid) = Pi_xconst_f254(); + var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - mulDivRound(q, Pid, 1 << 127); + var (b, a) = tan_aux_f256(x); // now b/a = tan(x') + if (q & 1) { + (a, b) = (b, -a); + } + return mulDivRound(a, 1 << 248, b); // either -b/a or a/b as fixed248 +} + +/*---------------- INVERSE HYPERBOLIC TANGENT AND LOGARITHMS ----------------*/ + +/// inverse hyperbolic tangent of small x, evaluated by means of n terms of the continued fraction +/// valid for |x| < 2^-2.5 ~ 
0.18 if n=37 (slightly less accurate with n=36) +/// |x| < 1/8 if n=32; |x| < 2^-3.5 if n=28; |x| < 1/16 if n=25 +/// |x| < 2^-4.5 if n=23; |x| < 1/32 if n=21; |x| < 1/64 if n=18 +/// fixed258 atanh(fixed258 x); +@pure +@inline_ref +fun atanh_f258(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed260 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + var t: int = One - mulDivRound(x2, 1 << 248, a); // t := 1 - x^2 / a + var n1: int = n - 1; + a = mulDivRound(t, n, n1) + One; + n = n1; + } + // x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + // int d = mulDivRound(x2, 1 << 255, a - (x2 ~>> 6)); // d/(1-d) = x^2/(a-x^2) as fixed261 + // return x + (mulrshiftr256(x, d) ~>> 5); + return x + mulDivRound(x, x2 / 2, a - x2 ~/ 64) ~/ 32; +} + +/// number of terms n should be chosen as for atanh_f258() +/// fixed261 atanh(fixed261 x); +@pure +@inline +fun atanh_f261_inlined(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed266 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + var t: int = One - mulDivRound(x2, 1 << 242, a); // t := 1 - x^2 / a + var n1: int = n - 1; + a = mulDivRound(t, n, n1) + One; + n = n1; + } + // x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + // int d = mulDivRound(x2, 1 << 255, a - (x2 ~>> 12)); // d/(1-d) = x^2/(a-x^2) as fixed267 + // return x + (mulrshiftr256(x, d) ~>> 11); + return x + mulDivRound(x, x2, a - x2 ~/ 4096) ~/ 4096; +} + +/// fixed261 atanh(fixed261 x); +@pure +@inline_ref +fun atanh_f261(x: int, n: int): int { + return atanh_f261_inlined(x, n); +} + +/// returns (y, s) such that log(x) = y/2^257 + s*log(2) for positive integer x +/// (fixed257, int) log_aux(int x) +@pure +@inline_ref +fun log_aux_f257(x: int): 
(int, int) { + var s: int = log2_floor_p1(x); + x <<= 256 - s; + var t: int = -1 << 256; + if ((x >> 249) <= 90) { + t >>= 1; + s -= 1; + } + x += t; + var `2x`: int = 2 * x; + var y: int = lshift256divr(`2x`, (x >> 1) - t); + // y = `2x` - (mulrshiftr256(2x, y) ~>> 2); // this line could improve precision on very rare occasions + return (atanh_f258(y, 36), s); +} + +/// computes 33^m for small m +@pure +@inline +fun pow33(m: int): int { + var t: int = 1; + repeat (m) { + t *= 33; + } + return t; +} + +/// computes 33^m for small 0<=m<=22 +/// slightly faster than pow33() +@pure +@inline +fun pow33b(m: int): int { + var (mh: int, ml: int) = divMod(m, 5); + var t: int = 1; + repeat (ml) { + t *= 33; + } + repeat (mh) { + t *= 33 * 33 * 33 * 33 * 33; + } + return t; +} + +/// returns (s, q, y) such that log(x) = s*log(2) + q*log(33/32) + y/2^260 for positive integer x +/// (int, int, fixed260) log_auxx_f260(int x); +@pure +@inline_ref +fun log_auxx_f260(x: int): (int, int, int) { + var s: int = log2_floor_p1(x) - 1; + x <<= 255 - s; // rescale to 1 <= x < 2 as fixed255 + var t: int = 2873 << 244; // ~ (33/32)^11 ~ sqrt(2) as fixed255 + var x1: int = (x - t) >> 1; + var q: int = mulDivRound(x1, 65, x1 + t) + 11; // crude approximation to round(log(x)/log(33/32)) + // t = 1; repeat (q) { t *= 33; } // t:=33^q, 0<=q<=22 + t = pow33b(q); + t <<= (51 - q) * 5; // t:=(33/32)^q as fixed255, nearest power of 33/32 to x + x -= t; + var y: int = lshift256divr(x << 4, (x >> 1) + t); // y = (x-t)/(x+t) as fixed261 + y = atanh_f261(y, 18); // atanh((x-t)/(x+t)) as fixed261, or log(x/t) as fixed260 + return (s, q, y); +} + +/// returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x +/// this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +/// may be used to implement specific fixed-point instances of log() and log2() +/// (fixed256, int) log_aux_f256(int x); +@pure +@inline_ref +fun log_aux_f256(x: int): (int, int) { + var (s, q, y) = 
log_auxx_f260(x); + var (yh, yl) = rshiftr4mod(y); // y ~/% 16 , but Tolk does not optimize this to RSHIFTR#MOD + // int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; // log(33/32) as fixed256 + // int Log33_32_l = -3769; // log(33/32) = Log33_32 / 2^256 + Log33_32_l / 2^269 + yh += (yl * 512 + q * -3769) ~>> 13; // compensation, may be removed if slightly worse accuracy is acceptable + var Log33_32: int = 3563114646320977386603103333812068872452913448227778071188132859183498739150; // log(33/32) as fixed256 + return (yh + q * Log33_32, s); +} + +/// returns (y, s) such that log2(x) = y/2^256 + s for positive integer x +/// this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +/// may be used to implement specific fixed-point instances of log() and log2() +/// (fixed256, int) log2_aux_f256(int x); +@pure +@inline_ref +fun log2_aux_f256(x: int): (int, int) { + var (s, q, y) = log_auxx_f260(x); + y = lshift256divr(y, log2_const_f256()) ~>> 4; // y/log(2) as fixed256 + var Log33_32: int = 5140487830366106860412008603913034462883915832139695448455767612111363481357; // log_2(33/32) as fixed256 + // Log33_32/2^256 happens to be a very precise approximation to log_2(33/32), no compensation required + return (y + q * Log33_32, s); +} + + +/// fixed248 log(fixed248 x) +@pure +@inline_ref +fun fixed248_log(x: int): int { + var (y, s) = log_aux_f256(x); + return mulDivRound(s - 248, log2_const_f256(), 1 << 8) + (y ~>> 8); + // return mulDivRound(s - 248, 80260960185991308862233904206310070533990667611589946606122867505419956976172, 1 << 8) + (y ~>> 8); +} + +/// fixed248 log2(fixed248 x) +@pure +@inline +fun fixed248_log2(x: int): int { + var (y, s) = log2_aux_f256(x); + return ((s - 248) << 248) + (y ~>> 8); +} + +/// computes x^y as exp(y*log(x)), x >= 0 +/// fixed248 pow(fixed248 x, fixed248 y); +@pure +@inline_ref +fun fixed248_pow(x: int, y: int): int { + if (!y) { + return 1 << 248; // x^0 = 1 + } + if 
(x <= 0) { + var bad: int = (x | y) < 0; + return 0 >> bad; // 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise + } + var (l, s) = log2_aux_f256(x); + s -= 248; // log_2(x) = s+l, l is fixed256, 0<=l<1 + // compute (s+l)*y = q+ll + var (q1, r1) = mulrshiftr248mod(s, y); // muldivmodr(s, y, 1 << 248) + var (q2, r2) = mulrshift256mod(l, y); + r2 >>= 247; + var (q3, r3) = rshiftr248mod(q2); // divmodr(q2, 1 << 248); + var (q, ll) = rshiftr248mod(r1 + r3); + ll = 512 * ll + r2; + q += q1 + q3; + // now log_2(x^y) = y*log_2(x) = q + ll, q integer, ll fixed257, -1/2<=ll<1/2 + var sq: int = q + 248; + if (sq <= 0) { + return -(sq == 0); // underflow + } + y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); + return (y ~>> (9 - q)) - (-1 << sq); +} + +/*-------------------- INVERSE TRIGONOMETRIC FUNCTIONS ------------------*/ + +/// number of terms n should be chosen as for atanh_f258() +/// fixed259 atan(fixed259 x); +@pure +@inline_ref +fun atan_f259(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed262 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 + var t: int = One + mulDivRound(x2, 1 << 246, a); // t := 1 + x^2 / a + var n1: int = n - 1; + a = mulDivRound(t, n, n1) + One; + n = n1; + } + // x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a + return x - mulDivRound(x, x2, a + x2 ~/ 256) ~/ 256; +} + +/// number of terms n should be chosen as for atanh_f261() +/// fixed261 atan(fixed261 x); +@pure +@inline +fun atan_f261_inlined(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed266 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 + var t: int = One + mulDivRound(x2, 1 << 242, a); // t := 1 + x^2 / a + var n1: int = n - 1; + a = 
mulDivRound(t, n, n1) + One; + n = n1; + } + // x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a + return x - mulDivRound(x, x2, a + x2 ~/ 4096) ~/ 4096; +} + +/// fixed261 atan(fixed261 x); +@pure +@inline_ref +fun atan_f261(x: int, n: int): int { + return atan_f261_inlined(x, n); +} + +/// computes (q,a,b) such that q is approximately atan(x)/atan(1/32) and a+b*I=(1+I/32)^q as fixed255 +/// then b/a=tan(q*atan(1/32)) exactly, and (a,b) is almost a unit vector pointing in the direction of (1,x) +/// must have |x|<1.1, x is fixed24 +/// (int, fixed255, fixed255) atan_aux_prereduce(fixed24 x); +@pure +@inline_ref +fun atan_aux_prereduce(x: int): (int, int, int) { + var xu: int = abs(x); + var tc: int = 7214596; // tan(13*theta) as fixed24 where theta=atan(1/32) + var t1: int = mulDivRound(xu - tc, 1 << 88, xu * tc + (1 << 48)); // tan(x') as fixed64 where x'=atan(x)-13*theta + // t1/(3+t1^2) * 3073/32 = x'/3 * 3072/32 = x' / (96/3072) = x' / theta + var q: int = mulDivRound(t1 * 3073, 1 << 59, t1 * t1 + (3 << 128)) + 13; // approximately round(atan(x)/theta), 0<=q<=25 + var (pa, pb) = (33226912, 5232641); // (32+I)^5 + var (qh, ql) = divMod(q, 5); + var (a, b) = (1 << (5 * (51 - q)), 0); // (1/32^q, 0) as fixed255 + repeat (ql) { + // a+b*I *= 32+I + b.stackMoveToTop(); + (a, b) = (sub_rev(b, 32 * a), a + 32 * b); // same as (32 * a - b, 32 * b + a), but more efficient + } + repeat (qh) { + // a+b*I *= (32+I)^5 = pa + pb*I + (a, b) = (a * pa - b * pb, a * pb + b * pa); + } + var xs: int = sign(x); + return (xs * q, a, xs * b); +} + +/// compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +/// this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas +/// this is sufficient for most purposes +/// (int, fixed261) atan_aux(fixed256 x) +@pure +@inline_ref +fun atan_aux_f256(x: int): (int, int) { + var (q, a, b) = atan_aux_prereduce(x ~>> 232); // convert x to fixed24 + // now 
b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x + // compute y = u/v = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) + var (u, ul) = mulrshiftr256mod(a, x); + u = (ul ~>> 250) + ((u - b) << 6); // |u| < 1/32, convert fixed255 -> fixed261 + var v: int = a + mulrshiftr256(b, x); // v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 + var y: int = mulDivRound(u, 1 << 255, v); // y = u/v as fixed261 + var z: int = atan_f261_inlined(y, 18); // z = atan(x)-q*atan(1/32) + return (q, z); +} + +/// compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +/// this function is very accurate (error < 2 ulp), but it consumes >7k gas +/// in most cases, faster function atan_aux_f256() should be used +/// (int, fixed261) atan_auxx(fixed256 x) +@pure +@inline_ref +fun atan_auxx_f256(x: int): (int, int) { + var (q, a, b) = atan_aux_prereduce(x ~>> 232); // convert x to fixed24 + // now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x + // compute y = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) + // use sort of double precision arithmetic for this + var (u, ul) = mulrshiftr256mod(a, x); + ul /= 2; + u -= b; // |u| < 1/32 as fixed255 + var (v, vl) = mulrshiftr256mod(b, x); + vl /= 2; + v += a; // v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 + // y = (u + ul*eps) / (v + vl*eps) = u/v + (ul - vl * u/v)/v * eps where eps=1/2^255 + var (y, r) = lshift255divmodr(u, v); // y = u/v as fixed255 + var yl: int = mulDivRound(ul + r, 1 << 255, v) - mulDivRound(vl, y, v); // y/2^255 + yl/2^510 represent u/v + y = (yl ~>> 249) + (y << 6); // convert y to fixed261 + var z: int = atan_f261_inlined(y, 18); // z = atan(x)-q*atan(1/32) + return (q, z); +} + +/// consumes ~ 8k gas +/// fixed255 atan(fixed255 x); +@pure +@inline_ref +fun atan_f255(x: int): 
int { + var s: int = (x ~>> 256); + x.stackMoveToTop(); + if (s) { + x = lshift256divr(-1 << 255, x); // x:=-1/x as fixed256 + } else { + x *= 2; // convert to fixed256 + } + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 + var (Pi_h, Pi_l) = Pi_xconst_f254(); // Pi/2 as fixed255 + fixed383 + var (qh, ql) = mulrshiftr6mod(q, Atan1_32_f261()); + return qh + s * Pi_h + (z + ql + mulDivRound(s, Pi_l, 1 << 122)) ~/ 64; +} + +/// computes atan(x) for -1 <= x < 1 only +/// fixed256 atan_small(fixed256 x); +@pure +@inline_ref +fun atan_f256_small(x: int): int { + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32), z is fixed261 + var (qh, ql) = mulrshiftr5mod(q, Atan1_32_f261()); + return qh + (z + ql) ~/ 32; +} + +/// fixed255 asin(fixed255 x); +@pure +@inline_ref +fun asin_f255(x: int): int { + var a: int = fixed255_One - fixed255_sqr(x); // a:=1-x^2 + if (!a) { + return sign(x) * Pi_const_f254(); // Pi/2 or -Pi/2 + } + var y: int = fixed255_sqrt(a); // sqrt(1-x^2) + var t: int = -lshift256divr(x, (-1 << 255) - y); // t = x/(1+sqrt(1-x^2)) avoiding overflow + return atan_f256_small(t); // asin(x)=2*atan(t) +} + +/// fixed254 acos(fixed255 x); +@pure +@inline_ref +fun acos_f255(x: int): int { + var Pi: int = Pi_const_f254(); + if (x == (-1 << 255)) { + return Pi; // acos(-1) = Pi + } + Pi /= 2; + var y: int = fixed255_sqrt(fixed255_One - fixed255_sqr(x)); // sqrt(1-x^2) + var t: int = lshift256divr(x, (-1 << 255) - y); // t = -x/(1+sqrt(1-x^2)) avoiding overflow + return Pi + atan_f256_small(t) ~/ 2; // acos(x)=Pi/2 + 2*atan(t) +} + +/// consumes ~ 10k gas +/// fixed248 asin(fixed248 x) +@pure +@inline +fun fixed248_asin(x: int): int { + return asin_f255(x << 7) ~>> 7; +} + +/// consumes ~ 10k gas +/// fixed248 acos(fixed248 x) +@pure +@inline +fun fixed248_acos(x: int): int { + return acos_f255(x << 7) ~>> 6; +} + +/// consumes ~ 7500 gas +/// fixed248 atan(fixed248 x); +@pure +@inline_ref +fun 
fixed248_atan(x: int): int { + var s: int = (x ~>> 249); + x.stackMoveToTop(); + if (s) { + s = sign(s); + x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 + } else { + x <<= 8; // convert to fixed256 + } + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 + return (z ~/ 64 + s * Pi_const_f254() + mulDivRound(q, Atan1_32_f261(), 64)) ~/ 128; // compute in fixed255, then convert +} + +/// fixed248 acot(fixed248 x); +@pure +@inline_ref +fun fixed248_acot(x: int): int { + var s: int = (x ~>> 249); + x.stackMoveToTop(); + if (s) { + x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 + s = 0; + } else { + x <<= 8; // convert to fixed256 + s = sign(x); + } + var (q, z) = atan_aux_f256(x); + // now acot(x) = - z - q*atan(1/32) + s*(Pi/2), z is fixed261 + return (s * Pi_const_f254() - z ~/ 64 - mulDivRound(q, Atan1_32_f261(), 64)) ~/ 128; // compute in fixed255, then convert +} + +/*-------------------- PSEUDO-RANDOM NUMBERS ------------------*/ + +/// random number with standard normal distribution N(0,1) +/// generated by Kinderman--Monahan ratio method modified by J.Leva +/// spends ~ 2k..3k gas on average +/// fixed252 nrand(); +@inline_ref +fun nrand_f252(): int { + var (x, s, t, A, B, r0) = (nan(), 29483 << 236, -3167 << 239, 12845, 16693, 9043); + // 4/sqrt(e*Pi) = 1.369 loop iterations on average + do { + var (u, v) = (random() / 16 + 1, mulDivRound(random() - (1 << 255), 7027, 1 << 16)); // fixed252; 7027=ceil(sqrt(8/e)*2^12) + var va: int = abs(v); + var (u1, v1) = (u - s, va - t); // (u - 29483/2^16, abs(v) + 3167/2^13) as fixed252 + // Q := u1^2 + v1 * (A*v1 - B*u1) as fixed252 where A=12845/2^16, B=16693/2^16 + var Q: int = mulDivRound(u1, u1, 1 << 252) + mulDivRound(v1, mulDivRound(v1, A, 1 << 16) - mulDivRound(u1, B, 1 << 16), 1 << 252); + // must have 9043 / 2^15 < Q < 9125 / 2^15, otherwise accept if smaller, reject if larger + var Qd: int = (Q >> 237) - r0; + if ((Qd < 9125 - 9043) & (va / u < 16)) { 
+ x = mulDivRound(v, 1 << 252, u); // x:=v/u as fixed252; reject immediately if |v/u| >= 16 + if (Qd >= 0) { + // immediately accept if Qd < 0 + // rarely taken branch - 0.012 times per call on average + // check condition v^2 < -4*u^2*log(u), or equivalent condition u < exp(-x^2/4) for x=v/u + var xx: int = mulrshiftr256(x, x) ~/ 4; // x^2/4 as fixed248 + var ex: int = fixed248_exp(-xx) * 16; // exp(-x^2/4) as fixed252 + if (u > ex) { + x = nan(); // condition false, reject + } + } + } + } while (!(~ is_nan(x))); + return x; +} + +/// generates a random number approximately distributed according to the standard normal distribution +/// much faster than nrand_f252(), should be suitable for most purposes when only several random numbers are needed +/// fixed252 nrand_fast(); +@inline_ref +fun nrand_fast_f252(): int { + var t: int = -3 << 253; // -6. as fixed252 + repeat (12) { + t += random() / 16; // add together 12 uniformly random numbers + } + return t; +} + +/// random number uniformly distributed in [0..1) +/// fixed248 random(); +@inline +fun fixed248_random(): int { + return random() >> 8; +} + +/// random number with standard normal distribution +/// fixed248 nrand(); +@inline +fun fixed248_nrand(): int { + return nrand_f252() ~>> 4; +} + +/// generates a random number approximately distributed according to the standard normal distribution +/// fixed248 nrand_fast(); +@inline +fun fixed248_nrand_fast(): int { + return nrand_fast_f252() ~>> 4; +} + +@pure +fun tset(mutate self: tuple, idx: int, value: X): void + asm(self value idx) "SETINDEXVAR"; + +// computes 1-acos(x)/Pi by a very simple, extremely slow (~70k gas) and imprecise method +// fixed256 acos_prepare_slow(fixed255 x); +@inline +fun acos_prepare_slow_f255(x: int): int { + x -= (x == 0); + var t: int = 1; + repeat (255) { + t = t * sign(x) * 2 + 1; // decode Gray code (sign(x_0), sign(x_1), ...) 
+ x = (-1 << 255) - mulDivRound(x, - x, 1 << 254); // iterate x := 2*x^2 - 1 = cos(2*acos(x)) + } + return abs(t); +} + +// extremely slow (~70k gas) and somewhat imprecise (very imprecise when x is small), for testing only +// fixed254 acos_slow(fixed255 x); +@inline_ref +fun acos_slow_f255(x: int): int { + var t: int = acos_prepare_slow_f255(x); + return - mulrshiftr256(t + (-1<<256), Pi_const_f254()); +} + +// fixed255 asin_slow(fixed255 x); +@inline_ref +fun asin_slow_f255(x: int): int { + var t: int = acos_prepare_slow_f255(abs(x)) % (1 << 255); + return mulDivRound(t, Pi_const_f254(), 1 << 255) * sign(x); +} + +@inline_ref +fun test_nrand(n: int): tuple { + var t: tuple = createEmptyTuple(); + repeat (255) { + t.tuplePush(0); + } + repeat (n) { + var x: int = fixed248_nrand(); + var bucket: int = (abs(x) >> 243); // 255 buckets starting from x=0, each 1/32 wide + t.tset(bucket, t.tupleAt(bucket) + 1); + } + return t; +} + +@method_id(10000) +fun geom_mean_test(x: int, y: int): int { + return geom_mean(x, y); +} +@method_id(10001) +fun tan_f260_test(x: int): int { + return tan_f260(x); +} +@method_id(10002) +fun sincosm1_f259_test(x: int): (int, int) { + return sincosm1_f259(x); +} +@method_id(10003) +fun sincosn_f256_test(x: int, y: int): (int, int) { + return sincosn_f256(x, y); +} +@method_id(10004) +fun sincosm1_f256_test(x: int): (int, int) { + return sincosm1_f256(x); +} +@method_id(10005) +fun tan_aux_f256_test(x: int): (int, int) { + return tan_aux_f256(x); +} +@method_id(10006) +fun fixed248_tan_test(x: int): int { + return fixed248_tan(x); +} +/* + (int) atanh_alt_f258_test(x) method_id(10007) { + return atanh_alt_f258(x); + } +*/ +@method_id(10008) +fun atanh_f258_test(x:int, y:int): int { + return atanh_f258(x, y); +} +@method_id(10009) +fun atanh_f261_test(x:int, y:int): int { + return atanh_f261(x, y); +} + +@method_id(10010) +fun log2_aux_f256_test(x:int): (int, int) { + return log2_aux_f256(x); +} +@method_id(10011) +fun 
log_aux_f256_test(x:int): (int, int) { + return log_aux_f256(x); +} +@method_id(10012) +fun fixed248_pow_test(x:int, y:int): int { + return fixed248_pow(x, y); +} +@method_id(10013) +fun exp_log_div(x:int, y:int): int { + return fixed248_exp(fixed248_log(x << 248) ~/ y); +} +@method_id(10014) +fun fixed248_log_test(x:int): int { + return fixed248_log(x); +} +@method_id(10015) +fun log_aux_f257_test(x:int): (int,int) { + return log_aux_f257(x); +} +@method_id(10016) +fun fixed248_sincos_test(x:int): (int,int) { + return fixed248_sincos(x); +} +@method_id(10017) +fun fixed248_exp_test(x:int): int { + return fixed248_exp(x); +} +@method_id(10018) +fun fixed248_exp2_test(x:int): int { + return fixed248_exp2(x); +} +@method_id(10019) +fun expm1_f257_test(x:int): int { + return expm1_f257(x); +} +@method_id(10020) +fun atan_f255_test(x:int): int { + return atan_f255(x); +} +@method_id(10021) +fun atan_f259_test(x:int, n:int): int { + return atan_f259(x, n); +} +@method_id(10022) +fun atan_aux_f256_test(x:int): (int, int) { + return atan_aux_f256(x); +} +@method_id(10023) +fun asin_f255_test(x:int): int { + return asin_f255(x); +} +@method_id(10024) +fun asin_slow_f255_test(x:int): int { + return asin_slow_f255(x); +} +@method_id(10025) +fun acos_f255_test(x:int): int { + return acos_f255(x); +} +@method_id(10026) +fun acos_slow_f255_test(x:int): int { + return acos_slow_f255(x); +} +@method_id(10027) +fun fixed248_atan_test(x:int): int { + return fixed248_atan(x); +} +@method_id(10028) +fun fixed248_acot_test(x:int): int { + return fixed248_acot(x); +} + +fun main() { + var One: int = 1; + // repeat(76 / 4) { One *= 10000; } + var sqrt2: int = geom_mean(One, 2 * One); + var sqrt3: int = geom_mean(One, 3 * One); + // return geom_mean(-1 - (-1 << 256), -1 - (-1 << 256)); + // return geom_mean(-1 - (-1 << 256), -2 - (-1 << 256)); + // return geom_mean(-1 - (-1 << 256), 1 << 255); + // return (sqrt2, geom_mean(sqrt2, One)); // (sqrt(2), 2^(1/4)) + // return (sqrt3, 
geom_mean(sqrt3, One)); // (sqrt(3), 3^(1/4)) + // return geom_mean(3 << 254, 1 << 254); + // return geom_mean(3, 5); + // return tan_f260(115641670674223639132965820642403718536242645001775371762318060545014644837101 - 1); + // return tan_f260(15 << 252); // tan(15/256) * 2^260 + // return sincosm1_f259(1 << 255); // (sin,1-cos)(1/16) * 2^259 + // return sincosm1_f259(115641670674223639132965820642403718536242645001775371762318060545014644837101 - 1); + // return sincosm1_f256((1 << 255) - 1 + (1 << 255)); // (sin,1-cos)(1-2^(-256)) + // return sincosm1_f256(Pi_const_f254()); // (sin,1-cos)(Pi/4) + // return sincosn_f256(Pi_const_f254(), 0); // (sin,-cos)(Pi/4) + // return sincosn_f256((1 << 255) + 1, 0); // (sin,-cos)(1/2+1/2^256) + // return sincosn_f256(1 << 254, 0); + // return sincosn_f256(stackMoveToTop(15) << 252, 0); // (sin,-cos)(15/16) + // return sincosm1_f256(stackMoveToTop(15) << 252); // (sin,1-cos)(15/16) + // return sincosn_f256(60628596148627720713372490462954977108898896221398738326462025186323149077698, 0); // (sin,-cos)(Pi/6) + // return sincosm1_f256(60628596148627720713372490462954977108898896221398738326462025186323149077698); // (sin,1-cos)(Pi/6) + // return tan_aux_f256(1899 << 245); // (p,q) such that p/q=tan(1899/2048) + // return fixed248_tan(11 << 248); // tan(11) + // return atanh_alt_f258(1 << 252); // atanh(1/64) * 2^258 + // return atanh_f258(1 << 252, 18); // atanh(1/64) * 2^258 + // return atanh_f261(mulDivRound(64, 1 << 255, 55), 18); // atanh(1/55) * 2^261 + // return log2_aux_f256(1 << 255); + // return log2_aux_f256(-1 - (-1 << 256)); // log2(2-1/2^255))*2^256 ~ 2^256 - 1.43 + // return log_aux_f256(-1 - (-1 << 256)); + // return log_aux_f256(3); // log(3/2)*2^256 + // return fixed248_pow(3 << 248, 3 << 248); // 3^3 + // return fixed248_exp(fixed248_log(5 << 248) ~/ 7); // exp(log(5)/7) = 5^(1/7) + // return fixed248_log(Pi_const_f254() ~>> 6); // log(Pi) + // return atanh_alt_f258(1 << 255); // atanh(1/8) * 2^258 + // return 
atanh_f258(1 << 255, 37); // atanh(1/8) * 2^258 + // return atanh_f258(81877371507464127617551201542979628307507432471243237061821853600756754782485, 36); // atanh(sqrt(2)/8) * 2^258 + // return log_aux_f257(Pi_const_f254()); // log(Pi/4) + // return log_aux_f257(3 << 254); // log(3) + // return atanh_alt_f258(81877371507464127617551201542979628307507432471243237061821853600756754782485); // atanh(sqrt(2)/8) * 2^258 + // return fixed248_sincos(Pi_const_f254() ~/ (64 * 3)); // (sin,cos)(Pi/3) + // return fixed248_exp(3 << 248); // exp(3)*2^248 + // return fixed248_exp2((1 << 248) ~/ 5); // 2^(1/5)*2^248 + // return fixed248_pow(3 << 248, -3 << 247); // 3^(-1.5) + // return fixed248_pow(10 << 248, -70 << 248); // 10^(-70) + // return fixed248_pow(fixed248_Pi_const(), stackMoveToTop(3) << 248); // Pi^3 ~ 31.006, computed more precisely + // return fixed248_pow(fixed248_Pi_const(), fixed248_Pi_const()); // Pi^Pi, more precisely + // return fixed248_exp(fixed248_log(fixed248_Pi_const()) * 3); // Pi^3 ~ 31.006 + // return fixed248_exp(mulDivRound(fixed248_log(fixed248_Pi_const()), fixed248_Pi_const(), 1 << 248)); // Pi^Pi + // return fixed248_sin(fixed248_log(fixed248_exp(fixed248_Pi_const()))); // sin(log(e^Pi)) + // return expm1_f257(1 << 255); // (exp(1/4)-1)*2^256 + // return expm1_f257(-1 << 256); // (exp(-1/2)-1)*2^256 (argument out of range, will overflow) + // return expm1_f257(log2_const_f256()); // (exp(log(2)/2)-1)*2^256 + // return expm1_f257(- log2_const_f256()); // (exp(-log(2)/2)-1)*2^256 + // return tanh_f258(log2_const_f256(), 17); // tanh(log(2)/4)*2^258 + // return atan_f255(0xa0 << 247); + // return atan_f259(1 << 255, 26); // atan(1/16) + // return atan_f259(stackMoveToTop(2273) << 244, 26); // atan(2273/2^15) + // return atan_aux_f256(0xa0 << 248); + // return atan_aux_f256(-1 - (-1 << 256)); + // return atan_aux_f256(-1 << 256); + // return atan_aux_f256(1); // atan(1/2^256)*2^261 = 32 + //return fixed248_nrand(); + // return test_nrand(100000); + 
var One2: int = 1 << 255; + // return asin_f255(One); + // return asin_f255(-2 * One ~/ -3); + var arg: int = mulDivRound(12, One2, 17); // 12/17 + // return [ asin_slow_f255(arg), asin_f255(arg) ]; + // return [ acos_slow_f255(arg), acos_f255(arg) ]; + // return 4 * atan_f255(One ~/ 5) - atan_f255(One ~/ 239); // 4 * atan(1/5) - atan(1/239) = Pi/4 as fixed255 + var One3: int = 1 << 248; + // return fixed248_atan(One) ~/ 5); // atan(1/5) + // return fixed248_acot(One ~/ 239); // atan(1/5) +} + +/** + method_id | in | out +@testcase | 10000 | -1-(-1<<256) -1-(-1<<256) | 115792089237316195423570985008687907853269984665640564039457584007913129639935 +@testcase | 10000 | -1-(-1<<256) -2-(-1<<256) | 115792089237316195423570985008687907853269984665640564039457584007913129639934 +@testcase | 10000 | -1-(-1<<256) 1<<255 | 81877371507464127617551201542979628307507432471243237061821853600756754782485 +@testcase | 10000 | 1 2 | 1 +@testcase | 10000 | 1 3 | 2 +@testcase | 10000 | 3<<254 1<<254 | 50139445418395255283694704271811692336355250894665672355503583528635147053497 +@testcase | 10000 | 3 5 | 4 +@testcase | 10001 | 115641670674223639132965820642403718536242645001775371762318060545014644837101-1 | 115792089237316195423570985008687907853269984665640564039457584007913129639935 +@testcase | 10001 | 15<<252 | 108679485937549714997960660780289583146059954551846264494610741505469565211201 + +@testcase | 10002 | 1<<255 | 57858359242454268843682786479537198006144860419130642837770554273561536355094 28938600351875109040123440645416448095273333920390487381363947585666516031269 +@testcase | 10002 | 90942894222941581070058735694432465663348344332098107489693037779484723616546 | 90796875678616203090520439851979829600860326752181983760731669850687818036503 71369031536005973567205947792557760023823761636922618688720973932041901854510 +@testcase | 10002 | 115641670674223639132965820642403718536242645001775371762318060545014644837100 | 
115341536360906404779899502576747487978354537254490211650198994186870666100480 115341536360906404779899502576747487978354537254490211650198994186870666100479 +@testcase | 10003 | 90942894222941581070058735694432465663348344332098107489693037779484723616546 0 | 81877371507464127617551201542979628307507432471243237061821853600756754782485 -81877371507464127617551201542979628307507432471243237061821853600756754782486 +@testcase | 10003 | (1<<255)+1 0 | 55513684748706254392157395574451324146997108788015526773113170656738693667657 -101617118319522600545601981648807607350213579319835970884288805016705398675944 +@testcase | 10003 | 1<<254 0 | 28647421327665059059430596260119789787021370826354543144805343654507971817712 -112192393597863122712065585177748900737784171216163716639418346853706594800924 +@testcase | 10003 | 15<<252 0 | 93337815620236900315136494926097782162348358704087992554326802765553037216157 -68526346066204767396483080633934170508153877799043171682610011603005473885083 +@testcase | 10004 | 15<<252 | 93337815620236900315136494926097782162348358704087992554326802765553037216158 94531486342222856054175808749507474690232213733194784713695144809815311509707 +@testcase | 10003 | 60628596148627720713372490462954977108898896221398738326462025186323149077698 0 | 57896044618658097711785492504343953926634992332820282019728792003956564819968 -100278890836790510567389408543623384672710501789331344711007167057270294106993 +@testcase | 10004 | 60628596148627720713372490462954977108898896221398738326462025186323149077698 | 57896044618658097711785492504343953926634992332820282019728792003956564819968 31026396801051369712363152930129046361118965752618438656900833901285671065886 +@testcase | 10005 | 1899<<245 | -115784979074977116522606932816046735344768048129666123117516779696532375620701 -86847621900007587791673148476644866514014227467564880140262768165345715058771 +@testcase | 10006 | 11<<248 | -102200470999497240398685962406597118965525125432278008915850368651878945159221 
+@testcase | 10008 | 1<<252 18 | 7237594612640731814076778712183932891481921212865048737772958953246047977071 +@testcase! | 10009 | 64*(1<<255)//55 18 | 67377367986958444187782963285047188951340314639925508148698906136973510008513 +@testcase | 10010 | 1<<255 | 0 255 +@testcase | 10011 | -1-(-1<<256) | 80260960185991308862233904206310070533990667611589946606122867505419956976171 255 +@testcase | 10012 | 3<<248 3<<248 | 12212446911748192486079752325135052781399568695204278238536542063334587891712 +@testcase | 10013 | 5 7 | 569235245303856216139605450142923208167703167128528666640203654338408315932 +@testcase | 10014 | 1420982722233462204219667745225507275989817880189032929526453715304448806508 | 517776035526939558040896860590142614178014859368681705591403663865964112176 +@testcase | 10008 | 1<<255 37 | 58200445412255555045265806996802932280233368707362818578692888102488340124094 +@testcase | 10008 | 81877371507464127617551201542979628307507432471243237061821853600756754782485 36 | 82746618329032515754939514227666784789465120373484337368014239356561508382845 +@testcase | 10015 | 90942894222941581070058735694432465663348344332098107489693037779484723616546 | -55942510554172181731996424203087263676819062449594753161692794122306202470292 256 +@testcase | 10015 | 3<<254 | -66622616410625360568738677407433830899150908037353507097280251369610028875158 256 +@testcase | 10016 | 90942894222941581070058735694432465663348344332098107489693037779484723616546//(64*3) | 391714417331212931903864877123528846377775397614575565277371746317462086355 226156424291633194186662080095093570025917938800079226639565593765455331328 +@testcase | 10017 | 3<<248 | 9084946421051389814103830025729847734065792062362132089390904679466687950835 +@testcase | 10018 | (1<<248)//5 | 519571025111621076330285524602776985448579272766894385941850747946908706857 +@testcase | 10012 | 3<<248 -3<<247 | 87047648295825095978636639360784188083950088358794570061638165848324908079 +@testcase | 10012 | 10<<248 -70<<248 
| 45231 +@testcase | 10012 | 1420982722233462204219667745225507275989817880189032929526453715304448806508 3<<248 | 14024537329227316173680050897643053638073167245065581681188087336877135047241 +@testcase | 10012 | 1420982722233462204219667745225507275989817880189032929526453715304448806508 1420982722233462204219667745225507275989817880189032929526453715304448806508 | 16492303277433924047657446877966346821161732581471802839855102123372676002295 +@testcase | 10019 | 1<<255 | 65775792789545756849501669218806308540691279864498696756901136302101823231959 +@testcase | 10019 | -1<<255 | -51226238931640701466578648374135745377468902266335737558089915608594425303282 + +@testcase | 10020 | 160<<247 | 32340690885082755723307749066376646841771751777398167772823878380310576779097 +@testcase | 10021 | 1<<255 26 | 57820835337111819566482910321201859268121322500887685881159030272507322418551 +@testcase | 10021 | 2273<<244 26 | 64153929153128256059565403901040178355488584937372975321150754259394300105908 +@testcase | 10022 | 160<<248 | 18 -13775317617017974742132028403521581424991093186766868001115299479309514610238 +@testcase | 10022 | -1-(-1<<256) | 25 16312150880916231694896252427912541090503675654570543195394548083530005073282 +@testcase | 10022 | -1<<256 | -25 -16312150880916231694896252427912541090503675654570543195394548083530005073298 +@testcase | 10022 | 1 | 0 32 + +@testcase | 10023 | 1<<255 | 90942894222941581070058735694432465663348344332098107489693037779484723616546 +@testcase | 10023 | (1-(1<<255))//-3 | 19675212872822715586637341573564384553677006914302429002469838095945333339604 +@testcase | 10023 | 12*(1<<255)//17 | 45371280744427205854111943101074857545572584208710061167826656461897302968384 +@testcase | 10024 | 12*(1<<255)//17 | 45371280744427205854111943101074857545572584208710061167826656461897302968387 +@testcase | 10025 | 12*(1<<255)//17 | 22785806739257187607973396296678804058887880061694023160933190658793710324081 +@testcase | 10026 | 12*(1<<255)//17 | 
22785806739257187607973396296678804058887880061694023160933190658793710324080 + +@testcase | 10027 | (1<<248)//5 | 89284547973388213553327350968415123522888028497458323165947767504203347189 +@testcase | 10028 | (1<<248)//239 | 708598849781543798951441405045469962900811296151941404481049216461523216127 +*/ diff --git a/tolk-tester/tests/try-func.tolk b/tolk-tester/tests/try-func.tolk new file mode 100644 index 000000000..7963a8500 --- /dev/null +++ b/tolk-tester/tests/try-func.tolk @@ -0,0 +1,151 @@ +fun unsafeGetInt(any: X): int + asm "NOP"; + +@method_id(11) +fun foo(x: int): int { + try { + if (x == 7) { + throw 44; + } + return x; + } catch { + return 2; + } +} + +@inline +@method_id(12) +fun foo_inline(x: int): int { + try { + assert(!(x == 7)) throw 44; + return x; + } catch { + return 2; + } +} + +@inline_ref +@method_id(13) +fun foo_inlineref(x: int): int { + try { + if (x == 7) { throw (44, 2); } + return x; + } catch (_, arg) { + return unsafeGetInt(arg); + } +} + +@method_id(1) +fun test(x: int, y: int, z: int): int { + y = foo(y); + return x * 100 + y * 10 + z; +} + +@method_id(2) +fun test_inline(x: int, y: int, z: int): int { + y = foo_inline(y); + return x * 100 + y * 10 + z; +} + +@method_id(3) +fun test_inlineref(x: int, y: int, z: int): int { + y = foo_inlineref(y); + return x * 100 + y * 10 + z; +} + +@inline +@method_id(14) +fun foo_inline_big( + x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, + x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int +): int { + try { + if (x1 == 7) { + throw 44; + } + return x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20; + } catch { + return 1; + } +} + +@method_id(4) +fun test_inline_big(x: int, y: int, z: int): int { + y = foo_inline_big( + y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, + y + 10, y + 11, y + 12, y + 13, y + 14, y + 15, y + 16, y 
+ 17, y + 18, y + 19); + return x * 1000000 + y * 1000 + z; +} + +@method_id(15) +fun foo_big( + x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, + x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int +): int { + try { + if (x1 == 7) { + throw (44, 1); + } + return x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20; + } catch (code, arg) { + return unsafeGetInt(arg); + } +} + +@method_id(5) +fun test_big(x: int, y: int, z: int): int { + y = foo_big( + y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, + y + 10, y + 11, y + 12, y + 13, y + 14, y + 15, y + 16, y + 17, y + 18, y + 19); + return x * 1000000 + y * 1000 + z; +} + +@method_id(16) +fun test_catch_into_same(x: int): int { + var code = x; + try { + assert(x <= 10, 44); + } catch(code) { + return code; + } + return code; +} + + +@method_id(17) +fun test_catch_into_same_2(x: int): int { + var code = x; + try { + if (x > 10) { + throw 44; + } + } catch(code) { + } + return code; +} + +fun main() { +} + +/** + method_id | in | out +@testcase | 1 | 1 2 3 | 123 +@testcase | 1 | 3 8 9 | 389 +@testcase | 1 | 3 7 9 | 329 +@testcase | 2 | 1 2 3 | 123 +@testcase | 2 | 3 8 9 | 389 +@testcase | 2 | 3 7 9 | 329 +@testcase | 3 | 1 2 3 | 123 +@testcase | 3 | 3 8 9 | 389 +@testcase | 3 | 3 7 9 | 329 +@testcase | 4 | 4 8 9 | 4350009 +@testcase | 4 | 4 7 9 | 4001009 +@testcase | 5 | 4 8 9 | 4350009 +@testcase | 5 | 4 7 9 | 4001009 +@testcase | 16 | 5 | 5 +@testcase | 16 | 20 | 44 +@testcase | 17 | 5 | 5 +@testcase | 17 | 20 | 20 + +@code_hash 73240939343624734070640372352271282883450660826541545137654364443860257436623 +*/ diff --git a/tolk-tester/tests/unbalanced_ret.tolk b/tolk-tester/tests/unbalanced_ret.tolk new file mode 100644 index 000000000..6cf42643a --- /dev/null +++ b/tolk-tester/tests/unbalanced_ret.tolk @@ -0,0 +1,17 @@ +fun main(x: int): (int, int) { + 
var y: int = 5; + if (x < 0) { + x *= 2; + y += 1; + if (x == -10) { + return (111, 0); + } + } + return (x + 1, y); +} +/** + method_id | in | out +@testcase | 0 | 10 | 11 5 +@testcase | 0 | -5 | 111 0 +@testcase | 0 | -4 | -7 6 +*/ diff --git a/tolk-tester/tests/unbalanced_ret_inline.tolk b/tolk-tester/tests/unbalanced_ret_inline.tolk new file mode 100644 index 000000000..4e24fbd8f --- /dev/null +++ b/tolk-tester/tests/unbalanced_ret_inline.tolk @@ -0,0 +1,19 @@ +@inline +fun foo(x: int): int { + if (x < 0) { + x *= 2; + if (x == -10) { + return 111; + } + } + return x + 1; +} +fun main(x: int): int { + return foo(x) * 10; +} +/** + method_id | in | out +@testcase | 0 | 10 | 110 +@testcase | 0 | -5 | 1110 +@testcase | 0 | -4 | -70 +*/ diff --git a/tolk-tester/tests/unbalanced_ret_loops.tolk b/tolk-tester/tests/unbalanced_ret_loops.tolk new file mode 100644 index 000000000..9b59339d8 --- /dev/null +++ b/tolk-tester/tests/unbalanced_ret_loops.tolk @@ -0,0 +1,68 @@ +fun main() { } + +@method_id(1) +fun foo_repeat(x: int): int { + repeat(10) { + x += 10; + if (x >= 100) { + return x; + } + } + return -1; +} + +@method_id(2) +fun foo_while(x: int): int { + var i: int = 0; + while (i < 10) { + x += 10; + if (x >= 100) { + return x; + } + i += 1; + } + return -1; +} + +@method_id(3) +fun foo_until(x: int): int { + var i: int = 0; + do { + x += 10; + if (x >= 100) { + return x; + } + i += 1; + } while (i < 10); + return -1; +} + +@method_id(4) +fun test4(x: int): (int, int) { + var s = 0; + var reached = false; + do { + x = x - 1; + s = s + 1; + if (x < 10) { + reached = true; + } + } while (!reached); + return (s, reached); +} + +/** + method_id | in | out +@testcase | 1 | 40 | 100 +@testcase | 1 | 33 | 103 +@testcase | 1 | -5 | -1 +@testcase | 2 | 40 | 100 +@testcase | 2 | 33 | 103 +@testcase | 2 | -5 | -1 +@testcase | 3 | 40 | 100 +@testcase | 3 | 33 | 103 +@testcase | 3 | -5 | -1 +@testcase | 4 | 18 | 9 -1 + +@code_hash 
12359153928622198176298534554187062238616102949658930329300859312625793323482 +*/ diff --git a/tolk-tester/tests/unbalanced_ret_nested.tolk b/tolk-tester/tests/unbalanced_ret_nested.tolk new file mode 100644 index 000000000..05e609240 --- /dev/null +++ b/tolk-tester/tests/unbalanced_ret_nested.tolk @@ -0,0 +1,37 @@ +fun foo(y: int): int { + if (y < 0) { + y *= 2; + if (y == -10) { + return 111; + } + } + return y + 1; +} +fun bar(x: int, y: int): (int, int) { + if (x < 0) { + y = foo(y); + x *= 2; + if (x == -10) { + return (111, y); + } + } + return (x + 1, y); +} +fun main(x: int, y: int): (int, int) { + (x, y) = bar(x, y); + return (x, y * 10); +} +/** + method_id | in | out +@testcase | 0 | 3 3 | 4 30 +@testcase | 0 | 3 -5 | 4 -50 +@testcase | 0 | 3 -4 | 4 -40 +@testcase | 0 | -5 3 | 111 40 +@testcase | 0 | -5 -5 | 111 1110 +@testcase | 0 | -5 -4 | 111 -70 +@testcase | 0 | -4 3 | -7 40 +@testcase | 0 | -4 -5 | -7 1110 +@testcase | 0 | -4 -4 | -7 -70 + +@code_hash 68625253347714662162648433047986779710161195298061582217368558479961252943991 +*/ diff --git a/tolk-tester/tests/use-before-declare.tolk b/tolk-tester/tests/use-before-declare.tolk new file mode 100644 index 000000000..384569b93 --- /dev/null +++ b/tolk-tester/tests/use-before-declare.tolk @@ -0,0 +1,49 @@ +fun main(): int { + var c: cell = my_begin_cell().storeInt(demo_10, 32).my_end_cell(); + var cs: slice = my_begin_parse(c); + var ten: int = cs.loadInt(32); + return 1 + demo1(ten) + demo_var; +} + +@pure +fun my_begin_cell(): builder +asm "NEWC"; +@pure +fun my_end_cell(b: builder): cell +asm "ENDC"; +@pure +fun my_begin_parse(c: cell): slice +asm "CTOS"; + +fun demo1(v: int): int { + demo_var = 23; + return v; +} + +global demo_var: int; +const demo_10: int = 10; + +fun test1(): int { + var demo_var: int = demo_10; + var demo_slice: int = demo_20; + if (demo_var > 0) { + var demo_var: tuple = null; + var demo_slice: tuple = null; + } + return demo_var + demo_slice; +} + +global demo_slice: slice; 
+const demo_20: int = 20; + +/** +@testcase | 0 | | 34 + +@fif_codegen +""" + test1 PROC:<{ + // + 30 PUSHINT // _10 + }> +""" + */ diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk new file mode 100644 index 000000000..9bee862ac --- /dev/null +++ b/tolk-tester/tests/var-apply.tolk @@ -0,0 +1,22 @@ +fun getBeginCell() { + return beginCell; +} + +fun getBeginParse() { + return beginParse; +} + +@method_id(101) +fun testVarApply1() { + var (_, f_end_cell) = (0, endCell); + var b: builder = (getBeginCell())().storeInt(1, 32); + b.storeInt(2, 32); + var s = (getBeginParse())(f_end_cell(b)); + return (s.loadInt(32), s.loadInt(32)); +} + +fun main() {} + +/** +@testcase | 101 | | 1 2 + */ diff --git a/tolk-tester/tests/w1.tolk b/tolk-tester/tests/w1.tolk new file mode 100644 index 000000000..eb06bec67 --- /dev/null +++ b/tolk-tester/tests/w1.tolk @@ -0,0 +1,14 @@ +fun main(id: int): (int, int) { + if (id > 0) { + if (id > 10) { + return (2 * id, 3 * id); + } + } + return (5, 6); +} +/** + method_id | in | out +@testcase | 0 | 0 | 5 6 +@testcase | 0 | 4 | 5 6 +@testcase | 0 | 11 | 22 33 +*/ diff --git a/tolk-tester/tests/w2.tolk b/tolk-tester/tests/w2.tolk new file mode 100644 index 000000000..728b18d3f --- /dev/null +++ b/tolk-tester/tests/w2.tolk @@ -0,0 +1,34 @@ +@method_id(101) +fun test1(cs: slice) { + return cs.loadUint(8)+cs.loadUint(8)+cs.loadUint(8)+cs.loadUint(8); +} + +@method_id(102) +fun test2(cs: slice) { + var (x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, + x11, x12, x13, x14, x15, x16, x17, x18, x19) = f(cs); + return x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + + x10+ x11+ x12+ x13+ x14+ x15+ x16+ x17+ x18+ x19; +} + +fun main(cs: slice) { + return (cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8)); +} + +fun f(cs: slice) { + return (cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), + cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), + cs.loadUint(8), cs.loadUint(8), 
cs.loadUint(8), cs.loadUint(8), + cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), + cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8)); +} + + +/** + method_id | in | out +@testcase | 102 | x{000102030405060708090a0b0c0d0e0f10111213} | 190 +@testcase | 101 | x{000102030405060708090a0b0c0d0e0f10111213} | 6 +@testcase | 0 | x{000102030405060708090a0b0c0d0e0f10111213} | 0 1 2 3 + +@code_hash 58474889199998908444151060994149070836199913191952040273624197630531731101157 +*/ diff --git a/tolk-tester/tests/w6.tolk b/tolk-tester/tests/w6.tolk new file mode 100644 index 000000000..2f8956440 --- /dev/null +++ b/tolk-tester/tests/w6.tolk @@ -0,0 +1,19 @@ +fun main(x: int): int { + var i: int = 0; + // int f = false; + do { + i = i + 1; + if (i > 5) { + return 1; + } + var f: int = (i * i == 64); + } while (!f); + return -1; +} + +/** + method_id | in | out +@testcase | 0 | 0 | 1 + +@code_hash 36599880583276393028571473830850694081778552118303309411432666239740650614479 +*/ diff --git a/tolk-tester/tests/w7.tolk b/tolk-tester/tests/w7.tolk new file mode 100644 index 000000000..85081fbb3 --- /dev/null +++ b/tolk-tester/tests/w7.tolk @@ -0,0 +1,26 @@ +@method_id(1) +fun test(y: int): int { + var x: int = 1; + if (y > 0) { + return 1; + } + return x > 0; +} + +@method_id(2) +fun f(y: int): int { + if (y > 0) { + return 1; + } + return 2; +} + +fun main() { } + +/** + method_id | in | out +@testcase | 1 | 10 | 1 +@testcase | 1 | -5 | -1 +@testcase | 2 | 10 | 1 +@testcase | 2 | -5 | 2 +*/ diff --git a/tolk-tester/tests/w9.tolk b/tolk-tester/tests/w9.tolk new file mode 100644 index 000000000..b88dc736e --- /dev/null +++ b/tolk-tester/tests/w9.tolk @@ -0,0 +1,14 @@ +fun main(s: int) { + var (z, t) = (17, s); + while (z > 0) { + t = s; + z -= 1; + } + return ~ t; +} + +/** + method_id | in | out +@testcase | 0 | 1 | -2 +@testcase | 0 | 5 | -6 +*/ diff --git a/tolk-tester/tolk-tester.js b/tolk-tester/tolk-tester.js new file mode 100644 index 
000000000..2a3eb776a --- /dev/null +++ b/tolk-tester/tolk-tester.js @@ -0,0 +1,533 @@ +// Usage: `node tolk-tester.js tests_dir` OR `node tolk-tester.js test_file.tolk` +// from current dir, providing some env (see getenv() calls). +// This is a JS version of tolk-tester.py to test Tolk compiled to WASM. +// Don't forget to keep it identical to Python version! + +const fs = require('fs'); +const os = require('os'); +const path = require('path'); +const child_process = require('child_process'); + +function print(...args) { + console.log(...args) +} + +/** @return {string} */ +function getenv(name, def = null) { + if (name in process.env) + return process.env[name] + if (def === null) { + print(`Environment variable ${name} is not set`) + process.exit(1) + } + return def +} + +const TOLKFIFTLIB_MODULE = getenv('TOLKFIFTLIB_MODULE') +const TOLKFIFTLIB_WASM = getenv('TOLKFIFTLIB_WASM') +const FIFT_EXECUTABLE = getenv('FIFT_EXECUTABLE') +const FIFT_LIBS_FOLDER = getenv('FIFTPATH') // this env is needed for fift to work properly +const STDLIB_FOLDER = __dirname + '/../crypto/smartcont/tolk-stdlib' +const TMP_DIR = os.tmpdir() + +class CmdLineOptions { + constructor(/**string[]*/ argv) { + if (argv.length !== 3) { + print("Usage: node tolk-tester.js tests_dir OR node tolk-tester.js test_file.tolk") + process.exit(1) + } + if (!fs.existsSync(argv[2])) { + print(`Input '${argv[2]}' doesn't exist`) + process.exit(1) + } + + if (fs.lstatSync(argv[2]).isDirectory()) { + this.tests_dir = argv[2] + this.test_file = null + } else { + this.tests_dir = path.dirname(argv[2]) + this.test_file = argv[2] + } + } + + /** @return {string[]} */ + find_tests() { + if (this.test_file) // an option to run (debug) a single test + return [this.test_file] + + let tests = fs.readdirSync(this.tests_dir).filter(f => f.endsWith('.tolk') || f.endsWith('.ton')) + tests.sort() + return tests.map(f => path.join(this.tests_dir, f)) + } +} + + +class ParseInputError extends Error { +} + +class 
TolkCompilationFailedError extends Error { + constructor(/**string*/ message, /**string*/ stderr) { + super(message); + this.stderr = stderr + } +} + +class TolkCompilationSucceededError extends Error { +} + +class FiftExecutionFailedError extends Error { + constructor(/**string*/ message, /**string*/ stderr) { + super(message); + this.stderr = stderr + } +} + +class CompareOutputError extends Error { + constructor(/**string*/ message, /**string*/ output) { + super(message); + this.output = output + } +} + +class CompareFifCodegenError extends Error { +} + +class CompareCodeHashError extends Error { +} + + +/* + * In positive tests, there are several testcases "input X should produce output Y". + */ +class TolkTestCaseInputOutput { + static reJustNumber = /^[-+]?\d+$/ + static reMathExpr = /^[0x123456789()+\-*/<>]*$/ + + constructor(/**string*/ method_id_str, /**string*/ input_str, /**string*/ output_str) { + let processed_inputs = [] + for (let in_arg of input_str.split(' ')) { + if (in_arg.length === 0) + continue + else if (in_arg.startsWith("x{") || TolkTestCaseInputOutput.reJustNumber.test(in_arg)) + processed_inputs.push(in_arg) + else if (TolkTestCaseInputOutput.reMathExpr.test(in_arg)) + // replace "3<<254" with "3n<<254n" (big number) before eval (in Python we don't need this) + processed_inputs.push(eval(in_arg.replace('//', '/').replace(/(\d)($|\D)/gmi, '$1n$2')).toString()) + else if (in_arg === "null") + processed_inputs.push("null") + else + throw new ParseInputError(`'${in_arg}' can't be evaluated`) + } + + this.method_id = +method_id_str + this.input = processed_inputs.join(' ') + this.expected_output = output_str + } + + check(/**string[]*/ stdout_lines, /**number*/ line_idx) { + if (stdout_lines[line_idx] !== this.expected_output) + throw new CompareOutputError(`error on case #${line_idx + 1} (${this.method_id} | ${this.input}): expected '${this.expected_output}', found '${stdout_lines[line_idx]}'`, stdout_lines.join("\n")) + } +} + +/* + * 
@stderr checks, when compilation fails, that stderr (compilation error) is expected. + * If it's multiline, all lines must be present in specified order. + */ +class TolkTestCaseStderr { + constructor(/**string[]*/ stderr_pattern, /**boolean*/ avoid) { + this.stderr_pattern = stderr_pattern + this.avoid = avoid + } + + check(/**string*/ stderr) { + const line_match = this.find_pattern_in_stderr(stderr.split(/\n/)) + if (line_match === -1 && !this.avoid) + throw new CompareOutputError("pattern not found in stderr:\n" + + this.stderr_pattern.map(x => " " + x).join("\n"), stderr) + else if (line_match !== -1 && this.avoid) + throw new CompareOutputError(`pattern found (line ${line_match + 1}), but not expected to be:\n` + + this.stderr_pattern.map(x => " " + x).join("\n"), stderr) + } + + find_pattern_in_stderr(/**string[]*/ stderr) { + for (let line_start = 0; line_start < stderr.length; ++line_start) + if (this.try_match_pattern(0, stderr, line_start)) + return line_start + return -1 + } + + try_match_pattern(/**number*/ pattern_offset, /**string[]*/ stderr, /**number*/ offset) { + if (pattern_offset >= this.stderr_pattern.length) + return true + if (offset >= stderr.length) + return false + + const line_pattern = this.stderr_pattern[pattern_offset] + const line_output = stderr[offset] + return line_output.includes(line_pattern) && this.try_match_pattern(pattern_offset + 1, stderr, offset + 1) + } +} + +/* + * @fif_codegen checks that contents of compiled.fif matches the expected pattern. + * @fif_codegen_avoid checks that is does not match the pattern. + * See comments in run_tests.py. 
+ */ +class TolkTestCaseFifCodegen { + constructor(/**string[]*/ fif_pattern, /**boolean*/ avoid) { + /** @type {string[]} */ + this.fif_pattern = fif_pattern.map(s => s.trim()) + this.avoid = avoid + } + + check(/**string[]*/ fif_output) { + const line_match = this.find_pattern_in_fif_output(fif_output) + if (line_match === -1 && !this.avoid) + throw new CompareFifCodegenError("pattern not found:\n" + + this.fif_pattern.map(x => " " + x).join("\n")) + else if (line_match !== -1 && this.avoid) + throw new CompareFifCodegenError(`pattern found (line ${line_match + 1}), but not expected to be:\n` + + this.fif_pattern.map(x => " " + x).join("\n")) + } + + find_pattern_in_fif_output(/**string[]*/ fif_output) { + for (let line_start = 0; line_start < fif_output.length; ++line_start) + if (this.try_match_pattern(0, fif_output, line_start)) + return line_start + return -1 + } + + try_match_pattern(/**number*/ pattern_offset, /**string[]*/ fif_output, /**number*/ offset) { + if (pattern_offset >= this.fif_pattern.length) + return true + if (offset >= fif_output.length) + return false + const line_pattern = this.fif_pattern[pattern_offset] + const line_output = fif_output[offset] + + if (line_pattern !== "...") { + if (!TolkTestCaseFifCodegen.does_line_match(line_pattern, line_output)) + return false + return this.try_match_pattern(pattern_offset + 1, fif_output, offset + 1) + } + while (offset < fif_output.length) { + if (this.try_match_pattern(pattern_offset + 1, fif_output, offset)) + return true + offset = offset + 1 + } + return false + } + + static split_line_to_cmd_and_comment(/**string*/ trimmed_line) { + const pos = trimmed_line.indexOf("//") + if (pos === -1) + return [trimmed_line, null] + else + return [trimmed_line.substring(0, pos).trimEnd(), trimmed_line.substring(pos + 2).trimStart()] + } + + static does_line_match(/**string*/ line_pattern, /**string*/ line_output) { + const [cmd_pattern, comment_pattern] = 
TolkTestCaseFifCodegen.split_line_to_cmd_and_comment(line_pattern) + const [cmd_output, comment_output] = TolkTestCaseFifCodegen.split_line_to_cmd_and_comment(line_output.trim()) + return cmd_pattern === cmd_output && (comment_pattern === null || comment_pattern === comment_output) + } +} + +/* + * @code_hash checks that hash of compiled output.fif matches the provided value. + * It's used to "record" code boc hash and to check that it remains the same on compiler modifications. + * Being much less flexible than @fif_codegen, it nevertheless gives a guarantee of bytecode stability. + */ +class TolkTestCaseExpectedHash { + constructor(/**string*/ expected_hash) { + this.code_hash = expected_hash + } + + check(/**string*/ fif_code_hash) { + if (this.code_hash !== fif_code_hash) + throw new CompareCodeHashError(`expected ${this.code_hash}, actual ${fif_code_hash}`) + } +} + + +class TolkTestFile { + constructor(/**string*/ tolk_filename, /**string*/ artifacts_folder) { + this.line_idx = 0 + this.tolk_filename = tolk_filename + this.artifacts_folder = artifacts_folder + this.compilation_should_fail = false + /** @type {TolkTestCaseStderr[]} */ + this.stderr_includes = [] + /** @type {TolkTestCaseInputOutput[]} */ + this.input_output = [] + /** @type {TolkTestCaseFifCodegen[]} */ + this.fif_codegen = [] + /** @type {TolkTestCaseExpectedHash | null} */ + this.expected_hash = null + /** @type {string | null} */ + this.experimental_options = null + } + + parse_input_from_tolk_file() { + const lines = fs.readFileSync(this.tolk_filename, 'utf-8').split(/\r?\n/) + this.line_idx = 0 + + while (this.line_idx < lines.length) { + const line = lines[this.line_idx] + if (line.startsWith('@testcase')) { + let s = line.split("|").map(p => p.trim()) + if (s.length !== 4) + throw new ParseInputError(`incorrect format of @testcase: ${line}`) + this.input_output.push(new TolkTestCaseInputOutput(s[1], s[2], s[3])) + } else if (line.startsWith('@compilation_should_fail')) { + 
this.compilation_should_fail = true + } else if (line.startsWith('@stderr')) { + this.stderr_includes.push(new TolkTestCaseStderr(this.parse_string_value(lines), false)) + } else if (line.startsWith("@fif_codegen_avoid")) { + this.fif_codegen.push(new TolkTestCaseFifCodegen(this.parse_string_value(lines), true)) + } else if (line.startsWith("@fif_codegen")) { + this.fif_codegen.push(new TolkTestCaseFifCodegen(this.parse_string_value(lines), false)) + } else if (line.startsWith("@code_hash")) { + this.expected_hash = new TolkTestCaseExpectedHash(this.parse_string_value(lines, false)[0]) + } else if (line.startsWith("@experimental_options")) { + this.experimental_options = line.substring(22) + } + this.line_idx++ + } + + if (this.input_output.length === 0 && !this.compilation_should_fail) + throw new ParseInputError("no @testcase present") + if (this.input_output.length !== 0 && this.compilation_should_fail) + throw new ParseInputError("@testcase present, but compilation_should_fail") + } + + /** @return {string[]} */ + parse_string_value(/**string[]*/ lines, allow_multiline = true) { + // a tag must be followed by a space (single-line), e.g. 
'@stderr some text' + // or be a multi-line value, surrounded by """ + const line = lines[this.line_idx] + const pos_sp = line.indexOf(' ') + const is_multi_line = lines[this.line_idx + 1] === '"""' + const is_single_line = pos_sp !== -1 + if (!is_single_line && !is_multi_line) + throw new ParseInputError(`${line} value is empty (not followed by a string or a multiline """)`) + if (is_single_line && is_multi_line) + throw new ParseInputError(`${line.substring(0, pos_sp)} value is both single-line and followed by """`) + if (is_multi_line && !allow_multiline) + throw new ParseInputError(`${line} value should be single-line`); + + if (is_single_line) + return [line.substring(pos_sp + 1).trim()] + + this.line_idx += 2 + let s_multiline = [] + while (this.line_idx < lines.length && lines[this.line_idx] !== '"""') { + s_multiline.push(lines[this.line_idx]) + this.line_idx = this.line_idx + 1 + } + return s_multiline + } + + get_compiled_fif_filename() { + return this.artifacts_folder + "/compiled.fif" + } + + get_runner_fif_filename() { + return this.artifacts_folder + "/runner.fif" + } + + async run_and_check() { + const wasmModule = await compileWasm(TOLKFIFTLIB_MODULE, TOLKFIFTLIB_WASM) + let res = compileFile(wasmModule, this.tolk_filename, this.experimental_options) + let exit_code = res.status === 'ok' ? 
0 : 1 + let stderr = res.message + let stdout = '' + + if (exit_code === 0 && this.compilation_should_fail) + throw new TolkCompilationSucceededError("compilation succeeded, but it should have failed") + + if (exit_code !== 0 && this.compilation_should_fail) { + for (let should_include of this.stderr_includes) + should_include.check(stderr) + return + } + + if (exit_code !== 0 && !this.compilation_should_fail) + throw new TolkCompilationFailedError(`tolk exit_code = ${exit_code}`, stderr) + + fs.writeFileSync(this.get_compiled_fif_filename(), `"Asm.fif" include\n${res.fiftCode}`) + { + let runner = `"${this.get_compiled_fif_filename()}" include x.trim()).filter(s => s.length > 0) + let fif_code_hash = null + if (this.expected_hash !== null) { // then the last stdout line is a hash + fif_code_hash = stdout_lines[stdout_lines.length - 1] + stdout_lines = stdout_lines.slice(0, stdout_lines.length - 1) + } + + if (stdout_lines.length !== this.input_output.length) + throw new CompareOutputError(`unexpected number of fift output: ${stdout_lines.length} lines, but ${this.input_output.length} testcases`, stdout) + + for (let i = 0; i < stdout_lines.length; ++i) + this.input_output[i].check(stdout_lines, i) + + if (this.fif_codegen.length) { + const fif_output = fs.readFileSync(this.get_compiled_fif_filename(), 'utf-8').split(/\r?\n/) + for (let fif_codegen of this.fif_codegen) + fif_codegen.check(fif_output) + } + + if (this.expected_hash !== null) + this.expected_hash.check(fif_code_hash) + } +} + +async function run_all_tests(/**string[]*/ tests) { + for (let ti = 0; ti < tests.length; ++ti) { + let tolk_filename = tests[ti] + print(`Running test ${ti + 1}/${tests.length}: ${tolk_filename}`) + + let artifacts_folder = path.join(TMP_DIR, tolk_filename) + let testcase = new TolkTestFile(tolk_filename, artifacts_folder) + + try { + if (!fs.existsSync(artifacts_folder)) + fs.mkdirSync(artifacts_folder, {recursive: true}) + testcase.parse_input_from_tolk_file() + await 
testcase.run_and_check() + fs.rmSync(artifacts_folder, {recursive: true}) + + if (testcase.compilation_should_fail) + print(" OK, compilation failed as it should") + else + print(` OK, ${testcase.input_output.length} cases`) + } catch (e) { + if (e instanceof ParseInputError) { + print(` Error parsing input (cur line #${testcase.line_idx + 1}):`, e.message) + process.exit(2) + } else if (e instanceof TolkCompilationFailedError) { + print(" Error compiling tolk:", e.message) + print(" stderr:") + print(e.stderr.trimEnd()) + process.exit(2) + } else if (e instanceof FiftExecutionFailedError) { + print(" Error executing fift:", e.message) + print(" stderr:") + print(e.stderr.trimEnd()) + print(" compiled.fif at:", testcase.get_compiled_fif_filename()) + process.exit(2) + } else if (e instanceof CompareOutputError) { + print(" Mismatch in output:", e.message) + print(" Full output:") + print(e.output.trimEnd()) + print(" Was compiled to:", testcase.get_compiled_fif_filename()) + process.exit(2) + } else if (e instanceof CompareFifCodegenError) { + print(" Mismatch in fif codegen:", e.message) + print(" Was compiled to:", testcase.get_compiled_fif_filename()) + print(fs.readFileSync(testcase.get_compiled_fif_filename(), 'utf-8')) + process.exit(2) + } else if (e instanceof CompareCodeHashError) { + print(" Mismatch in code hash:", e.message) + print(" Was compiled to:", testcase.get_compiled_fif_filename()) + process.exit(2) + } + throw e + } + } +} + +const tests = new CmdLineOptions(process.argv).find_tests() +print(`Found ${tests.length} tests`) +run_all_tests(tests).then( + () => print(`Done, ${tests.length} tests`), + console.error +) + +// below are WASM helpers, which don't exist in Python version + +process.setMaxListeners(0); + +function copyToCString(mod, str) { + const len = mod.lengthBytesUTF8(str) + 1; + const ptr = mod._malloc(len); + mod.stringToUTF8(str, ptr, len); + return ptr; +} + +function copyToCStringPtr(mod, str, ptr) { + const allocated = 
copyToCString(mod, str); + mod.setValue(ptr, allocated, '*'); + return allocated; +} + +/** @return {string} */ +function copyFromCString(mod, ptr) { + return mod.UTF8ToString(ptr); +} + +/** @return {{status: string, message: string, fiftCode: string, codeBoc: string, codeHashHex: string}} */ +function compileFile(mod, filename, experimentalOptions) { + // see tolk-wasm.cpp: typedef void (*WasmFsReadCallback)(int, char const*, char**, char**) + const callbackPtr = mod.addFunction((kind, dataPtr, destContents, destError) => { + if (kind === 0) { // realpath + try { + let relative = copyFromCString(mod, dataPtr) + if (relative.startsWith('@stdlib/')) { + // import "@stdlib/filename" or import "@stdlib/filename.tolk" + relative = STDLIB_FOLDER + '/' + relative.substring(7) + if (!relative.endsWith('.tolk')) { + relative += '.tolk' + } + } + copyToCStringPtr(mod, fs.realpathSync(relative), destContents); + } catch (err) { + copyToCStringPtr(mod, 'cannot find file', destError); + } + } else if (kind === 1) { // read file + try { + const absolute = copyFromCString(mod, dataPtr) // already normalized (as returned above) + copyToCStringPtr(mod, fs.readFileSync(absolute).toString('utf-8'), destContents); + } catch (err) { + copyToCStringPtr(mod, err.message || err.toString(), destError); + } + } else { + copyToCStringPtr(mod, 'Unknown callback kind=' + kind, destError); + } + }, 'viiii'); + + const config = { + optimizationLevel: 2, + withStackComments: true, + experimentalOptions: experimentalOptions || undefined, + entrypointFileName: filename + }; + + const configPtr = copyToCString(mod, JSON.stringify(config)); + + const responsePtr = mod._tolk_compile(configPtr, callbackPtr); + + return JSON.parse(copyFromCString(mod, responsePtr)); +} + +async function compileWasm(tolkFiftLibJsFileName, tolkFiftLibWasmFileName) { + const wasmModule = require(tolkFiftLibJsFileName) + const wasmBinary = new Uint8Array(fs.readFileSync(tolkFiftLibWasmFileName)) + + return await 
wasmModule({ wasmBinary }) +} diff --git a/tolk-tester/tolk-tester.py b/tolk-tester/tolk-tester.py new file mode 100644 index 000000000..261ab4962 --- /dev/null +++ b/tolk-tester/tolk-tester.py @@ -0,0 +1,430 @@ +# Usage: `tolk-tester.py tests_dir` OR `tolk-tester.py test_file.tolk` +# from current dir, providing some env (see getenv() calls). +# Every .tolk file should provide /* testcase description in a comment */, consider tests/ folder. +# +# Tests for Tolk can be +# * positive (compiled to .fif, run with fift, compared output with the one expected) +# * negative (compilation fails, and it's expected; patterns in stderr can be specified) +# +# Note, that there is also tolk-tester.js to test Tolk compiled to WASM. +# Don't forget to keep it identical to Python version! + +import os +import os.path +import re +import shutil +import subprocess +import sys +import tempfile +from typing import List + + +def getenv(name, default=None): + if name in os.environ: + return os.environ[name] + if default is None: + print("Environment variable", name, "is not set", file=sys.stderr) + exit(1) + return default + + +TOLK_EXECUTABLE = getenv("TOLK_EXECUTABLE", "tolk") +FIFT_EXECUTABLE = getenv("FIFT_EXECUTABLE", "fift") +FIFT_LIBS_FOLDER = getenv("FIFTPATH") # this env is needed for fift to work properly +TMP_DIR = tempfile.mkdtemp() + + +class CmdLineOptions: + def __init__(self, argv: List[str]): + if len(argv) != 2: + print("Usage: tolk-tester.py tests_dir OR tolk-tester.py test_file.tolk", file=sys.stderr) + exit(1) + if not os.path.exists(argv[1]): + print("Input '%s' doesn't exist" % argv[1], file=sys.stderr) + exit(1) + + if os.path.isdir(argv[1]): + self.tests_dir = argv[1] + self.test_file = None + else: + self.tests_dir = os.path.dirname(argv[1]) + self.test_file = argv[1] + + def find_tests(self) -> List[str]: + if self.test_file is not None: # an option to run (debug) a single test + return [self.test_file] + + tests = [f for f in os.listdir(self.tests_dir) if 
f.endswith(".tolk") or f.endswith(".ton")] + tests.sort() + return [os.path.join(self.tests_dir, f) for f in tests] + + +class ParseInputError(Exception): + pass + + +class TolkCompilationFailedError(Exception): + def __init__(self, message: str, stderr: str): + super().__init__(message) + self.stderr = stderr + + +class TolkCompilationSucceededError(Exception): + pass + + +class FiftExecutionFailedError(Exception): + def __init__(self, message: str, stderr: str): + super().__init__(message) + self.stderr = stderr + + +class CompareOutputError(Exception): + def __init__(self, message: str, output: str): + super().__init__(message) + self.output = output + + +class CompareFifCodegenError(Exception): + pass + + +class CompareCodeHashError(Exception): + pass + + +class TolkTestCaseInputOutput: + """ + In positive tests, there are several testcases "input X should produce output Y". + They are written as a table: + @testcase | method_id | input (one or several) | output + """ + reJustNumber = re.compile(r"[-+]?\d+") + reMathExpr = re.compile(r"[0x123456789()+\-*/<>]+") + + def __init__(self, method_id_str: str, input_str: str, output_str: str): + processed_inputs = [] + for in_arg in input_str.split(" "): + if len(in_arg) == 0: + continue + elif in_arg.startswith("x{") or TolkTestCaseInputOutput.reJustNumber.fullmatch(in_arg): + processed_inputs.append(in_arg) + elif TolkTestCaseInputOutput.reMathExpr.fullmatch(in_arg): + processed_inputs.append(str(eval(in_arg))) + elif in_arg == "null": + processed_inputs.append("null") + else: + raise ParseInputError("'%s' can't be evaluated" % in_arg) + + self.method_id = int(method_id_str) + self.input = " ".join(processed_inputs) + self.expected_output = output_str + + def check(self, stdout_lines: List[str], line_idx: int): + if stdout_lines[line_idx] != self.expected_output: + raise CompareOutputError("error on case #%d (%d | %s): expected '%s', found '%s'" % (line_idx + 1, self.method_id, self.input, self.expected_output, 
stdout_lines[line_idx]), "\n".join(stdout_lines)) + + +class TolkTestCaseStderr: + """ + @stderr checks, when compilation fails, that stderr (compilation error) is expected. + If it's multiline, all lines must be present in specified order. + """ + + def __init__(self, stderr_pattern: List[str], avoid: bool): + self.stderr_pattern = stderr_pattern + self.avoid = avoid + + def check(self, stderr: str): + line_match = self.find_pattern_in_stderr(stderr.splitlines()) + if line_match == -1 and not self.avoid: + raise CompareOutputError("pattern not found in stderr:\n%s" % + "\n".join(map(lambda x: " " + x, self.stderr_pattern)), stderr) + elif line_match != -1 and self.avoid: + raise CompareOutputError("pattern found (line %d), but not expected to be:\n%s" % + (line_match + 1, "\n".join(map(lambda x: " " + x, self.stderr_pattern))), stderr) + + def find_pattern_in_stderr(self, stderr: List[str]) -> int: + for line_start in range(len(stderr)): + if self.try_match_pattern(0, stderr, line_start): + return line_start + return -1 + + def try_match_pattern(self, pattern_offset: int, stderr: List[str], offset: int) -> bool: + if pattern_offset >= len(self.stderr_pattern): + return True + if offset >= len(stderr): + return False + + line_pattern = self.stderr_pattern[pattern_offset] + line_output = stderr[offset] + return line_output.find(line_pattern) != -1 and self.try_match_pattern(pattern_offset + 1, stderr, offset + 1) + + +class TolkTestCaseFifCodegen: + """ + @fif_codegen checks that contents of compiled.fif matches the expected pattern. + @fif_codegen_avoid checks that is does not match the pattern. + The pattern is a multiline piece of fift code, optionally with "..." meaning "any lines here". + See tests/codegen_check_demo.tolk of how it looks. + A notable thing about indentations (spaces at line starts): + Taking them into account will complicate the code without reasonable profit, + that's why we just trim every string. + And one more word about //comments. 
Tolk inserts them into fift output. + If a line in the pattern contains a //comment, it's expected to be equal. + If a line does not, we just compare a command. + """ + + def __init__(self, fif_pattern: List[str], avoid: bool): + self.fif_pattern = [s.strip() for s in fif_pattern] + self.avoid = avoid + + def check(self, fif_output: List[str]): + line_match = self.find_pattern_in_fif_output(fif_output) + if line_match == -1 and not self.avoid: + raise CompareFifCodegenError("pattern not found:\n%s" % + "\n".join(map(lambda x: " " + x, self.fif_pattern))) + elif line_match != -1 and self.avoid: + raise CompareFifCodegenError("pattern found (line %d), but not expected to be:\n%s" % + (line_match + 1, "\n".join(map(lambda x: " " + x, self.fif_pattern)))) + + def find_pattern_in_fif_output(self, fif_output: List[str]) -> int: + for line_start in range(len(fif_output)): + if self.try_match_pattern(0, fif_output, line_start): + return line_start + return -1 + + def try_match_pattern(self, pattern_offset: int, fif_output: List[str], offset: int) -> bool: + if pattern_offset >= len(self.fif_pattern): + return True + if offset >= len(fif_output): + return False + line_pattern = self.fif_pattern[pattern_offset] + line_output = fif_output[offset] + + if line_pattern != "...": + if not TolkTestCaseFifCodegen.does_line_match(line_pattern, line_output): + return False + return self.try_match_pattern(pattern_offset + 1, fif_output, offset + 1) + while offset < len(fif_output): + if self.try_match_pattern(pattern_offset + 1, fif_output, offset): + return True + offset = offset + 1 + return False + + @staticmethod + def split_line_to_cmd_and_comment(trimmed_line: str) -> tuple: + pos = trimmed_line.find("//") + if pos == -1: + return trimmed_line, None + else: + return trimmed_line[:pos].rstrip(), trimmed_line[pos + 2:].lstrip() + + @staticmethod + def does_line_match(line_pattern: str, line_output: str) -> bool: + cmd_pattern, comment_pattern = 
TolkTestCaseFifCodegen.split_line_to_cmd_and_comment(line_pattern) + cmd_output, comment_output = TolkTestCaseFifCodegen.split_line_to_cmd_and_comment(line_output.strip()) + return cmd_pattern == cmd_output and (comment_pattern is None or comment_pattern == comment_output) + + +class TolkTestCaseExpectedHash: + """ + @code_hash checks that hash of compiled output.fif matches the provided value. + It's used to "record" code boc hash and to check that it remains the same on compiler modifications. + Being much less flexible than @fif_codegen, it nevertheless gives a guarantee of bytecode stability. + """ + + def __init__(self, expected_hash: str): + self.code_hash = expected_hash + + def check(self, fif_code_hash: str): + if self.code_hash != fif_code_hash: + raise CompareCodeHashError("expected %s, actual %s" % (self.code_hash, fif_code_hash)) + + +class TolkTestFile: + def __init__(self, tolk_filename: str, artifacts_folder: str): + self.line_idx = 0 + self.tolk_filename = tolk_filename + self.artifacts_folder = artifacts_folder + self.compilation_should_fail = False + self.stderr_includes: List[TolkTestCaseStderr] = [] + self.input_output: List[TolkTestCaseInputOutput] = [] + self.fif_codegen: List[TolkTestCaseFifCodegen] = [] + self.expected_hash: TolkTestCaseExpectedHash | None = None + self.experimental_options: str | None = None + + def parse_input_from_tolk_file(self): + with open(self.tolk_filename, "r") as fd: + lines = fd.read().splitlines() + self.line_idx = 0 + + while self.line_idx < len(lines): + line = lines[self.line_idx] + if line.startswith("@testcase"): + s = [x.strip() for x in line.split("|")] + if len(s) != 4: + raise ParseInputError("incorrect format of @testcase: %s" % line) + self.input_output.append(TolkTestCaseInputOutput(s[1], s[2], s[3])) + elif line.startswith("@compilation_should_fail"): + self.compilation_should_fail = True + elif line.startswith("@stderr"): + 
self.stderr_includes.append(TolkTestCaseStderr(self.parse_string_value(lines), False)) + elif line.startswith("@fif_codegen_avoid"): + self.fif_codegen.append(TolkTestCaseFifCodegen(self.parse_string_value(lines), True)) + elif line.startswith("@fif_codegen"): + self.fif_codegen.append(TolkTestCaseFifCodegen(self.parse_string_value(lines), False)) + elif line.startswith("@code_hash"): + self.expected_hash = TolkTestCaseExpectedHash(self.parse_string_value(lines, False)[0]) + elif line.startswith("@experimental_options"): + self.experimental_options = line[22:] + self.line_idx = self.line_idx + 1 + + if len(self.input_output) == 0 and not self.compilation_should_fail: + raise ParseInputError("no @testcase present") + if len(self.input_output) != 0 and self.compilation_should_fail: + raise ParseInputError("@testcase present, but compilation_should_fail") + + def parse_string_value(self, lines: List[str], allow_multiline = True) -> List[str]: + # a tag must be followed by a space (single-line), e.g. 
'@stderr some text' + # or be a multi-line value, surrounded by """ + line = lines[self.line_idx] + pos_sp = line.find(' ') + is_multi_line = lines[self.line_idx + 1] == '"""' + is_single_line = pos_sp != -1 + if not is_single_line and not is_multi_line: + raise ParseInputError('%s value is empty (not followed by a string or a multiline """)' % line) + if is_single_line and is_multi_line: + raise ParseInputError('%s value is both single-line and followed by """' % line[:pos_sp]) + if is_multi_line and not allow_multiline: + raise ParseInputError("%s value should be single-line" % line) + + if is_single_line: + return [line[pos_sp + 1:].strip()] + + self.line_idx += 2 + s_multiline = [] + while self.line_idx < len(lines) and lines[self.line_idx] != '"""': + s_multiline.append(lines[self.line_idx]) + self.line_idx = self.line_idx + 1 + return s_multiline + + def get_compiled_fif_filename(self): + return self.artifacts_folder + "/compiled.fif" + + def get_runner_fif_filename(self): + return self.artifacts_folder + "/runner.fif" + + def run_and_check(self): + cmd_args = [TOLK_EXECUTABLE, "-o", self.get_compiled_fif_filename()] + if self.experimental_options: + cmd_args = cmd_args + ["-x", self.experimental_options] + res = subprocess.run(cmd_args + [self.tolk_filename], capture_output=True, timeout=10) + exit_code = res.returncode + stderr = str(res.stderr, "utf-8") + stdout = str(res.stdout, "utf-8") + + if exit_code == 0 and self.compilation_should_fail: + raise TolkCompilationSucceededError("compilation succeeded, but it should have failed") + + if exit_code != 0 and self.compilation_should_fail: + for should_include in self.stderr_includes: + should_include.check(stderr) + return + + if exit_code != 0 and not self.compilation_should_fail: + raise TolkCompilationFailedError("tolk exit_code = %d" % exit_code, stderr) + + with open(self.get_runner_fif_filename(), "w") as fd: + fd.write("\"%s\" include ) +target_link_libraries(tolk PUBLIC git ton_crypto_core) +if 
(WINGETOPT_FOUND) + target_link_libraries_system(tolk wingetopt) +endif () +if (${TOLK_DEBUG}) # -DTOLK_DEBUG=1 in CMake options => #define TOLK_DEBUG (for development purposes) + message(STATUS "TOLK_DEBUG is ON") + target_compile_definitions(tolk PRIVATE TOLK_DEBUG=1) +endif() + +if (USE_EMSCRIPTEN) + add_executable(tolkfiftlib tolk-wasm.cpp ${TOLK_SOURCE}) + target_include_directories(tolkfiftlib PUBLIC $) + target_link_libraries(tolkfiftlib PUBLIC fift-lib git) + target_link_options(tolkfiftlib PRIVATE + -sEXPORTED_RUNTIME_METHODS=FS,ccall,cwrap,UTF8ToString,stringToUTF8,lengthBytesUTF8,addFunction,removeFunction,setValue + -sEXPORTED_FUNCTIONS=_tolk_compile,_version,_malloc,_free,_setThrew + -sEXPORT_NAME=CompilerModule + -sERROR_ON_UNDEFINED_SYMBOLS=0 + -sFILESYSTEM=1 -lnodefs.js + -Oz + -sIGNORE_MISSING_MAIN=1 + -sAUTO_NATIVE_LIBRARIES=0 + -sMODULARIZE=1 + -sTOTAL_MEMORY=33554432 + -sALLOW_MEMORY_GROWTH=1 + -sALLOW_TABLE_GROWTH=1 + --embed-file ${CMAKE_CURRENT_SOURCE_DIR}/../crypto/fift/lib@/fiftlib + -fexceptions + ) + target_compile_options(tolkfiftlib PRIVATE -fexceptions -fno-stack-protector) +endif () + +install(TARGETS tolk RUNTIME DESTINATION bin) diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp new file mode 100644 index 000000000..c1add6839 --- /dev/null +++ b/tolk/abscode.cpp @@ -0,0 +1,483 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "compiler-state.h" + +namespace tolk { + +/* + * + * ABSTRACT CODE + * + */ + +TmpVar::TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc) + : v_type(_type), idx(_idx), sym_idx(sym_idx), coord(0), where(loc) { + if (!_type) { + v_type = TypeExpr::new_hole(); + } +} + +void TmpVar::set_location(SrcLocation loc) { + where = loc; +} + +void TmpVar::dump(std::ostream& os) const { + show(os); + os << " : " << v_type << " (width "; + v_type->show_width(os); + os << ")"; + if (coord > 0) { + os << " = _" << (coord >> 8) << '.' << (coord & 255); + } else if (coord < 0) { + int n = (~coord >> 8), k = (~coord & 0xff); + if (k) { + os << " = (_" << n << ".._" << (n + k - 1) << ")"; + } else { + os << " = ()"; + } + } + os << std::endl; +} + +void TmpVar::show(std::ostream& os, int omit_idx) const { + if (!is_unnamed()) { + os << G.symbols.get_name(sym_idx); + if (omit_idx >= 2) { + return; + } + } + os << '_' << idx; +} + +std::ostream& operator<<(std::ostream& os, const TmpVar& var) { + var.show(os); + return os; +} + +void VarDescr::show_value(std::ostream& os) const { + if (val & _Int) { + os << 'i'; + } + if (val & _Const) { + os << 'c'; + } + if (val & _Zero) { + os << '0'; + } + if (val & _NonZero) { + os << '!'; + } + if (val & _Pos) { + os << '>'; + } + if (val & _Neg) { + os << '<'; + } + if (val & _Even) { + os << 'E'; + } + if (val & _Odd) { + os << 'O'; + } + if (val & _Finite) { + os << 'f'; + } + if (val & _Nan) { + os << 'N'; + } + if (int_const.not_null()) { + os << '=' << int_const; + } +} + +void VarDescr::show(std::ostream& os, const char* name) const { + if (flags & _Last) { + os << '*'; + } + if (flags & _Unused) { + os << '?'; + } + if (name) { + os << name; + } + os << '_' << idx; + show_value(os); +} + +void VarDescr::set_const(long long value) { + return 
set_const(td::make_refint(value)); +} + +void VarDescr::set_const(td::RefInt256 value) { + int_const = std::move(value); + if (!int_const->signed_fits_bits(257)) { + int_const.write().invalidate(); + } + val = _Const | _Int; + int s = sgn(int_const); + if (s < -1) { + val |= _Nan | _NonZero; + } else if (s < 0) { + val |= _NonZero | _Neg | _Finite; + } else if (s > 0) { + val |= _NonZero | _Pos | _Finite; + } else { + val |= _Zero | _Neg | _Pos | _Finite; + } + if (val & _Finite) { + val |= int_const->get_bit(0) ? _Odd : _Even; + } +} + +void VarDescr::set_const(std::string value) { + str_const = value; + val = _Const; +} + +void VarDescr::set_const_nan() { + set_const(td::make_refint()); +} + +void VarDescr::operator|=(const VarDescr& y) { + val &= y.val; + if (is_int_const() && y.is_int_const() && cmp(int_const, y.int_const) != 0) { + val &= ~_Const; + } + if (!(val & _Const)) { + int_const.clear(); + } +} + +void VarDescr::operator&=(const VarDescr& y) { + val |= y.val; + if (y.int_const.not_null() && int_const.is_null()) { + int_const = y.int_const; + } +} + +void VarDescr::set_value(const VarDescr& y) { + val = y.val; + int_const = y.int_const; +} + +void VarDescr::set_value(VarDescr&& y) { + val = y.val; + int_const = std::move(y.int_const); +} + +void VarDescr::clear_value() { + val = 0; + int_const.clear(); +} + +void VarDescrList::show(std::ostream& os) const { + if (unreachable) { + os << " "; + } + os << "["; + for (const auto& v : list) { + os << ' ' << v; + } + os << " ]\n"; +} + +void Op::split_vars(const std::vector& vars) { + split_var_list(left, vars); + split_var_list(right, vars); + for (auto& op : block0) { + op.split_vars(vars); + } + for (auto& op : block1) { + op.split_vars(vars); + } +} + +void Op::split_var_list(std::vector& var_list, const std::vector& vars) { + int new_size = 0, changes = 0; + for (var_idx_t v : var_list) { + int c = vars.at(v).coord; + if (c < 0) { + ++changes; + new_size += (~c & 0xff); + } else { + ++new_size; + } + } 
+ if (!changes) { + return; + } + std::vector new_var_list; + new_var_list.reserve(new_size); + for (var_idx_t v : var_list) { + int c = vars.at(v).coord; + if (c < 0) { + int n = (~c >> 8), k = (~c & 0xff); + while (k-- > 0) { + new_var_list.push_back(n++); + } + } else { + new_var_list.push_back(v); + } + } + var_list = std::move(new_var_list); +} + +void Op::show(std::ostream& os, const std::vector& vars, std::string pfx, int mode) const { + if (mode & 2) { + os << pfx << " ["; + for (const auto& v : var_info.list) { + os << ' '; + if (v.flags & VarDescr::_Last) { + os << '*'; + } + if (v.flags & VarDescr::_Unused) { + os << '?'; + } + os << vars[v.idx]; + if (mode & 4) { + os << ':'; + v.show_value(os); + } + } + os << " ]\n"; + } + std::string dis = disabled() ? " " : ""; + if (noreturn()) { + dis += " "; + } + if (impure()) { + dis += " "; + } + switch (cl) { + case _Undef: + os << pfx << dis << "???\n"; + break; + case _Nop: + os << pfx << dis << "NOP\n"; + break; + case _Call: + os << pfx << dis << "CALL: "; + show_var_list(os, left, vars); + os << " := " << (fun_ref ? 
fun_ref->name() : "(null)") << " "; + if ((mode & 4) && args.size() == right.size()) { + show_var_list(os, args, vars); + } else { + show_var_list(os, right, vars); + } + os << std::endl; + break; + case _CallInd: + os << pfx << dis << "CALLIND: "; + show_var_list(os, left, vars); + os << " := EXEC "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Let: + os << pfx << dis << "LET "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Tuple: + os << pfx << dis << "MKTUPLE "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _UnTuple: + os << pfx << dis << "UNTUPLE "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _IntConst: + os << pfx << dis << "CONST "; + show_var_list(os, left, vars); + os << " := " << int_const << std::endl; + break; + case _SliceConst: + os << pfx << dis << "SCONST "; + show_var_list(os, left, vars); + os << " := " << str_const << std::endl; + break; + case _Import: + os << pfx << dis << "IMPORT "; + show_var_list(os, left, vars); + os << std::endl; + break; + case _Return: + os << pfx << dis << "RETURN "; + show_var_list(os, left, vars); + os << std::endl; + break; + case _GlobVar: + os << pfx << dis << "GLOBVAR "; + show_var_list(os, left, vars); + os << " := " << (fun_ref ? fun_ref->name() : "(null)") << std::endl; + break; + case _SetGlob: + os << pfx << dis << "SETGLOB "; + os << (fun_ref ? 
fun_ref->name() : "(null)") << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Repeat: + os << pfx << dis << "REPEAT "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + case _If: + os << pfx << dis << "IF "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << " ELSE "; + show_block(os, block1.get(), vars, pfx, mode); + os << std::endl; + break; + case _While: + os << pfx << dis << "WHILE "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << " DO "; + show_block(os, block1.get(), vars, pfx, mode); + os << std::endl; + break; + case _Until: + os << pfx << dis << "UNTIL "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + case _Again: + os << pfx << dis << "AGAIN "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + default: + os << pfx << dis << " "; + show_var_list(os, left, vars); + os << " -- "; + show_var_list(os, right, vars); + os << std::endl; + break; + } +} + +void Op::show_var_list(std::ostream& os, const std::vector& idx_list, + const std::vector& vars) const { + if (!idx_list.size()) { + os << "()"; + } else if (idx_list.size() == 1) { + os << vars.at(idx_list[0]); + } else { + os << "(" << vars.at(idx_list[0]); + for (std::size_t i = 1; i < idx_list.size(); i++) { + os << "," << vars.at(idx_list[i]); + } + os << ")"; + } +} + +void Op::show_var_list(std::ostream& os, const std::vector& list, const std::vector& vars) const { + auto n = list.size(); + if (!n) { + os << "()"; + } else { + os << "( "; + for (std::size_t i = 0; i < list.size(); i++) { + if (i) { + os << ", "; + } + if (list[i].is_unused()) { + os << '?'; + } + os << vars.at(list[i].idx) << ':'; + list[i].show_value(os); + } + os << " )"; + } 
+} + +void Op::show_block(std::ostream& os, const Op* block, const std::vector& vars, std::string pfx, int mode) { + os << "{" << std::endl; + std::string pfx2 = pfx + " "; + for (const Op& op : block) { + op.show(os, vars, pfx2, mode); + } + os << pfx << "}"; +} + +std::ostream& operator<<(std::ostream& os, const CodeBlob& code) { + code.print(os); + return os; +} + +// flags: +1 = show variable definition locations; +2 = show vars after each op; +4 = show var abstract value info after each op; +8 = show all variables at start +void CodeBlob::print(std::ostream& os, int flags) const { + os << "CODE BLOB: " << var_cnt << " variables, " << in_var_cnt << " input\n"; + if ((flags & 8) != 0) { + for (const auto& var : vars) { + var.dump(os); + if (var.where.is_defined() && (flags & 1) != 0) { + var.where.show(os); + os << " defined here:\n"; + var.where.show_context(os); + } + } + } + os << "------- BEGIN --------\n"; + for (const auto& op : ops) { + op.show(os, vars, "", flags); + } + os << "-------- END ---------\n\n"; +} + +var_idx_t CodeBlob::create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation location) { + vars.emplace_back(var_cnt, var_type, sym_idx, location); + return var_cnt++; +} + +bool CodeBlob::import_params(FormalArgList arg_list) { + if (var_cnt || in_var_cnt) { + return false; + } + std::vector list; + for (const auto& par : arg_list) { + TypeExpr* arg_type; + SymDef* arg_sym; + SrcLocation arg_loc; + std::tie(arg_type, arg_sym, arg_loc) = par; + list.push_back(create_var(arg_type, arg_sym ? arg_sym->sym_idx : 0, arg_loc)); + } + emplace_back(loc, Op::_Import, list); + in_var_cnt = var_cnt; + return true; +} + +} // namespace tolk diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp new file mode 100644 index 000000000..719df9b7d --- /dev/null +++ b/tolk/analyzer.cpp @@ -0,0 +1,947 @@ +/* + This file is part of TON Blockchain Library. 
+
+    TON Blockchain Library is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    TON Blockchain Library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "tolk.h"
+#include "compiler-state.h"
+
+namespace tolk {
+
+/*
+ *
+ *   ANALYZE AND PREPROCESS ABSTRACT CODE
+ *
+ */
+
+// Resolves indirections in every variable's type and recomputes its width.
+void CodeBlob::simplify_var_types() {
+  for (TmpVar& var : vars) {
+    TypeExpr::remove_indirect(var.v_type);
+    var.v_type->recompute_width();
+  }
+}
+
+// Splits every variable whose type has a component count other than one into
+// per-component variables, then rewrites all ops accordingly.
+// In strict mode, variables without a fixed width, or single-component variables
+// whose width is not one, raise ParseError.
+// Returns the number of variables that were split (0 if nothing changed).
+int CodeBlob::split_vars(bool strict) {
+  // `n` is the index the next created component variable is expected to get
+  int n = var_cnt, changes = 0;
+  // note: var_cnt grows while create_var() is called below, so the loop also visits new components
+  for (int j = 0; j < var_cnt; j++) {
+    TmpVar& var = vars[j];
+    if (strict && var.v_type->minw != var.v_type->maxw) {
+      throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"};
+    }
+    std::vector<TypeExpr*> comp_types;
+    int k = var.v_type->extract_components(comp_types);
+    tolk_assert(k <= 254 && n <= 0x7fff00);
+    tolk_assert(static_cast<unsigned>(k) == comp_types.size());
+    if (k != 1) {
+      // negative coord: ~coord packs the first component index (high bits) and the count (low 8 bits)
+      var.coord = ~((n << 8) + k);
+      for (int i = 0; i < k; i++) {
+        // create_var() appends to `vars`; `var` is not used past this point, vars[j] is re-indexed instead
+        auto v = create_var(comp_types[i], vars[j].sym_idx, vars[j].where);
+        tolk_assert(v == n + i);
+        tolk_assert(vars[v].idx == v);
+        // positive coord: parent index in the high bits, 1-based component number in the low 8 bits
+        vars[v].coord = (j << 8) + i + 1;
+      }
+      n += k;
+      ++changes;
+    } else if (strict && var.v_type->minw != 1) {
+      throw ParseError{var.where,
+                       "cannot work with variable or variable component of width greater than one"};
+    }
+  }
+  if (!changes) {
+    return 0;
+  }
+  for (auto& op : ops) {
+    op.split_vars(vars);
+  }
+  return changes;
+}
+
+// Runs the used-variables analysis over the whole code, starting from an empty var list at the end.
+bool CodeBlob::compute_used_code_vars() {
+  VarDescrList empty_var_info;
+  return compute_used_code_vars(ops, empty_var_info, true);
+}
+
+// Recursive step: handles the tail of the op list first, then the current op.
+// The last op must be a _Nop, which simply receives `var_info`.
+// Returns true if any op's variable info changed.
+bool CodeBlob::compute_used_code_vars(std::unique_ptr<Op>& ops_ptr, const VarDescrList& var_info, bool edit) const {
+  tolk_assert(ops_ptr);
+  if (!ops_ptr->next) {
+    tolk_assert(ops_ptr->cl == Op::_Nop);
+    return ops_ptr->set_var_info(var_info);
+  }
+  // Both parts must always run (no short-circuit), and the tail must run first.
+  // Two sequenced statements guarantee that; operands of `|` have no specified evaluation order.
+  const bool tail_changed = compute_used_code_vars(ops_ptr->next, var_info, edit);
+  const bool head_changed = ops_ptr->compute_used_vars(*this, edit);
+  return tail_changed | head_changed;
+}
+
+// Two lists are equal when they hold the same variable indices with the same flags (values are ignored).
+bool operator==(const VarDescrList& x, const VarDescrList& y) {
+  if (x.size() != y.size()) {
+    return false;
+  }
+  for (std::size_t i = 0; i < x.size(); i++) {
+    if (x.list[i].idx != y.list[i].idx || x.list[i].flags != y.list[i].flags) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Compares abstract values only: the value flags and the integer constant (if any).
+bool same_values(const VarDescr& x, const VarDescr& y) {
+  if (x.val != y.val || x.int_const.is_null() != y.int_const.is_null()) {
+    return false;
+  }
+  if (x.int_const.not_null() && cmp(x.int_const, y.int_const) != 0) {
+    return false;
+  }
+  return true;
+}
+
+// Element-wise same_values() over two lists that must also agree on variable indices.
+bool same_values(const VarDescrList& x, const VarDescrList& y) {
+  if (x.size() != y.size()) {
+    return false;
+  }
+  for (std::size_t i = 0; i < x.size(); i++) {
+    if (x.list[i].idx != y.list[i].idx || !same_values(x.list[i], y.list[i])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Stores new variable info; returns false when it equals the current one (nothing changed).
+bool Op::set_var_info(const VarDescrList& new_var_info) {
+  if (var_info == new_var_info) {
+    return false;
+  }
+  var_info = new_var_info;
+  return true;
+}
+
+bool Op::set_var_info(VarDescrList&& new_var_info) {
+  if (var_info == new_var_info) {
+    return false;
+  }
+  var_info = std::move(new_var_info);
+  return true;
+}
+
+// Same as set_var_info(), but with the variables from `var_list` removed from the new info first.
+bool Op::set_var_info_except(const VarDescrList& new_var_info, const std::vector<var_idx_t>& var_list) {
+  if (!var_list.size()) {
+    return set_var_info(new_var_info);
+  }
+  VarDescrList tmp_info{new_var_info};
+  tmp_info -= var_list;
+  return set_var_info(tmp_info);
+}
+
+bool Op::set_var_info_except(VarDescrList&& new_var_info, const std::vector& var_list) { + if (var_list.size()) { + new_var_info -= var_list; + } + return set_var_info(std::move(new_var_info)); +} +std::vector sort_unique_vars(const std::vector& var_list) { + std::vector vars{var_list}, unique_vars; + std::sort(vars.begin(), vars.end()); + vars.erase(std::unique(vars.begin(), vars.end()), vars.end()); + return vars; +} + +VarDescr* VarDescrList::operator[](var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + return it != list.end() && it->idx == idx ? &*it : nullptr; +} + +const VarDescr* VarDescrList::operator[](var_idx_t idx) const { + auto it = std::lower_bound(list.begin(), list.end(), idx); + return it != list.end() && it->idx == idx ? &*it : nullptr; +} + +std::size_t VarDescrList::count(const std::vector idx_list) const { + std::size_t res = 0; + for (var_idx_t idx : idx_list) { + if (operator[](idx)) { + ++res; + } + } + return res; +} + +std::size_t VarDescrList::count_used(const std::vector idx_list) const { + std::size_t res = 0; + for (var_idx_t idx : idx_list) { + auto v = operator[](idx); + if (v && !v->is_unused()) { + ++res; + } + } + return res; +} + +VarDescrList& VarDescrList::operator-=(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it != list.end() && it->idx == idx) { + list.erase(it); + } + return *this; +} + +VarDescrList& VarDescrList::operator-=(const std::vector& idx_list) { + for (var_idx_t idx : idx_list) { + *this -= idx; + } + return *this; +} + +VarDescrList& VarDescrList::add_var(var_idx_t idx, bool unused) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + list.emplace(it, idx, VarDescr::_Last | (unused ? 
VarDescr::_Unused : 0)); + } else if (it->is_unused() && !unused) { + it->clear_unused(); + } + return *this; +} + +VarDescrList& VarDescrList::add_vars(const std::vector& idx_list, bool unused) { + for (var_idx_t idx : idx_list) { + add_var(idx, unused); + } + return *this; +} + +VarDescr& VarDescrList::add(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + it = list.emplace(it, idx); + } + return *it; +} + +VarDescr& VarDescrList::add_newval(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + return *list.emplace(it, idx); + } else { + it->clear_value(); + return *it; + } +} + +VarDescrList& VarDescrList::clear_last() { + for (auto& var : list) { + if (var.flags & VarDescr::_Last) { + var.flags &= ~VarDescr::_Last; + } + } + return *this; +} + +VarDescrList VarDescrList::operator+(const VarDescrList& y) const { + VarDescrList res; + auto it1 = list.cbegin(); + auto it2 = y.list.cbegin(); + while (it1 != list.cend() && it2 != y.list.cend()) { + if (it1->idx < it2->idx) { + res.list.push_back(*it1++); + } else if (it1->idx > it2->idx) { + res.list.push_back(*it2++); + } else { + res.list.push_back(*it1++); + res.list.back() += *it2++; + } + } + while (it1 != list.cend()) { + res.list.push_back(*it1++); + } + while (it2 != y.list.cend()) { + res.list.push_back(*it2++); + } + return res; +} + +VarDescrList& VarDescrList::operator+=(const VarDescrList& y) { + return *this = *this + y; +} + +VarDescrList VarDescrList::operator|(const VarDescrList& y) const { + if (y.unreachable) { + return *this; + } + if (unreachable) { + return y; + } + VarDescrList res; + auto it1 = list.cbegin(); + auto it2 = y.list.cbegin(); + while (it1 != list.cend() && it2 != y.list.cend()) { + if (it1->idx < it2->idx) { + it1++; + } else if (it1->idx > it2->idx) { + it2++; + } else { + res.list.push_back(*it1++); + res.list.back() |= *it2++; + } + } + 
return res; +} + +VarDescrList& VarDescrList::operator|=(const VarDescrList& y) { + if (y.unreachable) { + return *this; + } else { + return *this = *this | y; + } +} + +VarDescrList& VarDescrList::operator&=(const VarDescrList& values) { + for (const VarDescr& vd : values.list) { + VarDescr* item = operator[](vd.idx); + if (item) { + *item &= vd; + } + } + unreachable |= values.unreachable; + return *this; +} + +VarDescrList& VarDescrList::import_values(const VarDescrList& values) { + if (values.unreachable) { + set_unreachable(); + } else + for (auto& vd : list) { + auto new_vd = values[vd.idx]; + if (new_vd) { + vd.set_value(*new_vd); + } else { + vd.clear_value(); + } + } + return *this; +} + +bool Op::std_compute_used_vars(bool disabled) { + // left = OP right + // var_info := (var_info - left) + right + VarDescrList new_var_info{next->var_info}; + new_var_info -= left; + new_var_info.clear_last(); + if (args.size() == right.size() && !disabled) { + for (const VarDescr& arg : args) { + new_var_info.add_var(arg.idx, arg.is_unused()); + } + } else { + new_var_info.add_vars(right, disabled); + } + return set_var_info(std::move(new_var_info)); +} + +bool Op::compute_used_vars(const CodeBlob& code, bool edit) { + tolk_assert(next); + const VarDescrList& next_var_info = next->var_info; + if (cl == _Nop) { + return set_var_info_except(next_var_info, left); + } + switch (cl) { + case _IntConst: + case _SliceConst: + case _GlobVar: + case _Call: + case _CallInd: + case _Tuple: + case _UnTuple: { + // left = EXEC right; + if (!next_var_info.count_used(left) && !impure()) { + // all variables in `left` are not needed + if (edit) { + set_disabled(); + } + return std_compute_used_vars(true); + } + return std_compute_used_vars(); + } + case _SetGlob: { + // GLOB = right + if (right.empty() && edit) { + set_disabled(); + } + return std_compute_used_vars(right.empty()); + } + case _Let: { + // left = right + std::size_t cnt = next_var_info.count_used(left); + 
tolk_assert(left.size() == right.size()); + auto l_it = left.cbegin(), r_it = right.cbegin(); + VarDescrList new_var_info{next_var_info}; + new_var_info -= left; + new_var_info.clear_last(); + std::vector new_left, new_right; + for (; l_it < left.cend(); ++l_it, ++r_it) { + if (std::find(l_it + 1, left.cend(), *l_it) == left.cend()) { + auto p = next_var_info[*l_it]; + new_var_info.add_var(*r_it, edit && (!p || p->is_unused())); + new_left.push_back(*l_it); + new_right.push_back(*r_it); + } + } + if (new_left.size() < left.size()) { + left = std::move(new_left); + right = std::move(new_right); + } + if (!cnt && edit) { + // all variables in `left` are not needed + set_disabled(); + } + return set_var_info(std::move(new_var_info)); + } + case _Return: { + // return left + if (var_info.count(left) == left.size()) { + return false; + } + std::vector unique_vars = sort_unique_vars(left); + var_info.list.clear(); + for (var_idx_t i : unique_vars) { + var_info.list.emplace_back(i, VarDescr::_Last); + } + return true; + } + case _Import: { + // import left + std::vector unique_vars = sort_unique_vars(left); + var_info.list.clear(); + for (var_idx_t i : unique_vars) { + var_info.list.emplace_back(i, next_var_info[i] ? 0 : VarDescr::_Last); + } + return true; + } + case _If: { + // if (left) then block0 else block1 + // VarDescrList nx_var_info = next_var_info; + // nx_var_info.clear_last(); + code.compute_used_code_vars(block0, next_var_info, edit); + VarDescrList merge_info; + if (block1) { + code.compute_used_code_vars(block1, next_var_info, edit); + merge_info = block0->var_info + block1->var_info; + } else { + merge_info = block0->var_info + next_var_info; + } + merge_info.clear_last(); + merge_info += left; + return set_var_info(std::move(merge_info)); + } + case _While: { + // while (block0 || left) block1; + // ... 
block0 left { block1 block0 left } next + VarDescrList new_var_info{next_var_info}; + bool changes = false; + do { + VarDescrList after_cond{new_var_info}; + after_cond += left; + code.compute_used_code_vars(block0, after_cond, changes); + code.compute_used_code_vars(block1, block0->var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block1->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + new_var_info += left; + code.compute_used_code_vars(block0, new_var_info, edit); + return set_var_info(block0->var_info); + } + case _Until: { + // until (block0 || left); + // .. { block0 left } block0 left next + VarDescrList after_cond_first{next_var_info}; + after_cond_first += left; + code.compute_used_code_vars(block0, after_cond_first, false); + VarDescrList new_var_info{block0->var_info}; + bool changes = false; + do { + VarDescrList after_cond{new_var_info}; + after_cond += next_var_info; + after_cond += left; + code.compute_used_code_vars(block0, after_cond, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + return set_var_info(std::move(new_var_info) + next_var_info); + } + case _Repeat: { + // repeat (left) block0 + // left { block0 } next + VarDescrList new_var_info{next_var_info}; + bool changes = false; + do { + code.compute_used_code_vars(block0, new_var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + tolk_assert(left.size() == 1); + bool last = new_var_info.count_used(left) == 0; + new_var_info += left; + if (last) { + new_var_info[left[0]]->flags |= VarDescr::_Last; + } + return set_var_info(std::move(new_var_info)); 
+ } + case _Again: { + // for(;;) block0 + // { block0 } + VarDescrList new_var_info; + bool changes = false; + do { + code.compute_used_code_vars(block0, new_var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + return set_var_info(std::move(new_var_info)); + } + case _TryCatch: { + code.compute_used_code_vars(block0, next_var_info, edit); + code.compute_used_code_vars(block1, next_var_info, edit); + VarDescrList merge_info = block0->var_info + block1->var_info + next_var_info; + merge_info -= left; + merge_info.clear_last(); + return set_var_info(std::move(merge_info)); + } + default: + std::cerr << "fatal: unknown operation in compute_used_vars()\n"; + throw ParseError{where, "unknown operation"}; + } +} + +bool prune_unreachable(std::unique_ptr& ops) { + if (!ops) { + return true; + } + Op& op = *ops; + if (op.cl == Op::_Nop) { + if (op.next) { + ops = std::move(op.next); + return prune_unreachable(ops); + } + return true; + } + bool reach; + switch (op.cl) { + case Op::_IntConst: + case Op::_SliceConst: + case Op::_GlobVar: + case Op::_SetGlob: + case Op::_Call: + case Op::_CallInd: + case Op::_Tuple: + case Op::_UnTuple: + case Op::_Import: + reach = true; + break; + case Op::_Let: { + reach = true; + break; + } + case Op::_Return: + reach = false; + break; + case Op::_If: { + // if left then block0 else block1; ... 
+ VarDescr* c_var = op.var_info[op.left[0]]; + if (c_var && c_var->always_true()) { + op.block0->last().next = std::move(op.next); + ops = std::move(op.block0); + return prune_unreachable(ops); + } else if (c_var && c_var->always_false()) { + op.block1->last().next = std::move(op.next); + ops = std::move(op.block1); + return prune_unreachable(ops); + } else { + reach = static_cast(prune_unreachable(op.block0)) | static_cast(prune_unreachable(op.block1)); + } + break; + } + case Op::_While: { + // while (block0 || left) block1; + if (!prune_unreachable(op.block0)) { + // computation of block0 never returns + ops = std::move(op.block0); + return prune_unreachable(ops); + } + VarDescr* c_var = op.block0->last().var_info[op.left[0]]; + if (c_var && c_var->always_false()) { + // block1 never executed + op.block0->last().next = std::move(op.next); + ops = std::move(op.block0); + return prune_unreachable(ops); + } else if (c_var && c_var->always_true()) { + if (!prune_unreachable(op.block1)) { + // block1 never returns + op.block0->last().next = std::move(op.block1); + ops = std::move(op.block0); + return false; + } + // infinite loop + op.cl = Op::_Again; + op.block0->last().next = std::move(op.block1); + op.left.clear(); + reach = false; + } else { + if (!prune_unreachable(op.block1)) { + // block1 never returns, while equivalent to block0 ; if left then block1 else next + op.cl = Op::_If; + std::unique_ptr new_op = std::move(op.block0); + op.block0 = std::move(op.block1); + op.block1 = std::make_unique(op.next->where, Op::_Nop); + new_op->last().next = std::move(ops); + ops = std::move(new_op); + } + reach = true; // block1 may be never executed + } + break; + } + case Op::_Repeat: { + // repeat (left) block0 + VarDescr* c_var = op.var_info[op.left[0]]; + if (c_var && c_var->always_nonpos()) { + // loop never executed + ops = std::move(op.next); + return prune_unreachable(ops); + } + if (c_var && c_var->always_pos()) { + if (!prune_unreachable(op.block0)) { + // block0 
executed at least once, and it never returns + // replace code with block0 + ops = std::move(op.block0); + return false; + } + } else { + prune_unreachable(op.block0); + } + reach = true; + break; + } + case Op::_Until: + case Op::_Again: { + // do block0 until left; ... + if (!prune_unreachable(op.block0)) { + // block0 never returns, replace loop by block0 + ops = std::move(op.block0); + return false; + } + reach = (op.cl != Op::_Again); + break; + } + case Op::_TryCatch: { + reach = static_cast(prune_unreachable(op.block0)) | static_cast(prune_unreachable(op.block1)); + break; + } + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{op.where, "unknown operation in prune_unreachable()"}; + } + if (reach) { + return prune_unreachable(op.next); + } else { + while (op.next->next) { + op.next = std::move(op.next->next); + } + return false; + } +} + +void CodeBlob::prune_unreachable_code() { + if (prune_unreachable(ops)) { + throw ParseError{loc, "control reaches end of function"}; + } +} + +void CodeBlob::fwd_analyze() { + VarDescrList values; + tolk_assert(ops && ops->cl == Op::_Import); + for (var_idx_t i : ops->left) { + values += i; + if (vars[i].v_type->is_int()) { + values[i]->val |= VarDescr::_Int; + } + } + ops->fwd_analyze(values); +} + +void Op::prepare_args(VarDescrList values) { + if (args.size() != right.size()) { + args.clear(); + for (var_idx_t i : right) { + args.emplace_back(i); + } + } + for (std::size_t i = 0; i < right.size(); i++) { + const VarDescr* val = values[right[i]]; + if (val) { + args[i].set_value(*val); + // args[i].clear_unused(); + } else { + args[i].clear_value(); + } + args[i].clear_unused(); + } +} + +VarDescrList Op::fwd_analyze(VarDescrList values) { + var_info.import_values(values); + switch (cl) { + case _Nop: + case _Import: + break; + case _Return: + values.set_unreachable(); + break; + case _IntConst: { + values.add_newval(left[0]).set_const(int_const); + break; + } + case _SliceConst: { + 
values.add_newval(left[0]).set_const(str_const); + break; + } + case _Call: { + prepare_args(values); + auto func = dynamic_cast(fun_ref->value); + if (func) { + std::vector res; + res.reserve(left.size()); + for (var_idx_t i : left) { + res.emplace_back(i); + } + AsmOpList tmp; + func->compile(tmp, res, args, where); // abstract interpretation of res := f (args) + int j = 0; + for (var_idx_t i : left) { + values.add_newval(i).set_value(res[j++]); + } + } else { + for (var_idx_t i : left) { + values.add_newval(i); + } + } + break; + } + case _Tuple: + case _UnTuple: + case _GlobVar: + case _CallInd: { + for (var_idx_t i : left) { + values.add_newval(i); + } + break; + } + case _SetGlob: + break; + case _Let: { + std::vector old_val; + tolk_assert(left.size() == right.size()); + for (std::size_t i = 0; i < right.size(); i++) { + const VarDescr* ov = values[right[i]]; + if (!ov && G.is_verbosity(5)) { + std::cerr << "FATAL: error in assignment at right component #" << i << " (no value for _" << right[i] << ")" + << std::endl; + for (auto x : left) { + std::cerr << '_' << x << " "; + } + std::cerr << "= "; + for (auto x : right) { + std::cerr << '_' << x << " "; + } + std::cerr << std::endl; + } + // tolk_assert(ov); + if (ov) { + old_val.push_back(*ov); + } else { + old_val.emplace_back(); + } + } + for (std::size_t i = 0; i < left.size(); i++) { + values.add_newval(left[i]).set_value(std::move(old_val[i])); + } + break; + } + case _If: { + VarDescrList val1 = block0->fwd_analyze(values); + VarDescrList val2 = block1 ? 
block1->fwd_analyze(std::move(values)) : std::move(values); + values = val1 | val2; + break; + } + case _Repeat: { + bool atl1 = (values[left[0]] && values[left[0]]->always_pos()); + VarDescrList next_values = block0->fwd_analyze(values); + while (true) { + VarDescrList new_values = values | next_values; + if (same_values(new_values, values)) { + break; + } + values = std::move(new_values); + next_values = block0->fwd_analyze(values); + } + if (atl1) { + values = std::move(next_values); + } + break; + } + case _While: { + auto values0 = values; + values = block0->fwd_analyze(values); + if (values[left[0]] && values[left[0]]->always_false()) { + // block1 never executed + block1->fwd_analyze(values); + break; + } + while (true) { + VarDescrList next_values = values | block0->fwd_analyze(values0 | block1->fwd_analyze(values)); + if (same_values(next_values, values)) { + break; + } + values = std::move(next_values); + } + break; + } + case _Until: + case _Again: { + while (true) { + VarDescrList next_values = values | block0->fwd_analyze(values); + if (same_values(next_values, values)) { + break; + } + values = std::move(next_values); + } + values = block0->fwd_analyze(values); + break; + } + case _TryCatch: { + VarDescrList val1 = block0->fwd_analyze(values); + VarDescrList val2 = block1->fwd_analyze(std::move(values)); + values = val1 | val2; + break; + } + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{where, "unknown operation in fwd_analyze()"}; + } + if (next) { + return next->fwd_analyze(std::move(values)); + } else { + return values; + } +} + +void Op::set_disabled(bool flag) { + if (flag) { + flags |= _Disabled; + } else { + flags &= ~_Disabled; + } +} + + +bool Op::set_noreturn(bool flag) { + if (flag) { + flags |= _NoReturn; + } else { + flags &= ~_NoReturn; + } + return flag; +} + +void Op::set_impure(const CodeBlob &code) { + // todo calling this function with `code` is a bad design (flags are assigned after Op is constructed) + 
// later it's better to check this somewhere in code.emplace_back() + if (code.flags & CodeBlob::_ForbidImpure) { + throw ParseError(where, "an impure operation in a pure function"); + } + flags |= _Impure; +} + +void Op::set_impure(const CodeBlob &code, bool flag) { + if (flag) { + if (code.flags & CodeBlob::_ForbidImpure) { + throw ParseError(where, "an impure operation in a pure function"); + } + flags |= _Impure; + } else { + flags &= ~_Impure; + } +} + + +bool Op::mark_noreturn() { + switch (cl) { + case _Nop: + if (!next) { + return set_noreturn(false); + } + // fallthrough + case _Import: + case _IntConst: + case _SliceConst: + case _Let: + case _Tuple: + case _UnTuple: + case _SetGlob: + case _GlobVar: + case _CallInd: + case _Call: + return set_noreturn(next->mark_noreturn()); + case _Return: + return set_noreturn(); + case _If: + case _TryCatch: + // note, that & | (not && ||) here and below is mandatory to invoke both left and right calls + return set_noreturn((static_cast(block0->mark_noreturn()) & static_cast(block1 && block1->mark_noreturn())) | static_cast(next->mark_noreturn())); + case _Again: + block0->mark_noreturn(); + return set_noreturn(); + case _Until: + return set_noreturn(static_cast(block0->mark_noreturn()) | static_cast(next->mark_noreturn())); + case _While: + block1->mark_noreturn(); + return set_noreturn(static_cast(block0->mark_noreturn()) | static_cast(next->mark_noreturn())); + case _Repeat: + block0->mark_noreturn(); + return set_noreturn(next->mark_noreturn()); + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{where, "unknown operation in mark_noreturn()"}; + } +} + +void CodeBlob::mark_noreturn() { + ops->mark_noreturn(); +} + +} // namespace tolk diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp new file mode 100644 index 000000000..8db75091b --- /dev/null +++ b/tolk/asmops.cpp @@ -0,0 +1,375 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include + +namespace tolk { + +/* + * + * ASM-OP LIST FUNCTIONS + * + */ + +int is_pos_pow2(td::RefInt256 x) { + if (sgn(x) > 0 && !sgn(x & (x - 1))) { + return x->bit_size(false) - 1; + } else { + return -1; + } +} + +int is_neg_pow2(td::RefInt256 x) { + return sgn(x) < 0 ? is_pos_pow2(-x) : 0; +} + +std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg) { + int i = stack_reg.idx; + if (i >= 0) { + if (i < 16) { + return os << 's' << i; + } else { + return os << i << " s()"; + } + } else if (i >= -2) { + return os << "s(" << i << ')'; + } else { + return os << i << " s()"; + } +} + +AsmOp AsmOp::Const(int arg, std::string push_op, td::RefInt256 origin) { + std::ostringstream os; + os << arg << ' ' << push_op; + return AsmOp::Const(os.str(), origin); +} + +AsmOp AsmOp::make_stk2(int a, int b, const char* str, int delta) { + std::ostringstream os; + os << SReg(a) << ' ' << SReg(b) << ' ' << str; + int c = std::max(a, b) + 1; + return AsmOp::Custom(os.str(), c, c + delta); +} + +AsmOp AsmOp::make_stk3(int a, int b, int c, const char* str, int delta) { + std::ostringstream os; + os << SReg(a) << ' ' << SReg(b) << ' ' << SReg(c) << ' ' << str; + int m = std::max(a, std::max(b, c)) + 1; + return AsmOp::Custom(os.str(), m, m + delta); +} + +AsmOp AsmOp::BlkSwap(int a, int b) { + 
std::ostringstream os; + if (a == 1 && b == 1) { + return AsmOp::Xchg(0, 1); + } else if (a == 1) { + if (b == 2) { + os << "ROT"; + } else { + os << b << " ROLL"; + } + } else if (b == 1) { + if (a == 2) { + os << "-ROT"; + } else { + os << a << " -ROLL"; + } + } else { + os << a << " " << b << " BLKSWAP"; + } + return AsmOp::Custom(os.str(), a + b, a + b); +} + +AsmOp AsmOp::BlkPush(int a, int b) { + std::ostringstream os; + if (a == 1) { + return AsmOp::Push(b); + } else if (a == 2 && b == 1) { + os << "2DUP"; + } else { + os << a << " " << b << " BLKPUSH"; + } + return AsmOp::Custom(os.str(), b + 1, a + b + 1); +} + +AsmOp AsmOp::BlkDrop(int a) { + std::ostringstream os; + if (a == 1) { + return AsmOp::Pop(); + } else if (a == 2) { + os << "2DROP"; + } else { + os << a << " BLKDROP"; + } + return AsmOp::Custom(os.str(), a, 0); +} + +AsmOp AsmOp::BlkDrop2(int a, int b) { + if (!b) { + return BlkDrop(a); + } + std::ostringstream os; + os << a << " " << b << " BLKDROP2"; + return AsmOp::Custom(os.str(), a + b, b); +} + +AsmOp AsmOp::BlkReverse(int a, int b) { + std::ostringstream os; + os << a << " " << b << " REVERSE"; + return AsmOp::Custom(os.str(), a + b, a + b); +} + +AsmOp AsmOp::Tuple(int a) { + switch (a) { + case 1: + return AsmOp::Custom("SINGLE", 1, 1); + case 2: + return AsmOp::Custom("PAIR", 2, 1); + case 3: + return AsmOp::Custom("TRIPLE", 3, 1); + } + std::ostringstream os; + os << a << " TUPLE"; + return AsmOp::Custom(os.str(), a, 1); +} + +AsmOp AsmOp::UnTuple(int a) { + switch (a) { + case 1: + return AsmOp::Custom("UNSINGLE", 1, 1); + case 2: + return AsmOp::Custom("UNPAIR", 1, 2); + case 3: + return AsmOp::Custom("UNTRIPLE", 1, 3); + } + std::ostringstream os; + os << a << " UNTUPLE"; + return AsmOp::Custom(os.str(), 1, a); +} + +AsmOp AsmOp::IntConst(td::RefInt256 x) { + if (x->signed_fits_bits(8)) { + return AsmOp::Const(dec_string(x) + " PUSHINT", x); + } + if (!x->is_valid()) { + return AsmOp::Const("PUSHNAN", x); + } + int k = 
is_pos_pow2(x); + if (k >= 0) { + return AsmOp::Const(k, "PUSHPOW2", x); + } + k = is_pos_pow2(x + 1); + if (k >= 0) { + return AsmOp::Const(k, "PUSHPOW2DEC", x); + } + k = is_pos_pow2(-x); + if (k >= 0) { + return AsmOp::Const(k, "PUSHNEGPOW2", x); + } + if (!x->mod_pow2_short(23)) { + return AsmOp::Const(dec_string(x) + " PUSHINTX", x); + } + return AsmOp::Const(dec_string(x) + " PUSHINT", x); +} + +AsmOp AsmOp::BoolConst(bool f) { + return AsmOp::Const(f ? "TRUE" : "FALSE"); +} + +AsmOp AsmOp::Parse(std::string custom_op) { + if (custom_op == "NOP") { + return AsmOp::Nop(); + } else if (custom_op == "SWAP") { + return AsmOp::Xchg(1); + } else if (custom_op == "DROP") { + return AsmOp::Pop(0); + } else if (custom_op == "NIP") { + return AsmOp::Pop(1); + } else if (custom_op == "DUP") { + return AsmOp::Push(0); + } else if (custom_op == "OVER") { + return AsmOp::Push(1); + } else { + return AsmOp::Custom(custom_op); + } +} + +AsmOp AsmOp::Parse(std::string custom_op, int args, int retv) { + auto res = Parse(custom_op); + if (res.is_custom()) { + res.a = args; + res.b = retv; + } + return res; +} + +void AsmOp::out(std::ostream& os) const { + if (!op.empty()) { + os << op; + return; + } + switch (t) { + case a_none: + break; + case a_xchg: + if (!a && !(b & -2)) { + os << (b ? "SWAP" : "NOP"); + break; + } + os << SReg(a) << ' ' << SReg(b) << " XCHG"; + break; + case a_push: + if (!(a & -2)) { + os << (a ? "OVER" : "DUP"); + break; + } + os << SReg(a) << " PUSH"; + break; + case a_pop: + if (!(a & -2)) { + os << (a ? 
"NIP" : "DROP"); + break; + } + os << SReg(a) << " POP"; + break; + default: + throw Fatal{"unknown assembler operation"}; + } +} + +void AsmOp::out_indent_nl(std::ostream& os, bool no_eol) const { + for (int i = 0; i < indent; i++) { + os << " "; + } + out(os); + if (!no_eol) { + os << std::endl; + } +} + +std::string AsmOp::to_string() const { + if (!op.empty()) { + return op; + } else { + std::ostringstream os; + out(os); + return os.str(); + } +} + +bool AsmOpList::append(const std::vector& ops) { + for (const auto& op : ops) { + if (!append(op)) { + return false; + } + } + return true; +} + +const_idx_t AsmOpList::register_const(Const new_const) { + if (new_const.is_null()) { + return not_const; + } + unsigned idx; + for (idx = 0; idx < constants_.size(); idx++) { + if (!td::cmp(new_const, constants_[idx])) { + return idx; + } + } + constants_.push_back(std::move(new_const)); + return (const_idx_t)idx; +} + +Const AsmOpList::get_const(const_idx_t idx) { + if ((unsigned)idx < constants_.size()) { + return constants_[idx]; + } else { + return {}; + } +} + +void AsmOpList::show_var(std::ostream& os, var_idx_t idx) const { + if (!var_names_ || (unsigned)idx >= var_names_->size()) { + os << '_' << idx; + } else { + var_names_->at(idx).show(os, 2); + } +} + +void AsmOpList::show_var_ext(std::ostream& os, std::pair idx_pair) const { + auto i = idx_pair.first; + auto j = idx_pair.second; + if (!var_names_ || (unsigned)i >= var_names_->size()) { + os << '_' << i; + } else { + var_names_->at(i).show(os, 2); + // if (!var_names_->at(i).v_type->is_int()) { + // os << '<'; var_names_->at(i).v_type->print(os); os << '>'; + // } + } + if ((unsigned)j < constants_.size() && constants_[j].not_null()) { + os << '=' << constants_[j]; + } +} + +void AsmOpList::out(std::ostream& os, int mode) const { + if (!(mode & 2)) { + for (const auto& op : list_) { + op.out_indent_nl(os); + } + } else { + std::size_t n = list_.size(); + for (std::size_t i = 0; i < n; i++) { + const auto& op = 
list_[i]; + if (!op.is_comment() && i + 1 < n && list_[i + 1].is_comment()) { + op.out_indent_nl(os, true); + os << '\t'; + do { + i++; + } while (i + 1 < n && list_[i + 1].is_comment()); + list_[i].out(os); + os << std::endl; + } else { + op.out_indent_nl(os, false); + } + } + } +} + +bool apply_op(StackTransform& trans, const AsmOp& op) { + if (!trans.is_valid()) { + return false; + } + switch (op.t) { + case AsmOp::a_none: + return true; + case AsmOp::a_xchg: + return trans.apply_xchg(op.a, op.b, true); + case AsmOp::a_push: + return trans.apply_push(op.a); + case AsmOp::a_pop: + return trans.apply_pop(op.a); + case AsmOp::a_const: + return !op.a && op.b == 1 && trans.apply_push_newconst(); + case AsmOp::a_custom: + return op.is_gconst() && trans.apply_push_newconst(); + default: + return false; + } +} + +} // namespace tolk diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp new file mode 100644 index 000000000..1a1d199ec --- /dev/null +++ b/tolk/ast-from-tokens.cpp @@ -0,0 +1,1220 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "ast-from-tokens.h" +#include "ast.h" +#include "platform-utils.h" +#include "type-expr.h" +#include "tolk-version.h" + +/* + * Here we construct AST for a tolk file. + * While constructing, no global state is modified. 
+ * Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on. + * There was no way to perform any more or less semantic analysis. + * Implementing AST gives a giant advance for future modifications and stability. + */ + +namespace tolk { + +// given a token, determine whether it's <, or >, or similar +static bool is_comparison_binary_op(TokenType tok) { + return tok == tok_lt || tok == tok_gt || tok == tok_leq || tok == tok_geq || tok == tok_eq || tok == tok_neq || tok == tok_spaceship; +} + +// same as above, but to detect bitwise operators: & | ^ +static bool is_bitwise_binary_op(TokenType tok) { + return tok == tok_bitwise_and || tok == tok_bitwise_or || tok == tok_bitwise_xor; +} + +// same as above, but to detect logical operators: && || +static bool is_logical_binary_op(TokenType tok) { + return tok == tok_logical_and || tok == tok_logical_or; +} + +// same as above, but to detect addition/subtraction +static bool is_add_or_sub_binary_op(TokenType tok) { + return tok == tok_plus || tok == tok_minus; +} + +// fire an error for a case "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected) +// it would better be a warning, but we decided to make it a strict error +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_lower_precedence(SrcLocation loc, std::string_view op_lower, std::string_view op_higher) { + std::string name_lower = static_cast(op_lower); + std::string name_higher = static_cast(op_higher); + throw ParseError(loc, name_lower + " has lower precedence than " + name_higher + + ", probably this code won't work as you expected. " + "Use parenthesis: either (... " + name_lower + " ...) to evaluate it first, or (... " + name_higher + " ...) 
to suppress this error."); +} + +// fire an error for a case "arg1 & arg2 | arg3" +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_mix_and_or_no_parenthesis(SrcLocation loc, std::string_view op1, std::string_view op2) { + std::string name1 = static_cast(op1); + std::string name2 = static_cast(op2); + throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis may lead to accidental errors. " + "Use parenthesis to emphasize operator precedence."); +} + +// diagnose when bitwise operators are used in a probably wrong way due to tricky precedence +// example: "flags & 0xFF != 0" is equivalent to "flags & 1", most likely it's unexpected +// the only way to suppress this error for the programmer is to use parenthesis +// (how do we detect presence of parenthesis? simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator}, +// that's why if rhs->type == ast_binary_operator, it's not surrounded by parenthesis) +static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyV lhs, AnyV rhs) { + // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") + if (rhs->type == ast_binary_operator && is_comparison_binary_op(rhs->as()->tok)) { + fire_error_lower_precedence(loc, operator_name, rhs->as()->operator_name); + } + + // handle "0 != flags & 0xFF" (lhs = "0 != flags") + if (lhs->type == ast_binary_operator && is_comparison_binary_op(lhs->as()->tok)) { + fire_error_lower_precedence(loc, operator_name, lhs->as()->operator_name); + } +} + +// similar to above, but detect potentially invalid usage of && and || +// since anyway, using parenthesis when both && and || occur in the same expression, +// && and || have equal operator precedence in Tolk +static void diagnose_and_or_precedence(SrcLocation loc, AnyV lhs, TokenType rhs_tok, std::string_view rhs_operator_name) { + if (auto lhs_op = lhs->try_as()) { + // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") + if (is_bitwise_binary_op(lhs_op->tok) && 
is_bitwise_binary_op(rhs_tok) && lhs_op->tok != rhs_tok) { + fire_error_mix_and_or_no_parenthesis(loc, lhs_op->operator_name, rhs_operator_name); + } + + // handle "arg1 && arg2 || arg3" (lhs = "arg1 && arg2") + if (is_logical_binary_op(lhs_op->tok) && is_logical_binary_op(rhs_tok) && lhs_op->tok != rhs_tok) { + fire_error_mix_and_or_no_parenthesis(loc, lhs_op->operator_name, rhs_operator_name); + } + } +} + +// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) +static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyV rhs) { + if (rhs->type == ast_binary_operator && is_add_or_sub_binary_op(rhs->as()->tok)) { + fire_error_lower_precedence(loc, bitshift_operator_name, rhs->as()->operator_name); + } +} + +// fire an error for FunC-style variable declaration, like "int i" +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_FunC_style_var_declaration(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + std::string type_str = static_cast(lex.cur_str()); // int / slice / etc. + lex.next(); + std::string var_name = lex.tok() == tok_identifier ? static_cast(lex.cur_str()) : "name"; + throw ParseError(loc, "can't parse; probably, you use FunC-like declarations; valid syntax is `var " + var_name + ": " + type_str + " = ...`"); +} + +// replace (a == null) and similar to isNull(a) (call of a built-in function) +static AnyV maybe_replace_eq_null_with_isNull_call(V v) { + bool has_null = v->get_lhs()->type == ast_null_keyword || v->get_rhs()->type == ast_null_keyword; + bool replace = has_null && (v->tok == tok_eq || v->tok == tok_neq); + if (!replace) { + return v; + } + + auto v_ident = createV(v->loc, "__isNull"); // built-in function + AnyV v_null = v->get_lhs()->type == ast_null_keyword ? 
v->get_rhs() : v->get_lhs(); + AnyV v_arg = createV(v->loc, v_null, false); + AnyV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); + if (v->tok == tok_neq) { + v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); + } + return v_isNull; +} + + +/* + * + * PARSE SOURCE + * + */ + +// TE ::= TA | TA -> TE +// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] +static TypeExpr* parse_type(Lexer& lex, V genericsT_list); + +static TypeExpr* parse_type1(Lexer& lex, V genericsT_list) { + switch (lex.tok()) { + case tok_int: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Int); + case tok_cell: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Cell); + case tok_slice: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Slice); + case tok_builder: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Builder); + case tok_continuation: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Continutaion); + case tok_tuple: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Tuple); + case tok_auto: + lex.next(); + return TypeExpr::new_hole(); + case tok_void: + lex.next(); + return TypeExpr::new_tensor({}); + case tok_bool: + lex.error("bool type is not supported yet"); + case tok_self: + lex.error("`self` type can be used only as a return type of a function (enforcing it to be chainable)"); + case tok_identifier: + if (int idx = genericsT_list ? 
genericsT_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { + lex.next(); + return genericsT_list->get_item(idx)->created_type; + } + break; + case tok_oppar: { + lex.next(); + if (lex.tok() == tok_clpar) { + lex.next(); + return TypeExpr::new_unit(); + } + std::vector sub{1, parse_type(lex, genericsT_list)}; + while (lex.tok() == tok_comma) { + lex.next(); + sub.push_back(parse_type(lex, genericsT_list)); + } + lex.expect(tok_clpar, "`)`"); + return TypeExpr::new_tensor(std::move(sub)); + } + case tok_opbracket: { + lex.next(); + if (lex.tok() == tok_clbracket) { + lex.next(); + return TypeExpr::new_tuple({}); + } + std::vector sub{1, parse_type(lex, genericsT_list)}; + while (lex.tok() == tok_comma) { + lex.next(); + sub.push_back(parse_type(lex, genericsT_list)); + } + lex.expect(tok_clbracket, "`]`"); + return TypeExpr::new_tuple(std::move(sub)); + } + default: + break; + } + lex.unexpected(""); +} + +static TypeExpr* parse_type(Lexer& lex, V genericsT_list) { + TypeExpr* res = parse_type1(lex, genericsT_list); + if (lex.tok() == tok_arrow) { + lex.next(); + TypeExpr* to = parse_type(lex, genericsT_list); + return TypeExpr::new_map(res, to); + } + return res; +} + +AnyV parse_expr(Lexer& lex); + +static AnyV parse_parameter(Lexer& lex, V genericsT_list, bool is_first) { + SrcLocation loc = lex.cur_location(); + + // optional keyword `mutate` meaning that a function will mutate a passed argument (like passed by reference) + bool declared_as_mutate = false; + bool is_param_self = false; + if (lex.tok() == tok_mutate) { + lex.next(); + declared_as_mutate = true; + } + + // parameter name (or underscore for an unnamed parameter) + std::string_view param_name; + if (lex.tok() == tok_identifier) { + param_name = lex.cur_str(); + } else if (lex.tok() == tok_self) { + if (!is_first) { + lex.error("`self` can only be the first parameter"); + } + param_name = "self"; + is_param_self = true; + } else if (lex.tok() != tok_underscore) { + lex.unexpected("parameter name"); 
+ } + auto v_ident = createV(lex.cur_location(), param_name); + lex.next(); + + // parameter type after colon, also mandatory (even explicit ":auto") + lex.expect(tok_colon, "`: `"); + TypeExpr* param_type = parse_type(lex, genericsT_list); + if (declared_as_mutate && !param_type->has_fixed_width()) { + throw ParseError(loc, "`mutate` parameter must be strictly typed"); + } + if (is_param_self && !param_type->has_fixed_width()) { + throw ParseError(loc, "`self` parameter must be strictly typed"); + } + + return createV(loc, v_ident, param_type, declared_as_mutate); +} + +static AnyV parse_global_var_declaration(Lexer& lex, const std::vector>& annotations) { + if (!annotations.empty()) { + lex.error("@annotations are not applicable to global var declaration"); + } + SrcLocation loc = lex.cur_location(); + lex.expect(tok_global, "`global`"); + lex.check(tok_identifier, "global variable name"); + auto v_ident = createV(lex.cur_location(), lex.cur_str()); + lex.next(); + lex.expect(tok_colon, "`:`"); + TypeExpr* declared_type = parse_type(lex, nullptr); + if (lex.tok() == tok_comma) { + lex.error("multiple declarations are not allowed, split globals on separate lines"); + } + if (lex.tok() == tok_assign) { + lex.error("assigning to a global is not allowed at declaration"); + } + lex.expect(tok_semicolon, "`;`"); + return createV(loc, v_ident, declared_type); +} + +static AnyV parse_constant_declaration(Lexer& lex, const std::vector>& annotations) { + if (!annotations.empty()) { + lex.error("@annotations are not applicable to global var declaration"); + } + SrcLocation loc = lex.cur_location(); + lex.expect(tok_const, "`const`"); + lex.check(tok_identifier, "constant name"); + auto v_ident = createV(lex.cur_location(), lex.cur_str()); + lex.next(); + TypeExpr *declared_type = nullptr; + if (lex.tok() == tok_colon) { + lex.next(); + if (lex.tok() == tok_int) { + declared_type = TypeExpr::new_atomic(TypeExpr::_Int); + lex.next(); + } else if (lex.tok() == tok_slice) { + 
declared_type = TypeExpr::new_atomic(TypeExpr::_Slice); + lex.next(); + } else { + lex.error("a constant can be int or slice only"); + } + } + lex.expect(tok_assign, "`=`"); + AnyV init_value = parse_expr(lex); + if (lex.tok() == tok_comma) { + lex.error("multiple declarations are not allowed, split constants on separate lines"); + } + lex.expect(tok_semicolon, "`;`"); + return createV(loc, v_ident, declared_type, init_value); +} + +// "parameters" are at function declaration: `fun f(param1: int, mutate param2: slice)` +static V parse_parameter_list(Lexer& lex, V genericsT_list) { + SrcLocation loc = lex.cur_location(); + std::vector params; + lex.expect(tok_oppar, "parameter list"); + if (lex.tok() != tok_clpar) { + params.push_back(parse_parameter(lex, genericsT_list, true)); + while (lex.tok() == tok_comma) { + lex.next(); + params.push_back(parse_parameter(lex, genericsT_list, false)); + } + } + lex.expect(tok_clpar, "`)`"); + return createV(loc, std::move(params)); +} + +// "arguments" are at function call: `f(arg1, mutate arg2)` +static AnyV parse_argument(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + + // keyword `mutate` is necessary when a parameter is declared `mutate` (to make mutation obvious for the reader) + bool passed_as_mutate = false; + if (lex.tok() == tok_mutate) { + lex.next(); + passed_as_mutate = true; + } + + AnyV expr = parse_expr(lex); + return createV(loc, expr, passed_as_mutate); +} + +static V parse_argument_list(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + std::vector args; + lex.expect(tok_oppar, "`(`"); + if (lex.tok() != tok_clpar) { + args.push_back(parse_argument(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_argument(lex)); + } + } + lex.expect(tok_clpar, "`)`"); + return createV(loc, std::move(args)); +} + +// parse (expr) / [expr] / identifier / number +static AnyV parse_expr100(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + switch (lex.tok()) { + case tok_oppar: { + 
lex.next(); + if (lex.tok() == tok_clpar) { + lex.next(); + return createV(loc, {}); + } + AnyV first = parse_expr(lex); + if (lex.tok() == tok_clpar) { + lex.next(); + return createV(loc, first); + } + std::vector items(1, first); + while (lex.tok() == tok_comma) { + lex.next(); + items.emplace_back(parse_expr(lex)); + } + lex.expect(tok_clpar, "`)`"); + return createV(loc, std::move(items)); + } + case tok_opbracket: { + lex.next(); + if (lex.tok() == tok_clbracket) { + lex.next(); + return createV(loc, {}); + } + std::vector items(1, parse_expr(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + items.emplace_back(parse_expr(lex)); + } + lex.expect(tok_clbracket, "`]`"); + return createV(loc, std::move(items)); + } + case tok_int_const: { + std::string_view int_val = lex.cur_str(); + lex.next(); + return createV(loc, int_val); + } + case tok_string_const: { + std::string_view str_val = lex.cur_str(); + lex.next(); + char modifier = 0; + if (lex.tok() == tok_string_modifier) { + modifier = lex.cur_str()[0]; + lex.next(); + } + return createV(loc, str_val, modifier); + } + case tok_underscore: { + lex.next(); + return createV(loc); + } + case tok_true: { + lex.next(); + return createV(loc, true); + } + case tok_false: { + lex.next(); + return createV(loc, false); + } + case tok_null: { + lex.next(); + return createV(loc); + } + case tok_self: { + lex.next(); + return createV(loc); + } + case tok_identifier: { + std::string_view str_val = lex.cur_str(); + lex.next(); + return createV(loc, str_val); + } + default: { + // show a proper error for `int i` (FunC-style declarations) + TokenType t = lex.tok(); + if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_tuple) { + fire_error_FunC_style_var_declaration(lex); + } + lex.unexpected(""); + } + } +} + +// parse E(args) +static AnyV parse_expr90(Lexer& lex) { + AnyV res = parse_expr100(lex); + if (lex.tok() == tok_oppar) { + return createV(res->loc, res, parse_argument_list(lex)); 
+ } + return res; +} + +// parse E.method(...) (left-to-right) +static AnyV parse_expr80(Lexer& lex) { + AnyV lhs = parse_expr90(lex); + while (lex.tok() == tok_dot) { + SrcLocation loc = lex.cur_location(); + lex.next(); + lex.check(tok_identifier, "method name"); + std::string_view method_name = lex.cur_str(); + lex.next(); + lhs = createV(loc, method_name, lhs, parse_argument_list(lex)); + } + return lhs; +} + +// parse ! ~ - + E (unary) +static AnyV parse_expr75(Lexer& lex) { + TokenType t = lex.tok(); + if (t == tok_logical_not || t == tok_bitwise_not || t == tok_minus || t == tok_plus) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr75(lex); + return createV(loc, operator_name, t, rhs); + } + return parse_expr80(lex); +} + +// parse E * / % ^/ ~/ E (left-to-right) +static AnyV parse_expr30(Lexer& lex) { + AnyV lhs = parse_expr75(lex); + TokenType t = lex.tok(); + while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr75(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E + - E (left-to-right) +static AnyV parse_expr20(Lexer& lex) { + AnyV lhs = parse_expr30(lex); + TokenType t = lex.tok(); + while (t == tok_minus || t == tok_plus) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr30(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E << >> ~>> ^>> E (left-to-right) +static AnyV parse_expr17(Lexer& lex) { + AnyV lhs = parse_expr20(lex); + TokenType t = lex.tok(); + while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = 
lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr20(lex); + diagnose_addition_in_bitshift(loc, operator_name, rhs); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E == < > <= >= != <=> E (left-to-right) +static AnyV parse_expr15(Lexer& lex) { + AnyV lhs = parse_expr17(lex); + TokenType t = lex.tok(); + if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr17(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + if (t == tok_eq || t == tok_neq) { + lhs = maybe_replace_eq_null_with_isNull_call(lhs->as()); + } + } + return lhs; +} + +// parse E & | ^ E (left-to-right) +static AnyV parse_expr14(Lexer& lex) { + AnyV lhs = parse_expr15(lex); + TokenType t = lex.tok(); + while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr15(lex); + diagnose_bitwise_precedence(loc, operator_name, lhs, rhs); + diagnose_and_or_precedence(loc, lhs, t, operator_name); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E && || E (left-to-right) +static AnyV parse_expr13(Lexer& lex) { + AnyV lhs = parse_expr14(lex); + TokenType t = lex.tok(); + while (t == tok_logical_and || t == tok_logical_or) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr14(lex); + diagnose_and_or_precedence(loc, lhs, t, operator_name); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E = += -= E and E ? 
E : E (right-to-left) +static AnyV parse_expr10(Lexer& lex) { + AnyV lhs = parse_expr13(lex); + TokenType t = lex.tok(); + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || + t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor || + t == tok_assign) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr10(lex); + return createV(loc, operator_name, t, lhs, rhs); + } + if (t == tok_question) { + SrcLocation loc = lex.cur_location(); + lex.next(); + AnyV when_true = parse_expr10(lex); + lex.expect(tok_colon, "`:`"); + AnyV when_false = parse_expr10(lex); + return createV(loc, lhs, when_true, when_false); + } + return lhs; +} + +AnyV parse_expr(Lexer& lex) { + return parse_expr10(lex); +} + +AnyV parse_statement(Lexer& lex); + +static AnyV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_oppar) { + lex.next(); + AnyV first = parse_var_declaration_lhs(lex, is_immutable); + if (lex.tok() == tok_clpar) { + lex.next(); + return createV(loc, first); + } + std::vector args(1, first); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_var_declaration_lhs(lex, is_immutable)); + } + lex.expect(tok_clpar, "`)`"); + return createV(loc, std::move(args)); + } + if (lex.tok() == tok_opbracket) { + lex.next(); + std::vector args(1, parse_var_declaration_lhs(lex, is_immutable)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_var_declaration_lhs(lex, is_immutable)); + } + lex.expect(tok_clbracket, "`]`"); + return createV(loc, std::move(args)); + } + if (lex.tok() == tok_identifier) { + auto v_ident = createV(loc, lex.cur_str()); + TypeExpr* declared_type = nullptr; + bool marked_as_redef = false; + lex.next(); + if (lex.tok() == tok_colon) { + lex.next(); + 
declared_type = parse_type(lex, nullptr); + } else if (lex.tok() == tok_redef) { + lex.next(); + marked_as_redef = true; + } + return createV(loc, v_ident, declared_type, is_immutable, marked_as_redef); + } + if (lex.tok() == tok_underscore) { + TypeExpr* declared_type = nullptr; + lex.next(); + if (lex.tok() == tok_colon) { + lex.next(); + declared_type = parse_type(lex, nullptr); + } + return createV(loc, createV(loc), declared_type, true, false); + } + lex.unexpected("variable name"); +} + +static AnyV parse_local_vars_declaration(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + bool is_immutable = lex.tok() == tok_val; + lex.next(); + + AnyV lhs = parse_var_declaration_lhs(lex, is_immutable); + if (lex.tok() != tok_assign) { + lex.error("variables declaration must be followed by assignment: `var xxx = ...`"); + } + lex.next(); + AnyV assigned_val = parse_expr(lex); + + if (lex.tok() == tok_comma) { + lex.error("multiple declarations are not allowed, split variables on separate lines"); + } + lex.expect(tok_semicolon, "`;`"); + return createV(loc, lhs, assigned_val); +} + +static V parse_sequence(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_opbrace, "`{`"); + std::vector items; + while (lex.tok() != tok_clbrace) { + items.push_back(parse_statement(lex)); + } + SrcLocation loc_end = lex.cur_location(); + lex.expect(tok_clbrace, "`}`"); + return createV(loc, loc_end, items); +} + +static AnyV parse_return_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_return, "`return`"); + AnyV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) + ? 
createV(lex.cur_location(), {}) + : parse_expr(lex); + lex.expect(tok_semicolon, "`;`"); + return createV(loc, child); +} + +static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_if, "`if`"); + + lex.expect(tok_oppar, "`(`"); + AnyV cond = parse_expr(lex); + lex.expect(tok_clpar, "`)`"); + // replace if(!expr) with ifnot(expr) (this should be done later, but for now, let this be right at parsing time) + if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { + is_ifnot = !is_ifnot; + cond = v_not->get_rhs(); + } + + V if_body = parse_sequence(lex); + V else_body = nullptr; + if (lex.tok() == tok_else) { // else if(e) { } or else { } + lex.next(); + if (lex.tok() == tok_if) { + AnyV v_inner_if = parse_if_statement(lex, false); + else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); + } else { + else_body = parse_sequence(lex); + } + } else { // no 'else', create empty block + else_body = createV(lex.cur_location(), lex.cur_location(), {}); + } + return createV(loc, is_ifnot, cond, if_body, else_body); +} + +static AnyV parse_repeat_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_repeat, "`repeat`"); + lex.expect(tok_oppar, "`(`"); + AnyV cond = parse_expr(lex); + lex.expect(tok_clpar, "`)`"); + V body = parse_sequence(lex); + return createV(loc, cond, body); +} + +static AnyV parse_while_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_while, "`while`"); + lex.expect(tok_oppar, "`(`"); + AnyV cond = parse_expr(lex); + lex.expect(tok_clpar, "`)`"); + V body = parse_sequence(lex); + return createV(loc, cond, body); +} + +static AnyV parse_do_while_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_do, "`do`"); + V body = parse_sequence(lex); + lex.expect(tok_while, "`while`"); + lex.expect(tok_oppar, "`(`"); + AnyV cond = parse_expr(lex); + lex.expect(tok_clpar, "`)`"); + 
lex.expect(tok_semicolon, "`;`"); + return createV(loc, body, cond); +} + +static AnyV parse_catch_variable(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_identifier) { + std::string_view var_name = lex.cur_str(); + lex.next(); + return createV(loc, var_name); + } + if (lex.tok() == tok_underscore) { + lex.next(); + return createV(loc); + } + lex.unexpected("identifier"); +} + +static AnyV parse_throw_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_throw, "`throw`"); + + AnyV thrown_code, thrown_arg; + if (lex.tok() == tok_oppar) { // throw (code) or throw (code, arg) + lex.next(); + thrown_code = parse_expr(lex); + if (lex.tok() == tok_comma) { + lex.next(); + thrown_arg = parse_expr(lex); + } else { + thrown_arg = createV(loc); + } + lex.expect(tok_clpar, "`)`"); + } else { // throw code + thrown_code = parse_expr(lex); + thrown_arg = createV(loc); + } + + lex.expect(tok_semicolon, "`;`"); + return createV(loc, thrown_code, thrown_arg); +} + +static AnyV parse_assert_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_assert, "`assert`"); + + lex.expect(tok_oppar, "`(`"); + AnyV cond = parse_expr(lex); + AnyV thrown_code; + if (lex.tok() == tok_comma) { // assert(cond, code) + lex.next(); + thrown_code = parse_expr(lex); + lex.expect(tok_clpar, "`)`"); + } else { // assert(cond) throw code + lex.expect(tok_clpar, "`)`"); + lex.expect(tok_throw, "`throw excNo` after assert"); + thrown_code = parse_expr(lex); + } + + lex.expect(tok_semicolon, "`;`"); + return createV(loc, cond, thrown_code); +} + +static AnyV parse_try_catch_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_try, "`try`"); + V try_body = parse_sequence(lex); + + std::vector catch_args; + lex.expect(tok_catch, "`catch`"); + SrcLocation catch_loc = lex.cur_location(); + if (lex.tok() == tok_oppar) { + lex.next(); + catch_args.push_back(parse_catch_variable(lex)); + if (lex.tok() 
== tok_comma) { // catch (excNo, arg) + lex.next(); + catch_args.push_back(parse_catch_variable(lex)); + } else { // catch (excNo) -> catch (excNo, _) + catch_args.push_back(createV(catch_loc)); + } + lex.expect(tok_clpar, "`)`"); + } else { // catch -> catch (_, _) + catch_args.push_back(createV(catch_loc)); + catch_args.push_back(createV(catch_loc)); + } + V catch_expr = createV(catch_loc, std::move(catch_args)); + + V catch_body = parse_sequence(lex); + return createV(loc, try_body, catch_expr, catch_body); +} + +AnyV parse_statement(Lexer& lex) { + switch (lex.tok()) { + case tok_var: + case tok_val: + return parse_local_vars_declaration(lex); + case tok_opbrace: + return parse_sequence(lex); + case tok_return: + return parse_return_statement(lex); + case tok_if: + return parse_if_statement(lex, false); + case tok_repeat: + return parse_repeat_statement(lex); + case tok_do: + return parse_do_while_statement(lex); + case tok_while: + return parse_while_statement(lex); + case tok_throw: + return parse_throw_statement(lex); + case tok_assert: + return parse_assert_statement(lex); + case tok_try: + return parse_try_catch_statement(lex); + case tok_semicolon: { + SrcLocation loc = lex.cur_location(); + lex.next(); + return createV(loc); + } + case tok_break: + case tok_continue: + lex.error("break/continue from loops are not supported yet"); + default: { + AnyV expr = parse_expr(lex); + lex.expect(tok_semicolon, "`;`"); + return expr; + } + } +} + +static AnyV parse_func_body(Lexer& lex) { + return parse_sequence(lex); +} + +static AnyV parse_asm_func_body(Lexer& lex, V param_list) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_asm, "`asm`"); + size_t n_params = param_list->size(); + if (n_params > 16) { + throw ParseError{loc, "assembler built-in function can have at most 16 arguments"}; + } + std::vector arg_order, ret_order; + if (lex.tok() == tok_oppar) { + lex.next(); + while (lex.tok() == tok_identifier || lex.tok() == tok_self) { + int arg_idx = 
param_list->lookup_idx(lex.cur_str()); + if (arg_idx == -1) { + lex.unexpected("parameter name"); + } + arg_order.push_back(arg_idx); + lex.next(); + } + if (lex.tok() == tok_arrow) { + lex.next(); + while (lex.tok() == tok_int_const) { + int ret_idx = std::atoi(static_cast(lex.cur_str()).c_str()); + ret_order.push_back(ret_idx); + lex.next(); + } + } + lex.expect(tok_clpar, "`)`"); + } + std::vector asm_commands; + lex.check(tok_string_const, "\"ASM COMMAND\""); + while (lex.tok() == tok_string_const) { + std::string_view asm_command = lex.cur_str(); + asm_commands.push_back(createV(lex.cur_location(), asm_command, 0)); + lex.next(); + } + lex.expect(tok_semicolon, "`;`"); + return createV(loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands)); +} + +static AnyV parse_genericsT_list(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + std::vector genericsT_items; + lex.expect(tok_lt, "`<`"); + int idx = 0; + while (true) { + lex.check(tok_identifier, "T"); + std::string_view nameT = lex.cur_str(); + TypeExpr* type = TypeExpr::new_var(idx++); + genericsT_items.emplace_back(createV(lex.cur_location(), type, nameT)); + lex.next(); + if (lex.tok() != tok_comma) { + break; + } + lex.next(); + } + lex.expect(tok_gt, "`>`"); + return createV{loc, std::move(genericsT_items)}; +} + +static V parse_annotation(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.check(tok_annotation_at, "`@`"); + std::string_view name = lex.cur_str(); + AnnotationKind kind = Vertex::parse_kind(name); + lex.next(); + + V v_arg = nullptr; + if (lex.tok() == tok_oppar) { + SrcLocation loc_args = lex.cur_location(); + lex.next(); + std::vector args; + args.push_back(parse_expr(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_expr(lex)); + } + lex.expect(tok_clpar, "`)`"); + v_arg = createV(loc_args, std::move(args)); + } + + switch (kind) { + case AnnotationKind::unknown: + throw ParseError(loc, "unknown annotation " + 
static_cast(name)); + case AnnotationKind::inline_simple: + case AnnotationKind::inline_ref: + case AnnotationKind::pure: + case AnnotationKind::deprecated: + if (v_arg) { + throw ParseError(v_arg->loc, "arguments aren't allowed for " + static_cast(name)); + } + v_arg = createV(loc, {}); + break; + case AnnotationKind::method_id: + if (!v_arg || v_arg->size() != 1 || v_arg->get_item(0)->type != ast_int_const) { + throw ParseError(loc, "expecting `(number)` after " + static_cast(name)); + } + break; + } + + return createV(loc, kind, v_arg); +} + +static AnyV parse_function_declaration(Lexer& lex, const std::vector>& annotations) { + SrcLocation loc = lex.cur_location(); + bool is_get_method = lex.tok() == tok_get; + lex.next(); + if (is_get_method && lex.tok() == tok_fun) { + lex.next(); // 'get f()' and 'get fun f()' both correct + } + + lex.check(tok_identifier, "function name identifier"); + + std::string_view f_name = lex.cur_str(); + bool is_entrypoint = + f_name == "main" || f_name == "onInternalMessage" || f_name == "onExternalMessage" || + f_name == "onRunTickTock" || f_name == "onSplitPrepare" || f_name == "onSplitInstall"; + bool is_FunC_entrypoint = + f_name == "recv_internal" || f_name == "recv_external" || + f_name == "run_ticktock" || f_name == "split_prepare" || f_name == "split_install"; + if (is_FunC_entrypoint) { + lex.error("this is a reserved FunC/Fift identifier; you need `onInternalMessage`"); + } + + auto v_ident = createV(lex.cur_location(), f_name); + lex.next(); + + V genericsT_list = nullptr; + if (lex.tok() == tok_lt) { // 'fun f' + genericsT_list = parse_genericsT_list(lex)->as(); + } + + V v_param_list = parse_parameter_list(lex, genericsT_list)->as(); + bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->get_identifier()->name == "self"; + int n_mutate_params = v_param_list->get_mutate_params_count(); + + TypeExpr* ret_type = nullptr; + bool returns_self = false; + if (lex.tok() == tok_colon) { // : (if absent, it 
means "auto infer", not void) + lex.next(); + if (lex.tok() == tok_self) { + if (!accepts_self) { + lex.error("only a member function can return `self` (which accepts `self` first parameter)"); + } + lex.next(); + returns_self = true; + ret_type = TypeExpr::new_unit(); + } else { + ret_type = parse_type(lex, genericsT_list); + } + } + + if (is_entrypoint && (is_get_method || genericsT_list || n_mutate_params || accepts_self)) { + throw ParseError(loc, "invalid declaration of a reserved function"); + } + if (is_get_method && (genericsT_list || n_mutate_params || accepts_self)) { + throw ParseError(loc, "get methods can't have `mutate` and `self` params"); + } + + if (n_mutate_params) { + std::vector ret_tensor_items; + ret_tensor_items.reserve(1 + n_mutate_params); + for (AnyV v_param : v_param_list->get_params()) { + if (v_param->as()->declared_as_mutate) { + ret_tensor_items.emplace_back(v_param->as()->param_type); + } + } + ret_tensor_items.emplace_back(ret_type ? ret_type : TypeExpr::new_hole()); + ret_type = TypeExpr::new_tensor(std::move(ret_tensor_items)); + } + + AnyV v_body = nullptr; + + if (lex.tok() == tok_builtin) { + v_body = createV(lex.cur_location()); + lex.next(); + lex.expect(tok_semicolon, "`;`"); + } else if (lex.tok() == tok_opbrace) { + v_body = parse_func_body(lex); + } else if (lex.tok() == tok_asm) { + if (!ret_type) { + lex.error("asm function must specify return type"); + } + v_body = parse_asm_func_body(lex, v_param_list); + } else { + lex.unexpected("{ function body }"); + } + + auto f_declaration = createV(loc, v_ident, v_param_list, v_body); + f_declaration->ret_type = ret_type ? 
ret_type : TypeExpr::new_hole(); + f_declaration->is_entrypoint = is_entrypoint; + f_declaration->genericsT_list = genericsT_list; + f_declaration->marked_as_get_method = is_get_method; + f_declaration->marked_as_builtin = v_body->type == ast_empty; + f_declaration->accepts_self = accepts_self; + f_declaration->returns_self = returns_self; + + for (auto v_annotation : annotations) { + switch (v_annotation->kind) { + case AnnotationKind::inline_simple: + f_declaration->marked_as_inline = true; + break; + case AnnotationKind::inline_ref: + f_declaration->marked_as_inline_ref = true; + break; + case AnnotationKind::pure: + f_declaration->marked_as_pure = true; + break; + case AnnotationKind::method_id: + if (is_get_method || genericsT_list || is_entrypoint || n_mutate_params || accepts_self) { + v_annotation->error("@method_id can be specified only for regular functions"); + } + f_declaration->method_id = v_annotation->get_arg()->get_item(0)->as(); + break; + case AnnotationKind::deprecated: + // no special handling + break; + + default: + v_annotation->error("this annotation is not applicable to functions"); + } + } + + return f_declaration; +} + +static AnyV parse_tolk_required_version(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.next_special(tok_semver, "semver"); // syntax: "tolk 0.6" + std::string semver = static_cast(lex.cur_str()); + lex.next(); + + // for simplicity, there is no syntax ">= version" and so on, just strict compare + if (TOLK_VERSION != semver && TOLK_VERSION != semver + ".0") { // 0.6 = 0.6.0 + loc.show_warning("the contract is written in Tolk v" + semver + ", but you use Tolk compiler v" + TOLK_VERSION + "; probably, it will lead to compilation errors or hash changes"); + } + + return createV(loc, tok_eq, semver); // semicolon is not necessary +} + +static AnyV parse_import_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_import, "`import`"); + lex.check(tok_string_const, "source file name"); + 
std::string_view rel_filename = lex.cur_str(); + if (rel_filename.empty()) { + lex.error("imported file name is an empty string"); + } + auto v_str = createV(lex.cur_location(), rel_filename, 0); + lex.next(); + return createV(loc, v_str); // semicolon is not necessary +} + +// the main (exported) function +AnyV parse_src_file_to_ast(const SrcFile* file) { + std::vector toplevel_declarations; + std::vector> annotations; + Lexer lex(file); + + while (!lex.is_eof()) { + switch (lex.tok()) { + case tok_tolk: + if (!annotations.empty()) { + lex.unexpected("declaration after @annotations"); + } + toplevel_declarations.push_back(parse_tolk_required_version(lex)); + break; + case tok_import: + if (!annotations.empty()) { + lex.unexpected("declaration after @annotations"); + } + toplevel_declarations.push_back(parse_import_statement(lex)); + break; + case tok_semicolon: + if (!annotations.empty()) { + lex.unexpected("declaration after @annotations"); + } + lex.next(); // don't add ast_empty, no need + break; + + case tok_annotation_at: + annotations.push_back(parse_annotation(lex)); + break; + case tok_global: + toplevel_declarations.push_back(parse_global_var_declaration(lex, annotations)); + annotations.clear(); + break; + case tok_const: + toplevel_declarations.push_back(parse_constant_declaration(lex, annotations)); + annotations.clear(); + break; + case tok_fun: + case tok_get: + toplevel_declarations.push_back(parse_function_declaration(lex, annotations)); + annotations.clear(); + break; + + case tok_export: + case tok_struct: + case tok_enum: + case tok_operator: + case tok_infix: + lex.error("`" + static_cast(lex.cur_str()) +"` is not supported yet"); + + default: + lex.unexpected("fun or get"); + } + } + + return createV(file, std::move(toplevel_declarations)); +} + +} // namespace tolk diff --git a/tolk/ast-from-tokens.h b/tolk/ast-from-tokens.h new file mode 100644 index 000000000..5f380c569 --- /dev/null +++ b/tolk/ast-from-tokens.h @@ -0,0 +1,27 @@ +/* + This 
file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "src-file.h" + +namespace tolk { + +struct ASTNodeBase; + +const ASTNodeBase* parse_src_file_to_ast(const SrcFile* file); + +} // namespace tolk diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h new file mode 100644 index 000000000..478994e8b --- /dev/null +++ b/tolk/ast-replacer.h @@ -0,0 +1,160 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +/* + * A module of implementing traversing a vertex tree and replacing any vertex to another. 
+ * For example, to replace "beginCell()" call to "begin_cell()" in a function body (in V) + * regardless of the place this call is performed, you need to iterate over all the function AST, + * to find ast_function_call(beginCell), create ast_function_call(begin_cell) instead and to replace + * a pointer inside its parent. + * Inheriting from ASTVisitor makes this task quite simple, without any boilerplate. + * + * If you need just to traverse a vertex tree without replacing vertices, + * consider another api: ast-visitor.h. + */ + +namespace tolk { + +class ASTReplacer { +protected: + GNU_ATTRIBUTE_ALWAYS_INLINE static AnyV replace_children(const ASTNodeLeaf* v) { + return v; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeUnary* v) { + auto* v_mutable = const_cast(v); + v_mutable->child = replace(v_mutable->child); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeBinary* v) { + auto* v_mutable = const_cast(v); + v_mutable->lhs = replace(v->lhs); + v_mutable->rhs = replace(v->rhs); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeVararg* v) { + auto* v_mutable = const_cast(v); + for (AnyV& child : v_mutable->children) { + child = replace(child); + } + return v_mutable; + } + +public: + virtual ~ASTReplacer() = default; + + virtual AnyV replace(AnyV v) = 0; +}; + +class ASTReplacerInFunctionBody : public ASTReplacer { +protected: + using parent = ASTReplacerInFunctionBody; + + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV 
replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + + AnyV replace(AnyV v) final { + switch (v->type) { + case ast_empty: return replace(v->as()); + case ast_parenthesized_expr: return replace(v->as()); + case ast_tensor: return replace(v->as()); + case ast_tensor_square: return replace(v->as()); + case ast_identifier: return replace(v->as()); + case ast_int_const: return replace(v->as()); + case ast_string_const: return replace(v->as()); + case ast_bool_const: return replace(v->as()); + case ast_null_keyword: return replace(v->as()); + case ast_self_keyword: return replace(v->as()); + case ast_function_call: return replace(v->as()); + case ast_dot_method_call: return replace(v->as()); + case ast_underscore: return replace(v->as()); + case ast_unary_operator: return replace(v->as()); + case ast_binary_operator: return replace(v->as()); + case ast_ternary_operator: return replace(v->as()); + case 
ast_return_statement: return replace(v->as()); + case ast_sequence: return replace(v->as()); + case ast_repeat_statement: return replace(v->as()); + case ast_while_statement: return replace(v->as()); + case ast_do_while_statement: return replace(v->as()); + case ast_throw_statement: return replace(v->as()); + case ast_assert_statement: return replace(v->as()); + case ast_try_catch_statement: return replace(v->as()); + case ast_if_statement: return replace(v->as()); + case ast_local_var: return replace(v->as()); + case ast_local_vars_declaration: return replace(v->as()); + case ast_asm_body: return replace(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::visit"); + } + } + +public: + void start_replacing_in_function(V v) { + replace(v->get_body()); + } +}; + +class ASTReplacerAllFunctionsInFile : public ASTReplacerInFunctionBody { +protected: + using parent = ASTReplacerAllFunctionsInFile; + + virtual bool should_enter_function(V v) = 0; + +public: + void start_replacing_in_file(V v_file) { + for (AnyV v : v_file->get_toplevel_declarations()) { + if (auto v_function = v->try_as()) { + if (should_enter_function(v_function)) { + replace(v_function->get_body()); + } + } + } + } +}; + +} // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h new file mode 100644 index 000000000..759873b04 --- /dev/null +++ b/tolk/ast-stringifier.h @@ -0,0 +1,254 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#ifdef TOLK_DEBUG + +#include "ast.h" +#include "ast-visitor.h" +#include + +/* + * ASTStringifier is used to print out the whole vertex tree in a human-readable format. + * To stringify any vertex, call v->debug_print(), which uses this class. + */ + +namespace tolk { + +class ASTStringifier final : public ASTVisitor { + constexpr static std::pair name_pairs[] = { + {ast_empty, "ast_empty"}, + {ast_parenthesized_expr, "ast_parenthesized_expr"}, + {ast_tensor, "ast_tensor"}, + {ast_tensor_square, "ast_tensor_square"}, + {ast_identifier, "ast_identifier"}, + {ast_int_const, "ast_int_const"}, + {ast_string_const, "ast_string_const"}, + {ast_bool_const, "ast_bool_const"}, + {ast_null_keyword, "ast_null_keyword"}, + {ast_self_keyword, "ast_self_keyword"}, + {ast_argument, "ast_argument"}, + {ast_argument_list, "ast_argument_list"}, + {ast_function_call, "ast_function_call"}, + {ast_dot_method_call, "ast_dot_method_call"}, + {ast_global_var_declaration, "ast_global_var_declaration"}, + {ast_constant_declaration, "ast_constant_declaration"}, + {ast_underscore, "ast_underscore"}, + {ast_unary_operator, "ast_unary_operator"}, + {ast_binary_operator, "ast_binary_operator"}, + {ast_ternary_operator, "ast_ternary_operator"}, + {ast_return_statement, "ast_return_statement"}, + {ast_sequence, "ast_sequence"}, + {ast_repeat_statement, "ast_repeat_statement"}, + {ast_while_statement, "ast_while_statement"}, + {ast_do_while_statement, "ast_do_while_statement"}, + {ast_throw_statement, "ast_throw_statement"}, + {ast_assert_statement, "ast_assert_statement"}, + {ast_try_catch_statement, "ast_try_catch_statement"}, + {ast_if_statement, "ast_if_statement"}, + {ast_genericsT_item, "ast_genericsT_item"}, + {ast_genericsT_list, "ast_genericsT_list"}, + {ast_parameter, 
"ast_parameter"}, + {ast_parameter_list, "ast_parameter_list"}, + {ast_asm_body, "ast_asm_body"}, + {ast_annotation, "ast_annotation"}, + {ast_function_declaration, "ast_function_declaration"}, + {ast_local_var, "ast_local_var"}, + {ast_local_vars_declaration, "ast_local_vars_declaration"}, + {ast_tolk_required_version, "ast_tolk_required_version"}, + {ast_import_statement, "ast_import_statement"}, + {ast_tolk_file, "ast_tolk_file"}, + }; + + static_assert(std::size(name_pairs) == ast_tolk_file + 1, "name_pairs needs to be updated"); + + constexpr static std::pair annotation_kinds[] = { + {AnnotationKind::inline_simple, "@inline"}, + {AnnotationKind::inline_ref, "@inline_ref"}, + {AnnotationKind::method_id, "@method_id"}, + {AnnotationKind::pure, "@pure"}, + {AnnotationKind::deprecated, "@deprecated"}, + }; + + static_assert(std::size(annotation_kinds) == static_cast(AnnotationKind::unknown), "annotation_kinds needs to be updated"); + + template + constexpr static const char* ast_node_type_to_string() { + return name_pairs[node_type].second; + } + + int depth = 0; + std::string out; + bool colored = false; + + template + void handle_vertex(V v) { + out += std::string(depth * 2, ' '); + out += ast_node_type_to_string(); + if (std::string postfix = specific_str(v); !postfix.empty()) { + out += colored ? " \x1b[34m" : " // "; + out += postfix; + out += colored ? 
"\x1b[0m" : ""; + } + out += '\n'; + depth++; + visit_children(v); + depth--; + } + + static std::string specific_str(AnyV v) { + switch (v->type) { + case ast_identifier: + return static_cast(v->as()->name); + case ast_int_const: + return static_cast(v->as()->int_val); + case ast_string_const: + if (char modifier = v->as()->modifier) { + return "\"" + static_cast(v->as()->str_val) + "\"" + std::string(1, modifier); + } else { + return "\"" + static_cast(v->as()->str_val) + "\""; + } + case ast_function_call: { + if (auto v_lhs = v->as()->get_called_f()->try_as()) { + return static_cast(v_lhs->name) + "()"; + } + return {}; + } + case ast_dot_method_call: + return static_cast(v->as()->method_name); + case ast_global_var_declaration: + return static_cast(v->as()->get_identifier()->name); + case ast_constant_declaration: + return static_cast(v->as()->get_identifier()->name); + case ast_unary_operator: + return static_cast(v->as()->operator_name); + case ast_binary_operator: + return static_cast(v->as()->operator_name); + case ast_sequence: + return "↓" + std::to_string(v->as()->get_items().size()); + case ast_if_statement: + return v->as()->is_ifnot ? 
"ifnot" : ""; + case ast_annotation: + return annotation_kinds[static_cast(v->as()->kind)].second; + case ast_parameter: { + std::ostringstream os; + os << v->as()->param_type; + return static_cast(v->as()->get_identifier()->name) + ": " + os.str(); + } + case ast_function_declaration: { + std::string param_names; + for (int i = 0; i < v->as()->get_num_params(); i++) { + if (!param_names.empty()) + param_names += ","; + param_names += v->as()->get_param(i)->get_identifier()->name; + } + return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + param_names + ")"; + } + case ast_local_var: { + std::ostringstream os; + os << v->as()->declared_type; + if (auto v_ident = v->as()->get_identifier()->try_as()) { + return static_cast(v_ident->name) + ":" + os.str(); + } + return "_: " + os.str(); + } + case ast_tolk_required_version: + return static_cast(v->as()->semver); + case ast_import_statement: + return static_cast(v->as()->get_file_leaf()->str_val); + case ast_tolk_file: + return v->as()->file->rel_filename; + default: + return {}; + } + } + +public: + explicit ASTStringifier(bool colored) : colored(colored) { + } + + std::string to_string_with_children(AnyV v) { + out.clear(); + visit(v); + return std::move(out); + } + + static std::string to_string_without_children(AnyV v) { + std::string result = ast_node_type_to_string(v->type); + if (std::string postfix = specific_str(v); !postfix.empty()) { + result += ' '; + result += specific_str(v); + } + return result; + } + + static const char* ast_node_type_to_string(ASTNodeType node_type) { + return name_pairs[node_type].second; + } + + void visit(AnyV v) override { + switch (v->type) { + case ast_empty: return handle_vertex(v->as()); + case ast_parenthesized_expr: return handle_vertex(v->as()); + case ast_tensor: return handle_vertex(v->as()); + case ast_tensor_square: return handle_vertex(v->as()); + case ast_identifier: return handle_vertex(v->as()); + case ast_int_const: return handle_vertex(v->as()); + 
case ast_string_const: return handle_vertex(v->as()); + case ast_bool_const: return handle_vertex(v->as()); + case ast_null_keyword: return handle_vertex(v->as()); + case ast_self_keyword: return handle_vertex(v->as()); + case ast_argument: return handle_vertex(v->as()); + case ast_argument_list: return handle_vertex(v->as()); + case ast_function_call: return handle_vertex(v->as()); + case ast_dot_method_call: return handle_vertex(v->as()); + case ast_global_var_declaration: return handle_vertex(v->as()); + case ast_constant_declaration: return handle_vertex(v->as()); + case ast_underscore: return handle_vertex(v->as()); + case ast_unary_operator: return handle_vertex(v->as()); + case ast_binary_operator: return handle_vertex(v->as()); + case ast_ternary_operator: return handle_vertex(v->as()); + case ast_return_statement: return handle_vertex(v->as()); + case ast_sequence: return handle_vertex(v->as()); + case ast_repeat_statement: return handle_vertex(v->as()); + case ast_while_statement: return handle_vertex(v->as()); + case ast_do_while_statement: return handle_vertex(v->as()); + case ast_throw_statement: return handle_vertex(v->as()); + case ast_assert_statement: return handle_vertex(v->as()); + case ast_try_catch_statement: return handle_vertex(v->as()); + case ast_if_statement: return handle_vertex(v->as()); + case ast_genericsT_item: return handle_vertex(v->as()); + case ast_genericsT_list: return handle_vertex(v->as()); + case ast_parameter: return handle_vertex(v->as()); + case ast_parameter_list: return handle_vertex(v->as()); + case ast_asm_body: return handle_vertex(v->as()); + case ast_annotation: return handle_vertex(v->as()); + case ast_function_declaration: return handle_vertex(v->as()); + case ast_local_var: return handle_vertex(v->as()); + case ast_local_vars_declaration: return handle_vertex(v->as()); + case ast_tolk_required_version: return handle_vertex(v->as()); + case ast_import_statement: return handle_vertex(v->as()); + case ast_tolk_file: 
return handle_vertex(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTStringifier::visit"); + } + } +}; + +} // namespace tolk + +#endif // TOLK_DEBUG diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h new file mode 100644 index 000000000..d0a7bfaf6 --- /dev/null +++ b/tolk/ast-visitor.h @@ -0,0 +1,155 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +/* + * A module implementing base functionality of read-only traversing a vertex tree. + * Since a vertex in general doesn't store a vector of children, iterating is possible only for concrete node_type. + * E.g., for ast_if_statement, visit nodes cond, if-body and else-body. For ast_string_const, nothing. And so on. + * Visitors below are helpers to inherit from and handle specific vertex types. + * + * Note, that absence of "children" in ASTNodeBase is not a drawback. Instead, it encourages you to think + * about types and match the type system. + * + * The visitor is read-only, it does not modify visited nodes (except if you purposely call mutating methods). + * For example, if you want to replace "beginCell()" call with "begin_cell", a visitor isn't enough for you. + * To replace vertices, consider another API: ast-replacer.h. 
+ */ + +namespace tolk { + +class ASTVisitor { +protected: + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTNodeLeaf* v) { + static_cast(v); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeUnary* v) { + visit(v->child); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeBinary* v) { + visit(v->lhs); + visit(v->rhs); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeVararg* v) { + for (AnyV child : v->children) { + visit(child); + } + } + + virtual void visit(AnyV v) = 0; + +public: + virtual ~ASTVisitor() = default; +}; + +class ASTVisitorFunctionBody : public ASTVisitor { +protected: + using parent = ASTVisitorFunctionBody; + + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + 
virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + + void visit(AnyV v) final { + switch (v->type) { + case ast_empty: return visit(v->as()); + case ast_parenthesized_expr: return visit(v->as()); + case ast_tensor: return visit(v->as()); + case ast_tensor_square: return visit(v->as()); + case ast_identifier: return visit(v->as()); + case ast_int_const: return visit(v->as()); + case ast_string_const: return visit(v->as()); + case ast_bool_const: return visit(v->as()); + case ast_null_keyword: return visit(v->as()); + case ast_self_keyword: return visit(v->as()); + case ast_function_call: return visit(v->as()); + case ast_dot_method_call: return visit(v->as()); + case ast_underscore: return visit(v->as()); + case ast_unary_operator: return visit(v->as()); + case ast_binary_operator: return visit(v->as()); + case ast_ternary_operator: return visit(v->as()); + case ast_return_statement: return visit(v->as()); + case ast_sequence: return visit(v->as()); + case ast_repeat_statement: return visit(v->as()); + case ast_while_statement: return visit(v->as()); + case ast_do_while_statement: return visit(v->as()); + case ast_throw_statement: return visit(v->as()); + case ast_assert_statement: return visit(v->as()); + case ast_try_catch_statement: return visit(v->as()); + case ast_if_statement: return visit(v->as()); + case ast_local_var: return visit(v->as()); + case ast_local_vars_declaration: return visit(v->as()); + case ast_asm_body: return visit(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit"); + } + } + +public: + void start_visiting_function(V v_function) { + visit(v_function->get_body()); + } +}; + +class ASTVisitorAllFunctionsInFile : public ASTVisitorFunctionBody { +protected: + using parent = ASTVisitorAllFunctionsInFile; + + virtual bool should_enter_function(V v) = 0; + +public: + void start_visiting_file(V 
v_file) { + for (AnyV v : v_file->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + if (should_enter_function(v_func)) { + visit(v_func->get_body()); + } + } + } + } +}; + +} // namespace tolk diff --git a/tolk/ast.cpp b/tolk/ast.cpp new file mode 100644 index 000000000..b1af51005 --- /dev/null +++ b/tolk/ast.cpp @@ -0,0 +1,103 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "ast.h" +#include "ast-stringifier.h" +#include + +namespace tolk { + +static_assert(sizeof(ASTNodeBase) == 12); + +#ifdef TOLK_DEBUG + +std::string ASTNodeBase::to_debug_string(bool colored) const { + ASTStringifier s(colored); + return s.to_string_with_children(this); +} + +void ASTNodeBase::debug_print() const { + std::cerr << to_debug_string(true) << std::endl; +} + +#endif // TOLK_DEBUG + +UnexpectedASTNodeType::UnexpectedASTNodeType(AnyV v_unexpected, const char* place_where): v_unexpected(v_unexpected) { + message = "Unexpected ASTNodeType "; +#ifdef TOLK_DEBUG + message += ASTStringifier::ast_node_type_to_string(v_unexpected->type); + message += " "; +#endif + message += "in "; + message += place_where; +} + +void ASTNodeBase::error(const std::string& err_msg) const { + throw ParseError(loc, err_msg); +} + +AnnotationKind Vertex::parse_kind(std::string_view name) { + if (name == "@pure") { + return AnnotationKind::pure; + } + if (name == "@inline") { + return AnnotationKind::inline_simple; + } + if (name == "@inline_ref") { + return AnnotationKind::inline_ref; + } + if (name == "@method_id") { + return AnnotationKind::method_id; + } + if (name == "@deprecated") { + return AnnotationKind::deprecated; + } + return AnnotationKind::unknown; +} + +int Vertex::lookup_idx(std::string_view nameT) const { + for (size_t idx = 0; idx < children.size(); ++idx) { + if (children[idx] && children[idx]->as()->nameT == nameT) { + return static_cast(idx); + } + } + return -1; +} + +int Vertex::lookup_idx(std::string_view param_name) const { + for (size_t idx = 0; idx < children.size(); ++idx) { + if (children[idx] && children[idx]->as()->get_identifier()->name == param_name) { + return static_cast(idx); + } + } + return -1; +} + +int Vertex::get_mutate_params_count() const { + int n = 0; + for (AnyV param : children) { + if (param->as()->declared_as_mutate) { + n++; + } + } + return n; +} + +void Vertex::mutate_set_src_file(const SrcFile* file) const { + 
const_cast(this)->file = file; +} + +} // namespace tolk diff --git a/tolk/ast.h b/tolk/ast.h new file mode 100644 index 000000000..fd2b27cbf --- /dev/null +++ b/tolk/ast.h @@ -0,0 +1,632 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include +#include "platform-utils.h" +#include "src-file.h" +#include "type-expr.h" +#include "lexer.h" + +/* + * Here we introduce AST representation of Tolk source code. + * Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on. + * There was no way to perform any more or less semantic analysis. + * In Tolk, I've implemented parsing .tolk files into AST at first, and then converting this AST + * into legacy representation (see pipe-ast-to-legacy.cpp). + * In the future, more and more code analysis will be moved out of legacy to AST-level. + * + * From the user's point of view, all AST vertices are constant. All API is based on constancy. + * Even though fields of vertex structs are public, they can't be modified, since vertices are accepted by const ref. 
+ * Generally, there are two ways of accepting a vertex: + * * AnyV (= const ASTNodeBase*) + * the only thing you can do with this vertex is to see v->type (ASTNodeType) and to cast via v->as<node_type>() + * * V<node_type> (= const Vertex<node_type>*) + * a specific type of vertex, you can use its fields and methods + * There is one way of creating a vertex: + * * createV<node_type>(...constructor_args) (= new Vertex<node_type>(...)) + * vertices are currently created on the heap, without any custom memory arena, just allocated and never deleted + * + * Having AnyV and knowing its node_type, a call + * v->as<node_type>() + * will return a typed vertex. + * There is also a shorthand v->try_as<node_type>() which returns V<node_type> or nullptr if types don't match: + * if (auto v_int = v->try_as<ast_int_const>()) + * Note that these casts are NOT DYNAMIC. ASTNodeBase is not a virtual base, it has no vtable. + * So, as<...>() is just a compile-time casting, without any runtime overhead. + * + * Note that ASTNodeBase doesn't store any vector of children. That's why there is no way to loop over + * an arbitrary (unknown) vertex. Only a concrete Vertex stores its children (if any). + * Hence, to iterate over a custom vertex (e.g., a function body), one should inherit some kind of ASTVisitor. + * Besides read-only visiting, there is a "visit and replace" pattern. + * See ast-visitor.h and ast-replacer.h.
+ */ + +namespace tolk { + +enum ASTNodeType { + ast_empty, + ast_parenthesized_expr, + ast_tensor, + ast_tensor_square, + ast_identifier, + ast_int_const, + ast_string_const, + ast_bool_const, + ast_null_keyword, + ast_self_keyword, + ast_argument, + ast_argument_list, + ast_function_call, + ast_dot_method_call, + ast_global_var_declaration, + ast_constant_declaration, + ast_underscore, + ast_unary_operator, + ast_binary_operator, + ast_ternary_operator, + ast_return_statement, + ast_sequence, + ast_repeat_statement, + ast_while_statement, + ast_do_while_statement, + ast_throw_statement, + ast_assert_statement, + ast_try_catch_statement, + ast_if_statement, + ast_genericsT_item, + ast_genericsT_list, + ast_parameter, + ast_parameter_list, + ast_asm_body, + ast_annotation, + ast_function_declaration, + ast_local_var, + ast_local_vars_declaration, + ast_tolk_required_version, + ast_import_statement, + ast_tolk_file, +}; + +enum class AnnotationKind { + inline_simple, + inline_ref, + method_id, + pure, + deprecated, + unknown, +}; + +struct ASTNodeBase; + +using AnyV = const ASTNodeBase*; + +template +struct Vertex; + +template +using V = const Vertex*; + +#define createV new Vertex + +struct UnexpectedASTNodeType final : std::exception { + AnyV v_unexpected; + std::string message; + + explicit UnexpectedASTNodeType(AnyV v_unexpected, const char* place_where); + + const char* what() const noexcept override { + return message.c_str(); + } +}; + +// --------------------------------------------------------- + +struct ASTNodeBase { + const ASTNodeType type; + const SrcLocation loc; + + ASTNodeBase(ASTNodeType type, SrcLocation loc) : type(type), loc(loc) {} + + template + V as() const { +#ifdef TOLK_DEBUG + if (type != node_type) { + throw Fatal("v->as<...> to wrong node_type"); + } +#endif + return static_cast>(this); + } + + template + V try_as() const { + return type == node_type ? 
static_cast>(this) : nullptr; + } + + #ifdef TOLK_DEBUG + std::string to_debug_string() const { return to_debug_string(false); } + std::string to_debug_string(bool colored) const; + void debug_print() const; +#endif +/* error() carries the NORETURN/COLD attribute macros (defined elsewhere); presumably it reports err_msg for this vertex and does not return - confirm against its definition. */ + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void error(const std::string& err_msg) const; +}; +/* Base for vertices without children: only the type tag and location are stored. The constructor is protected; ASTVisitor and ASTReplacer are friends. */ +struct ASTNodeLeaf : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + ASTNodeLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeBase(type, loc) {} +}; +/* Base for vertices with exactly one child, kept in the protected member `child`. */ +struct ASTNodeUnary : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyV child; + + ASTNodeUnary(ASTNodeType type, SrcLocation loc, AnyV child) + : ASTNodeBase(type, loc), child(child) {} +}; +/* Base for vertices with exactly two children, kept in the protected members `lhs` and `rhs`. */ +struct ASTNodeBinary : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyV lhs; + AnyV rhs; + + ASTNodeBinary(ASTNodeType type, SrcLocation loc, AnyV lhs, AnyV rhs) + : ASTNodeBase(type, loc), lhs(lhs), rhs(rhs) {} +}; +/* Base for vertices with a variable number of children kept in a vector (moved in by the constructor). size() narrows the element count to int. */ +struct ASTNodeVararg : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + std::vector children; + + ASTNodeVararg(ASTNodeType type, SrcLocation loc, std::vector children) + : ASTNodeBase(type, loc), children(std::move(children)) {} + +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } +}; + +// --------------------------------------------------------- +/* ast_empty vertex: a leaf with no payload. */ +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_empty, loc) {} +}; +/* ast_parenthesized_expr vertex: the single child is the inner expression. */ +template<> +struct Vertex final : ASTNodeUnary { + AnyV get_expr() const { return child; } + + Vertex(SrcLocation loc, AnyV expr) + : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {} +}; +/* ast_tensor vertex: ordered list of item vertices; get_item(i) uses vector::at(), so a bad index throws. */ +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : 
ASTNodeVararg(ast_tensor, loc, std::move(items)) {} +}; +/* ast_tensor_square vertex: ordered list of item vertices; get_item(i) uses vector::at(), so a bad index throws. */ +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {} +}; +/* ast_identifier vertex: leaf holding the identifier text. `name` is a string_view, i.e. non-owning - NOTE(review): confirm the referenced buffer outlives the vertex. */ +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view name; + + Vertex(SrcLocation loc, std::string_view name) + : ASTNodeLeaf(ast_identifier, loc), name(name) {} +}; +/* ast_int_const vertex: leaf holding the literal as text (`int_val` is a string_view, not a parsed number). */ +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view int_val; + + Vertex(SrcLocation loc, std::string_view int_val) + : ASTNodeLeaf(ast_int_const, loc), int_val(int_val) {} +}; +/* ast_string_const vertex: leaf holding the string text plus a one-character `modifier` (its meaning is not defined in this file). */ +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view str_val; + char modifier; + + Vertex(SrcLocation loc, std::string_view str_val, char modifier) + : ASTNodeLeaf(ast_string_const, loc), str_val(str_val), modifier(modifier) {} +}; +/* ast_bool_const vertex: leaf holding the boolean value. */ +template<> +struct Vertex final : ASTNodeLeaf { + bool bool_val; + + Vertex(SrcLocation loc, bool bool_val) + : ASTNodeLeaf(ast_bool_const, loc), bool_val(bool_val) {} +}; +/* ast_null_keyword vertex: leaf with no payload. */ +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_null_keyword, loc) {} +}; +/* ast_self_keyword vertex: leaf with no payload. */ +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_self_keyword, loc) {} +}; +/* ast_argument vertex: the single child is the argument expression; `passed_as_mutate` records the call-site `mutate` marker. */ +template<> +struct Vertex final : ASTNodeUnary { + bool passed_as_mutate; // when called `f(mutate arg)`, not `f(arg)` + + AnyV get_expr() const { return child; } + + explicit Vertex(SrcLocation loc, AnyV expr, bool passed_as_mutate) + : ASTNodeUnary(ast_argument, loc, expr), passed_as_mutate(passed_as_mutate) {} +}; +/* ast_argument_list vertex: the children are the arguments; get_arg(i) returns child i as a typed vertex via as(). */ +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_arguments() const { return children; } + auto get_arg(int i) const { return children.at(i)->as(); } + + explicit Vertex(SrcLocation loc, std::vector arguments) + : 
ASTNodeVararg(ast_argument_list, loc, std::move(arguments)) {} +}; +/* ast_function_call vertex: lhs is the called expression, rhs is the argument list; get_num_args()/get_arg(i) forward to that list. */ +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_called_f() const { return lhs; } + auto get_arg_list() const { return rhs->as(); } + int get_num_args() const { return rhs->as()->size(); } + auto get_arg(int i) const { return rhs->as()->get_arg(i); } + + Vertex(SrcLocation loc, AnyV lhs_f, V arguments) + : ASTNodeBinary(ast_function_call, loc, lhs_f, arguments) {} +}; +/* ast_dot_method_call vertex: lhs is the object, rhs is the argument list, and the method name is stored as a string_view. */ +template<> +struct Vertex final : ASTNodeBinary { + std::string_view method_name; + + AnyV get_obj() const { return lhs; } + auto get_arg_list() const { return rhs->as(); } + + Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, V arguments) + : ASTNodeBinary(ast_dot_method_call, loc, lhs, arguments), method_name(method_name) {} +}; +/* ast_global_var_declaration vertex: the single child is the name identifier; `declared_type` may be nullptr. */ +template<> +struct Vertex final : ASTNodeUnary { + TypeExpr* declared_type; // may be nullptr + + auto get_identifier() const { return child->as(); } + + Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type) + : ASTNodeUnary(ast_global_var_declaration, loc, name_identifier), declared_type(declared_type) {} +}; +/* ast_constant_declaration vertex: lhs is the name identifier, rhs is the initial value; `declared_type` may be nullptr. */ +template<> +struct Vertex final : ASTNodeBinary { + TypeExpr* declared_type; // may be nullptr + + auto get_identifier() const { return lhs->as(); } + AnyV get_init_value() const { return rhs; } + + Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyV init_value) + : ASTNodeBinary(ast_constant_declaration, loc, name_identifier, init_value), declared_type(declared_type) {} +}; +/* ast_underscore vertex: leaf with no payload. */ +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_underscore, loc) {} +}; +/* ast_unary_operator vertex: the single child is the operand; the operator is kept both as text and as a token. */ +template<> +struct Vertex final : ASTNodeUnary { + std::string_view operator_name; + TokenType tok; + + AnyV get_rhs() const { return child; } + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV rhs) + : ASTNodeUnary(ast_unary_operator, loc, rhs), operator_name(operator_name), tok(tok) {} +}; + 
+/* ast_binary_operator vertex: lhs and rhs are the operands; the operator is kept both as text and as a token. */ +template<> +struct Vertex final : ASTNodeBinary { + std::string_view operator_name; + TokenType tok; + + AnyV get_lhs() const { return lhs; } + AnyV get_rhs() const { return rhs; } + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV lhs, AnyV rhs) + : ASTNodeBinary(ast_binary_operator, loc, lhs, rhs), operator_name(operator_name), tok(tok) {} +}; +/* ast_ternary_operator vertex: children are, in order, the condition, the value when true and the value when false. */ +template<> +struct Vertex final : ASTNodeVararg { + AnyV get_cond() const { return children.at(0); } + AnyV get_when_true() const { return children.at(1); } + AnyV get_when_false() const { return children.at(2); } + + Vertex(SrcLocation loc, AnyV cond, AnyV when_true, AnyV when_false) + : ASTNodeVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} +}; +/* ast_return_statement vertex: the single child is the returned value. Marked `final` like every other Vertex specialization, so it cannot be derived from. */ +template<> +struct Vertex final : ASTNodeUnary { + AnyV get_return_value() const { return child; } + + Vertex(SrcLocation loc, AnyV child) + : ASTNodeUnary(ast_return_statement, loc, child) {} +}; +/* ast_sequence vertex: ordered list of item vertices; `loc_end` is a second location stored next to the start `loc`. */ +template<> +struct Vertex final : ASTNodeVararg { + SrcLocation loc_end; + + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) + : ASTNodeVararg(ast_sequence, loc, std::move(items)), loc_end(loc_end) {} +}; +/* ast_repeat_statement vertex: lhs is the condition expression, rhs is the body. */ +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_cond() const { return lhs; } + auto get_body() const { return rhs->as(); } + + Vertex(SrcLocation loc, AnyV cond, V body) + : ASTNodeBinary(ast_repeat_statement, loc, cond, body) {} +}; +/* ast_while_statement vertex: lhs is the condition, rhs is the body. */ +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_cond() const { return lhs; } + auto get_body() const { return rhs->as(); } + + Vertex(SrcLocation loc, AnyV cond, V body) + : ASTNodeBinary(ast_while_statement, loc, cond, body) {} +}; +/* ast_do_while_statement vertex: note the reversed order - lhs is the body, rhs is the condition. */ +template<> +struct Vertex final : ASTNodeBinary { + auto get_body() const { return lhs->as(); } + AnyV get_cond() const { return rhs; } + + Vertex(SrcLocation loc, V body, AnyV cond) + : 
ASTNodeBinary(ast_do_while_statement, loc, body, cond) {} +}; +/* ast_throw_statement vertex: lhs is the thrown code, rhs is the thrown argument; an absent argument is represented by an ast_empty vertex, which has_thrown_arg() tests for. */ +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_thrown_code() const { return lhs; } + AnyV get_thrown_arg() const { return rhs; } // may be ast_empty + bool has_thrown_arg() const { return rhs->type != ast_empty; } + + Vertex(SrcLocation loc, AnyV thrown_code, AnyV thrown_arg) + : ASTNodeBinary(ast_throw_statement, loc, thrown_code, thrown_arg) {} +}; +/* ast_assert_statement vertex: lhs is the condition, rhs is the thrown code. */ +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_cond() const { return lhs; } + AnyV get_thrown_code() const { return rhs; } + + Vertex(SrcLocation loc, AnyV cond, AnyV thrown_code) + : ASTNodeBinary(ast_assert_statement, loc, cond, thrown_code) {} +}; +/* ast_try_catch_statement vertex: children are, in order, the try body, the catch expression and the catch body. */ +template<> +struct Vertex final : ASTNodeVararg { + auto get_try_body() const { return children.at(0)->as(); } + auto get_catch_expr() const { return children.at(1)->as(); } // (excNo, arg), always len 2 + auto get_catch_body() const { return children.at(2)->as(); } + + Vertex(SrcLocation loc, V try_body, V catch_expr, V catch_body) + : ASTNodeVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} +}; +/* ast_if_statement vertex: children are, in order, the condition, the if body and the else body. */ +template<> +struct Vertex final : ASTNodeVararg { + bool is_ifnot; // if(!cond), to generate more optimal fift code + + AnyV get_cond() const { return children.at(0); } + auto get_if_body() const { return children.at(1)->as(); } + auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) + + Vertex(SrcLocation loc, bool is_ifnot, AnyV cond, V if_body, V else_body) + : ASTNodeVararg(ast_if_statement, loc, {cond, if_body, else_body}), is_ifnot(is_ifnot) {} +}; +/* ast_genericsT_item vertex: leaf holding the generic parameter's name and the TypeExpr created for it. */ +template<> +struct Vertex final : ASTNodeLeaf { + TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates + std::string_view nameT; + + Vertex(SrcLocation loc, TypeExpr* created_type, std::string_view nameT) + : ASTNodeLeaf(ast_genericsT_item, loc), created_type(created_type), nameT(nameT) {} +}; + 
+/* ast_genericsT_list vertex: the children are the generic parameter items. get_items() returns a const reference to the stored vector (no copy per call), matching the accessors of the other list-like vertices. lookup_idx() is only declared here. */ +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { return children; } + auto get_item(int i) const { return children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector genericsT_items) + : ASTNodeVararg(ast_genericsT_list, loc, std::move(genericsT_items)) {} + + int lookup_idx(std::string_view nameT) const; +}; +/* ast_parameter vertex: the single child is the name identifier; is_underscore() is true when that identifier's name is empty. */ +template<> +struct Vertex final : ASTNodeUnary { + TypeExpr* param_type; + bool declared_as_mutate; // declared as `mutate param_name` + + auto get_identifier() const { return child->as(); } // for underscore, name is empty + bool is_underscore() const { return child->as()->name.empty(); } + + Vertex(SrcLocation loc, V name_identifier, TypeExpr* param_type, bool declared_as_mutate) + : ASTNodeUnary(ast_parameter, loc, name_identifier), param_type(param_type), declared_as_mutate(declared_as_mutate) {} +}; +/* ast_parameter_list vertex: the children are the parameters. lookup_idx() and get_mutate_params_count() are only declared here; has_mutate_params() is true when that count is positive. */ +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_params() const { return children; } + auto get_param(int i) const { return children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector params) + : ASTNodeVararg(ast_parameter_list, loc, std::move(params)) {} + + int lookup_idx(std::string_view param_name) const; + int get_mutate_params_count() const; + bool has_mutate_params() const { return get_mutate_params_count() > 0; } +}; +/* ast_asm_body vertex: the children are the asm commands; `arg_order` and `ret_order` are stored alongside (all three vectors are moved in by the constructor). */ +template<> +struct Vertex final : ASTNodeVararg { + std::vector arg_order; + std::vector ret_order; + + const std::vector& get_asm_commands() const { return children; } // ast_string_const[] + + Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) + : ASTNodeVararg(ast_asm_body, loc, std::move(asm_commands)), arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} +}; +/* ast_annotation vertex: the single child is the annotation argument; `kind` is the annotation kind and parse_kind() (declared only) maps a name to a kind. */ +template<> +struct Vertex final : ASTNodeUnary { + AnnotationKind kind; + + auto get_arg() const { return child->as(); } + + static AnnotationKind parse_kind(std::string_view name); + + Vertex(SrcLocation loc, AnnotationKind kind, V 
arg_probably_empty) + : ASTNodeUnary(ast_annotation, loc, arg_probably_empty), kind(kind) {} +}; +/* ast_local_var vertex: the single child is the variable's name vertex; the declared type and the immutable/redef flags are stored alongside. */ +template<> +struct Vertex final : ASTNodeUnary { + TypeExpr* declared_type; + bool is_immutable; // declared via 'val', not 'var' + bool marked_as_redef; // var (existing_var redef, new_var: int) = ... + + AnyV get_identifier() const { return child; } // ast_identifier / ast_underscore + + Vertex(SrcLocation loc, AnyV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) + : ASTNodeUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} +}; +/* ast_local_vars_declaration vertex: lhs is the declared target, rhs is the assigned value. */ +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_lhs() const { return lhs; } // ast_local_var / ast_tensor / ast_tensor_square + AnyV get_assigned_val() const { return rhs; } + + Vertex(SrcLocation loc, AnyV lhs, AnyV assigned_val) + : ASTNodeBinary(ast_local_vars_declaration, loc, lhs, assigned_val) {} +}; +/* ast_function_declaration vertex: children are, in order, the name identifier, the parameter list and the body. The remaining properties are public fields that start as nullptr/false and are not set by the constructor. */ +template<> +struct Vertex final : ASTNodeVararg { + auto get_identifier() const { return children.at(0)->as(); } + int get_num_params() const { return children.at(1)->as()->size(); } + auto get_param_list() const { return children.at(1)->as(); } + auto get_param(int i) const { return children.at(1)->as()->get_param(i); } + AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body + + TypeExpr* ret_type = nullptr; + V genericsT_list = nullptr; + bool is_entrypoint = false; + bool marked_as_pure = false; + bool marked_as_builtin = false; + bool marked_as_get_method = false; + bool marked_as_inline = false; + bool marked_as_inline_ref = false; + bool accepts_self = false; + bool returns_self = false; + V method_id = nullptr; + + bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } + + Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body) + : ASTNodeVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} +}; + +template<> 
+struct Vertex final : ASTNodeLeaf { +/* ast_tolk_required_version vertex: leaf holding a comparison token and the version text. */ TokenType cmp_tok; + std::string_view semver; + + Vertex(SrcLocation loc, TokenType cmp_tok, std::string_view semver) + : ASTNodeLeaf(ast_tolk_required_version, loc), cmp_tok(cmp_tok), semver(semver) {} +}; +/* ast_import_statement vertex: the single child is the file-name string literal; `file` starts as nullptr. mutate_set_src_file() is a const member that is only declared here. */ +template<> +struct Vertex final : ASTNodeUnary { + const SrcFile* file = nullptr; // assigned after includes have been resolved + + auto get_file_leaf() const { return child->as(); } + + std::string get_file_name() const { return static_cast(child->as()->str_val); } + + void mutate_set_src_file(const SrcFile* file) const; + + Vertex(SrcLocation loc, V file_name) + : ASTNodeUnary(ast_import_statement, loc, file_name) {} +}; +/* ast_tolk_file vertex: the children are the top-level declarations; its location is built from the file pointer, which is also stored. */ +template<> +struct Vertex final : ASTNodeVararg { + const SrcFile* const file; + + const std::vector& get_toplevel_declarations() const { return children; } + + Vertex(const SrcFile* file, std::vector toplevel_declarations) + : ASTNodeVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)), file(file) {} +}; + +} // namespace tolk diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp new file mode 100644 index 000000000..d18cfa644 --- /dev/null +++ b/tolk/builtins.cpp @@ -0,0 +1,1242 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. 
+*/ +#include "tolk.h" +#include "compiler-state.h" + +namespace tolk { +using namespace std::literals::string_literals; + +/* + * + * SYMBOL VALUES + * + */ +/* Registers `name` as a global symbol and attaches func_val to it. Asserts that the symbol had no value yet; with TOLK_DEBUG the name is also copied into the value. Returns the symbol definition. */ +SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) { + sym_idx_t name_idx = G.symbols.lookup_add(name); + SymDef* def = define_global_symbol(name_idx); + tolk_assert(!def->value); + + def->value = func_val; +#ifdef TOLK_DEBUG + def->value->sym_name = name; +#endif + return def; +} + +// given func_type = `(slice, int) -> slice` and func flags, create SymDef for parameters +// currently (see at the bottom) parameters of built-in functions are unnamed: +// built-in functions are created using a resulting type +static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { + // `loadInt()`, `storeInt()`: they accept `self` and mutate it; no other options available in built-ins for now + bool is_mutate_self = func_flags & SymValFunc::flagHasMutateParams; + // func_type is a map (params_type -> ret_type), probably surrounded by forall (internal representation of <T>) + TypeExpr* params_type = func_type->constr == TypeExpr::te_ForAll ? 
func_type->args[0]->args[0] : func_type->args[0]; + std::vector parameters; + + if (params_type->constr == TypeExpr::te_Tensor) { // multiple parameters: it's a tensor + parameters.reserve(params_type->args.size()); + for (int i = 0; i < static_cast(params_type->args.size()); ++i) { + SymDef* sym_def = define_parameter(i, {}); + SymValVariable* sym_val = new SymValVariable(i, params_type->args[i]); + if (i == 0 && is_mutate_self) { + sym_val->flags |= SymValVariable::flagMutateParameter; + } + sym_def->value = sym_val; + parameters.emplace_back(sym_def); + } + } else { // single parameter + SymDef* sym_def = define_parameter(0, {}); + SymValVariable* sym_val = new SymValVariable(0, params_type); + if (is_mutate_self) { + sym_val->flags |= SymValVariable::flagMutateParameter; + } + sym_def->value = sym_val; + parameters.emplace_back(sym_def); + } + + return parameters; +} +/* Overload taking a compile callback: builds the parameter symbols from func_type, adds flagBuiltinFunction to `flags` and registers the function under `name`. */ +static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { + return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction)); +} +/* Overload taking a fixed AsmOp: same as above, with the op wrapped by make_simple_compile(). */ +static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { + return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, make_simple_compile(macro), flags | SymValFunc::flagBuiltinFunction)); +} +/* Overload that additionally forwards explicit argument and return orderings to SymValAsmFunc. */ +static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, + std::initializer_list arg_order, std::initializer_list ret_order) { + return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction, arg_order, ret_order)); +} +/* Emits code for this built-in: uses simple_compile when set (appending its AsmOp to dest), otherwise ext_compile when set, otherwise returns false. */ +bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, + SrcLocation where) const { + if 
(simple_compile) { + return dest.append(simple_compile(out, in, where)); + } else if (ext_compile) { + return ext_compile(dest, out, in); + } else { + return false; + } +} + +/* + * + * DEFINE BUILT-IN FUNCTIONS + * + */ +/* The emulate_* helpers work on VarDescr flag sets (the compile_* functions pass x.val / y.val), not on concrete numbers. emulate_negate: if exactly one of _Pos/_Neg is set, the two bits are swapped; otherwise the flags are returned unchanged. */ +int emulate_negate(int a) { + int f = VarDescr::_Pos | VarDescr::_Neg; + if ((a & f) && (~a & f)) { + a ^= f; + } + return a; +} +/* Flag set for a + b. An operand flagged _Zero yields the other operand's flags as is; a _Nan flag on either operand yields _Int|_Nan; otherwise the result is marked _Finite and sign/parity bits are derived from the operands. */ +int emulate_add(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + int t = u & (VarDescr::_Pos | VarDescr::_Neg); + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet addition always returns finite results! + r |= t | VarDescr::_Finite; + if (t) { + r |= v & VarDescr::_NonZero; + } + r |= v & VarDescr::_Nan; + if (u & (VarDescr::_Odd | VarDescr::_Even)) { + r |= VarDescr::_Even; + } else if (!(~v & (VarDescr::_Odd | VarDescr::_Even))) { + r |= VarDescr::_Odd | VarDescr::_NonZero; + } + return r; +} +/* Flag set for a - b, modelled as a + (-b). */ +int emulate_sub(int a, int b) { + return emulate_add(a, emulate_negate(b)); +} +/* Flag set for a * b. An operand containing all ConstOne bits yields the other operand's flags; a _Nan flag yields _Int|_Nan; any _Zero operand yields ConstZero; otherwise sign and parity bits are combined. */ +int emulate_mul(int a, int b) { + if ((b & VarDescr::ConstOne) == VarDescr::ConstOne) { + return a; + } else if ((a & VarDescr::ConstOne) == VarDescr::ConstOne) { + return b; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet multiplication always yields finite results, if any + r |= VarDescr::_Finite; + if (v & VarDescr::_Zero) { + // non-quiet multiplication + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (u & (VarDescr::_Pos | VarDescr::_Neg)) { + r |= VarDescr::_Pos; + } else if (!(~v & (VarDescr::_Pos | VarDescr::_Neg))) { + r |= VarDescr::_Neg; + } + r |= v & VarDescr::_Even; + r |= u & (VarDescr::_Odd | VarDescr::_NonZero); + return r; +} +/* Flag set for a & b: _Nan propagates, any _Zero operand yields ConstZero, parity is kept only when both operands agree, and odd-and-odd is marked _NonZero. */ +int emulate_bitwise_and(int a, int b) { + int both = a & b, any = a | b; + int r = VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r 
| VarDescr::_Nan; + } + r |= VarDescr::_Finite; + if (any & VarDescr::_Zero) { + return VarDescr::ConstZero; + } + r |= both & (VarDescr::_Even | VarDescr::_Odd); + if (both & VarDescr::_Odd) { + r |= VarDescr::_NonZero; + } + return r; +} +/* Flag set for a | b: a _Zero operand yields the other operand's flags; _Nan propagates; otherwise _NonZero and _Odd are taken from either operand and _Even only when both are even. */ +int emulate_bitwise_or(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int both = a & b, any = a | b; + int r = VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + r |= VarDescr::_Finite; + r |= any & VarDescr::_NonZero; + r |= any & VarDescr::_Odd; + r |= both & VarDescr::_Even; + return r; +} +/* Flag set for a ^ b: a _Zero operand yields the other operand's flags; _Nan propagates; the result is marked _Even when both operands are even or both are odd. No sign or zero-ness bits are set. */ +int emulate_bitwise_xor(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int both = a & b, any = a | b; + int r = VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + r |= VarDescr::_Finite; + r |= both & VarDescr::_Even; + if (both & VarDescr::_Odd) { + r |= VarDescr::_Even; + } + return r; +} +/* Flag set for ~a: flags containing all ConstZero bits map to ConstTrue and vice versa. Otherwise parity is flipped when exactly one parity bit is set, the zero-ness and sign bits are cleared, and the sign is re-derived (_Neg with _NonZero gives _Pos; _Pos gives _Neg). */ +int emulate_bitwise_not(int a) { + if ((a & VarDescr::ConstZero) == VarDescr::ConstZero) { + return VarDescr::ConstTrue; + } + if ((a & VarDescr::ConstTrue) == VarDescr::ConstTrue) { + return VarDescr::ConstZero; + } + int a2 = a; + int f = VarDescr::_Even | VarDescr::_Odd; + if ((a2 & f) && (~a2 & f)) { + a2 ^= f; + } + a2 &= ~(VarDescr::_Zero | VarDescr::_NonZero | VarDescr::_Pos | VarDescr::_Neg); + if ((a & VarDescr::_Neg) && (a & VarDescr::_NonZero)) { + a2 |= VarDescr::_Pos; + } + if (a & VarDescr::_Pos) { + a2 |= VarDescr::_Neg; + } + return a2; +} +/* Flag set for a << b: _Nan on either operand, or a shift amount flagged both _Neg and _NonZero, yields _Int|_Nan; a _Zero shift returns a's flags; otherwise the shift is modelled as multiplication by a positive non-zero factor (marked _Even when the shift is _NonZero). */ +int emulate_lshift(int a, int b) { + if (((a | b) & VarDescr::_Nan) || !(~b & (VarDescr::_Neg | VarDescr::_NonZero))) { + return VarDescr::_Int | VarDescr::_Nan; + } + if (b & VarDescr::_Zero) { + return a; + } + int t = ((b & VarDescr::_NonZero) ? 
VarDescr::_Even : 0); + t |= b & VarDescr::_Finite; + return emulate_mul(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | t); +} +/* Flag set for a / b: a divisor containing all ConstOne bits returns a's flags; a _Zero divisor or a _Nan operand yields _Int|_Nan; a _Zero dividend yields ConstZero; otherwise only _Int, _Finite and a sign bit are set. NOTE(review): the `else if` repeats the ConstOne test of the first `if`, so the emulate_negate(a) branch can never be taken. It looks as if a minus-one mask was intended - confirm before changing. */ +int emulate_div(int a, int b) { + if ((b & VarDescr::ConstOne) == VarDescr::ConstOne) { + return a; + } else if ((b & VarDescr::ConstOne) == VarDescr::ConstOne) { + return emulate_negate(a); + } + if (b & VarDescr::_Zero) { + return VarDescr::_Int | VarDescr::_Nan; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet division always yields finite results, if any + r |= VarDescr::_Finite; + if (a & VarDescr::_Zero) { + // non-quiet division + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (u & (VarDescr::_Pos | VarDescr::_Neg)) { + r |= VarDescr::_Pos; + } else if (!(~v & (VarDescr::_Pos | VarDescr::_Neg))) { + r |= VarDescr::_Neg; + } + return r; +} +/* Flag set for a >> b: same guards as emulate_lshift, but the shift is modelled as division by a positive non-zero divisor. */ +int emulate_rshift(int a, int b) { + if (((a | b) & VarDescr::_Nan) || !(~b & (VarDescr::_Neg | VarDescr::_NonZero))) { + return VarDescr::_Int | VarDescr::_Nan; + } + if (b & VarDescr::_Zero) { + return a; + } + int t = ((b & VarDescr::_NonZero) ? 
VarDescr::_Even : 0); + t |= b & VarDescr::_Finite; + return emulate_div(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | t); +} +/* Flag set for a mod b. A divisor containing all ConstOne bits yields ConstZero; a _Zero divisor or a _Nan operand yields _Int|_Nan; a _Zero dividend yields ConstZero. The sign is taken from b when round_mode < 0 and from -b when round_mode > 0 (no sign bit for 0); the dividend's parity is kept only for an even divisor. */ +int emulate_mod(int a, int b, int round_mode = -1) { + if ((b & VarDescr::ConstOne) == VarDescr::ConstOne) { + return VarDescr::ConstZero; + } + if (b & VarDescr::_Zero) { + return VarDescr::_Int | VarDescr::_Nan; + } + int r = VarDescr::_Int; + if ((a | b) & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet division always yields finite results, if any + r |= VarDescr::_Finite; + if (a & VarDescr::_Zero) { + // non-quiet division + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (round_mode < 0) { + r |= b & (VarDescr::_Pos | VarDescr::_Neg); + } else if (round_mode > 0) { + r |= emulate_negate(b) & (VarDescr::_Pos | VarDescr::_Neg); + } + if (b & VarDescr::_Even) { + r |= a & (VarDescr::_Even | VarDescr::_Odd); + } + return r; +} +/* True when this value is provably less than `other`: compares the constants when both are integer constants, otherwise falls back to the sign predicates. */ +bool VarDescr::always_less(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return int_const < other.int_const; + } + return (always_nonpos() && other.always_pos()) || (always_neg() && other.always_nonneg()); +} +/* True when this value is provably less than or equal to `other` (constants compared directly, otherwise sign predicates). */ +bool VarDescr::always_leq(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return int_const <= other.int_const; + } + return always_nonpos() && other.always_nonneg(); +} +/* Mirror of always_less with the operands swapped. */ +bool VarDescr::always_greater(const VarDescr& other) const { + return other.always_less(*this); +} +/* Mirror of always_leq with the operands swapped. */ +bool VarDescr::always_geq(const VarDescr& other) const { + return other.always_leq(*this); +} +/* True only when both sides are integer constants with equal values. */ +bool VarDescr::always_equal(const VarDescr& other) const { + return is_int_const() && other.is_int_const() && *int_const == *other.int_const; +} +/* True when the values provably differ: unequal constants, a strict ordering either way, or opposite known parity. */ +bool VarDescr::always_neq(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return *int_const != *other.int_const; + } + return always_greater(other) || always_less(other) || (always_even() && other.always_odd()) || + (always_odd() 
&& other.always_even()); +} +/* Wraps an instruction text into AsmOp::Custom without explicit argument/result counts. */ +AsmOp exec_op(std::string op) { + return AsmOp::Custom(op); +} +/* Same, passing explicit argument and result counts (retv defaults to 1). */ +AsmOp exec_op(std::string op, int args, int retv = 1) { + return AsmOp::Custom(op, args, retv); +} +/* The exec_arg_op overloads build the text "ARG OP" (immediate first, then the instruction) and wrap it into AsmOp::Custom, optionally with argument/result counts. */ +AsmOp exec_arg_op(std::string op, long long arg) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str()); +} + +AsmOp exec_arg_op(std::string op, long long arg, int args, int retv) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} + +AsmOp exec_arg_op(std::string op, td::RefInt256 arg) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str()); +} + +AsmOp exec_arg_op(std::string op, td::RefInt256 arg, int args, int retv) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} +/* Builds the text "IMM1 IMM2 OP" with two immediates and wraps it into AsmOp::Custom. */ +AsmOp exec_arg2_op(std::string op, long long imm1, long long imm2, int args, int retv) { + std::ostringstream os; + os << imm1 << ' ' << imm2 << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} +/* Wraps an integer constant into AsmOp::IntConst. */ +AsmOp push_const(td::RefInt256 x) { + return AsmOp::IntConst(std::move(x)); +} +/* Compiles x + y. Two constants are folded (an invalid result throws ParseError "integer overflow"). Otherwise the result flags come from emulate_add, and a constant operand that fits 8 signed bits selects a no-op, INC, DEC or ADDCONST instead of the generic ADD. */ +AsmOp compile_add(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const + y.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_add(x.val, y.val); + if (y.is_int_const() && y.int_const->signed_fits_bits(8)) { + y.unused(); + if (y.always_zero()) { + return AsmOp::Nop(); + } + if (*y.int_const == 1) { + return exec_op("INC", 1); + } + if (*y.int_const == -1) { + return exec_op("DEC", 1); + } + return exec_arg_op("ADDCONST", y.int_const, 1); + } + if (x.is_int_const() && x.int_const->signed_fits_bits(8)) { + x.unused(); + if (x.always_zero()) { + return 
AsmOp::Nop(); + } + if (*x.int_const == 1) { + return exec_op("INC", 1); + } + if (*x.int_const == -1) { + return exec_op("DEC", 1); + } + return exec_arg_op("ADDCONST", x.int_const, 1); + } + return exec_op("ADD", 2); +} +/* Compiles x - y. Two constants are folded (an invalid result throws ParseError "integer overflow"). A constant y whose negation fits 8 signed bits selects an empty op, DEC, INC or ADDCONST with -y; a zero x becomes NEGATE; otherwise SUB. NOTE(review): the zero case returns `{}` where compile_add returns AsmOp::Nop() - presumably equivalent, confirm. */ +AsmOp compile_sub(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const - y.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_sub(x.val, y.val); + if (y.is_int_const() && (-y.int_const)->signed_fits_bits(8)) { + y.unused(); + if (y.always_zero()) { + return {}; + } + if (*y.int_const == 1) { + return exec_op("DEC", 1); + } + if (*y.int_const == -1) { + return exec_op("INC", 1); + } + return exec_arg_op("ADDCONST", -y.int_const, 1); + } + if (x.always_zero()) { + x.unused(); + return exec_op("NEGATE", 1); + } + return exec_op("SUB", 2); +} +/* Compiles -x: a constant is folded (an invalid result throws ParseError "integer overflow"); otherwise NEGATE is emitted with flags from emulate_negate. */ +AsmOp compile_unary_minus(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(-x.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + return push_const(r.int_const); + } + r.val = emulate_negate(x.val); + return exec_op("NEGATE", 1); +} +/* Compiles +x: a constant is pushed as is; otherwise the flags are copied and a no-op is emitted. */ +AsmOp compile_unary_plus(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(x.int_const); + x.unused(); + return push_const(r.int_const); + } + r.val = x.val; + return AsmOp::Nop(); +} +/* Compiles logical not: a constant folds to -1 when it equals 0 and to 0 otherwise; a non-constant gets ValBool flags and the instruction "0 EQINT". */ +AsmOp compile_logical_not(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = 
res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(x.int_const == 0 ? -1 : 0); + x.unused(); + return push_const(r.int_const); + } + r.val = VarDescr::ValBool; + return exec_op("0 EQINT", 1); +} +/* Compiles x & y: two constants are folded; otherwise AND is emitted with flags from emulate_bitwise_and. */ +AsmOp compile_bitwise_and(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const & y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_bitwise_and(x.val, y.val); + return exec_op("AND", 2); +} +/* Compiles x | y: two constants are folded; otherwise OR is emitted with flags from emulate_bitwise_or. */ +AsmOp compile_bitwise_or(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const | y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_bitwise_or(x.val, y.val); + return exec_op("OR", 2); +} +/* Compiles x ^ y: two constants are folded; otherwise XOR is emitted with flags from emulate_bitwise_xor. */ +AsmOp compile_bitwise_xor(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const ^ y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_bitwise_xor(x.val, y.val); + return exec_op("XOR", 2); +} +/* Compiles ~x: a constant is folded; otherwise NOT is emitted with flags from emulate_bitwise_not. */ +AsmOp compile_bitwise_not(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(~x.int_const); + x.unused(); + return push_const(r.int_const); + } + r.val = emulate_bitwise_not(x.val); + return exec_op("NOT", 1); +} +/* Compiles x * y into r. Two constants are folded (an invalid result throws ParseError "integer overflow"). With one constant operand a cheaper form is chosen where possible (constant push, no-op, NEGATE, MULCONST, or LSHIFT# for powers of two); otherwise MUL. */ +AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where) { + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const * y.int_const); + 
if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_mul(x.val, y.val); +/* y is a constant: k comes from is_pos_pow2() (defined elsewhere). Judging by its use below, k > 0 is a shift amount for LSHIFT#, k == 0 means multiplying by one, and k < 0 means "not a positive power of two" - confirm against its definition. */ if (y.is_int_const()) { + int k = is_pos_pow2(y.int_const); + if (y.int_const->signed_fits_bits(8) && k < 0) { + y.unused(); + if (y.always_zero() && x.always_finite()) { + // dubious optimization: NaN * 0 = ? + r.set_const(y.int_const); + x.unused(); + return push_const(r.int_const); + } + if (*y.int_const == 1 && x.always_finite()) { + return AsmOp::Nop(); + } + if (*y.int_const == -1) { + return exec_op("NEGATE", 1); + } + return exec_arg_op("MULCONST", y.int_const, 1); + } + if (k > 0) { + y.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (k == 0) { + y.unused(); + return AsmOp::Nop(); + } + } + if (x.is_int_const()) { + int k = is_pos_pow2(x.int_const); + if (x.int_const->signed_fits_bits(8) && k < 0) { + x.unused(); + if (x.always_zero() && y.always_finite()) { + // dubious optimization: NaN * 0 = ? 
+ r.set_const(x.int_const); + y.unused(); + return push_const(r.int_const); + } + if (*x.int_const == 1 && y.always_finite()) { + return AsmOp::Nop(); + } + if (*x.int_const == -1) { + return exec_op("NEGATE", 1); + } + return exec_arg_op("MULCONST", x.int_const, 1); + } + if (k > 0) { + x.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (k == 0) { + x.unused(); + return AsmOp::Nop(); + } + } + return exec_op("MUL", 2); +} + +AsmOp compile_mul(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 2); + return compile_mul_internal(res[0], args[0], args[1], where); +} + +AsmOp compile_lshift(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (y.is_int_const()) { + auto yv = y.int_const->to_long(); + if (yv < 0 || yv > 256) { + throw ParseError(where, "lshift argument is out of range"); + } else if (x.is_int_const()) { + r.set_const(x.int_const << (int)yv); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + } + r.val = emulate_lshift(x.val, y.val); + if (y.is_int_const()) { + int k = (int)(y.int_const->to_long()); + if (!k /* && x.always_finite() */) { + // dubious optimization: what if x=NaN ? 
+ y.unused(); + return AsmOp::Nop(); + } + y.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (x.is_int_const()) { + auto xv = x.int_const->to_long(); + if (xv == 1) { + x.unused(); + return exec_op("POW2", 1); + } + if (xv == -1) { + x.unused(); + return exec_op("-1 PUSHINT SWAP LSHIFT", 1); + } + } + return exec_op("LSHIFT", 2); +} + +AsmOp compile_rshift(std::vector& res, std::vector& args, SrcLocation where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (y.is_int_const()) { + auto yv = y.int_const->to_long(); + if (yv < 0 || yv > 256) { + throw ParseError(where, "rshift argument is out of range"); + } else if (x.is_int_const()) { + r.set_const(td::rshift(x.int_const, (int)yv, round_mode)); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + } + r.val = emulate_rshift(x.val, y.val); + std::string rshift = (round_mode < 0 ? "RSHIFT" : (round_mode ? "RSHIFTC" : "RSHIFTR")); + if (y.is_int_const()) { + int k = (int)(y.int_const->to_long()); + if (!k /* && x.always_finite() */) { + // dubious optimization: what if x=NaN ? + y.unused(); + return AsmOp::Nop(); + } + y.unused(); + return exec_arg_op(rshift + "#", k, 1); + } + return exec_op(rshift, 2); +} + +AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where, int round_mode) { + if (x.is_int_const() && y.is_int_const()) { + r.set_const(div(x.int_const, y.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *y.int_const == 0 ? 
"division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_div(x.val, y.val); + if (y.is_int_const()) { + if (*y.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if (*y.int_const == 1 && x.always_finite()) { + y.unused(); + return AsmOp::Nop(); + } + if (*y.int_const == -1) { + y.unused(); + return exec_op("NEGATE", 1); + } + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "RSHIFT"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_arg_op(op + '#', k, 1); + } + } + std::string op = "DIV"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_op(op, 2); +} + +AsmOp compile_div(std::vector& res, std::vector& args, SrcLocation where, int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + return compile_div_internal(res[0], args[0], args[1], where, round_mode); +} + +AsmOp compile_mod(std::vector& res, std::vector& args, SrcLocation where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(mod(x.int_const, y.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *y.int_const == 0 ? "division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_mod(x.val, y.val); + if (y.is_int_const()) { + if (*y.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if ((*y.int_const == 1 || *y.int_const == -1) && x.always_finite()) { + x.unused(); + y.unused(); + r.set_const(td::zero_refint()); + return push_const(r.int_const); + } + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "MODPOW2"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 
'C' : 'R'); + } + return exec_arg_op(op + '#', k, 1); + } + } + std::string op = "MOD"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_op(op, 2); +} + +AsmOp compile_muldiv(std::vector& res, std::vector& args, SrcLocation where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 3); + VarDescr &r = res[0], &x = args[0], &y = args[1], &z = args[2]; + if (x.is_int_const() && y.is_int_const() && z.is_int_const()) { + r.set_const(muldiv(x.int_const, y.int_const, z.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *z.int_const == 0 ? "division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + z.unused(); + return push_const(r.int_const); + } + if (x.always_zero() || y.always_zero()) { + // dubious optimization for z=0... + x.unused(); + y.unused(); + z.unused(); + r.set_const(td::make_refint(0)); + return push_const(r.int_const); + } + char c = (round_mode < 0) ? 0 : (round_mode > 0 ? 'C' : 'R'); + r.val = emulate_div(emulate_mul(x.val, y.val), z.val); + if (z.is_int_const()) { + if (*z.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if (*z.int_const == 1) { + z.unused(); + return compile_mul_internal(r, x, y, where); + } + } + if (y.is_int_const() && *y.int_const == 1) { + y.unused(); + return compile_div_internal(r, x, z, where, round_mode); + } + if (x.is_int_const() && *x.int_const == 1) { + x.unused(); + return compile_div_internal(r, y, z, where, round_mode); + } + if (z.is_int_const()) { + int k = is_pos_pow2(z.int_const); + if (k > 0) { + z.unused(); + std::string op = "MULRSHIFT"; + if (c) { + op += c; + } + return exec_arg_op(op + '#', k, 2); + } + } + if (y.is_int_const()) { + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "LSHIFT#DIV"; + if (c) { + op += c; + } + return exec_arg_op(op, k, 2); + } + } + if (x.is_int_const()) { + int k = is_pos_pow2(x.int_const); + if (k > 0) { + x.unused(); + 
std::string op = "LSHIFT#DIV"; + if (c) { + op += c; + } + return exec_arg_op(op, k, 2); + } + } + std::string op = "MULDIV"; + if (c) { + op += c; + } + return exec_op(op, 3); +} + +int compute_compare(td::RefInt256 x, td::RefInt256 y, int mode) { + int s = td::cmp(x, y); + if (mode == 7) { + return s; + } else { + return -((mode >> (1 - s)) & 1); + } +} + +// return value: +// 4 -> constant 1 +// 2 -> constant 0 +// 1 -> constant -1 +// 3 -> 0 or -1 +int compute_compare(const VarDescr& x, const VarDescr& y, int mode) { + switch (mode) { + case 1: // > + return x.always_greater(y) ? 1 : (x.always_leq(y) ? 2 : 3); + case 2: // = + return x.always_equal(y) ? 1 : (x.always_neq(y) ? 2 : 3); + case 3: // >= + return x.always_geq(y) ? 1 : (x.always_less(y) ? 2 : 3); + case 4: // < + return x.always_less(y) ? 1 : (x.always_geq(y) ? 2 : 3); + case 5: // <> + return x.always_neq(y) ? 1 : (x.always_equal(y) ? 2 : 3); + case 6: // <= + return x.always_leq(y) ? 1 : (x.always_greater(y) ? 2 : 3); + case 7: // <=> + return x.always_less(y) + ? 1 + : (x.always_equal(y) + ? 2 + : (x.always_greater(y) + ? 4 + : (x.always_leq(y) ? 3 : (x.always_geq(y) ? 6 : (x.always_neq(y) ? 5 : 7))))); + default: + return 7; + } +} + +AsmOp compile_cmp_int(std::vector& res, std::vector& args, int mode) { + tolk_assert(mode >= 1 && mode <= 7); + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + int v = compute_compare(x.int_const, y.int_const, mode); + r.set_const(v); + x.unused(); + y.unused(); + return mode == 7 ? push_const(r.int_const) : AsmOp::BoolConst(v != 0); + } + int v = compute_compare(x, y, mode); + // std::cerr << "compute_compare(" << x << ", " << y << ", " << mode << ") = " << v << std::endl; + tolk_assert(v); + if (!(v & (v - 1))) { + r.set_const(v - (v >> 2) - 2); + x.unused(); + y.unused(); + return mode == 7 ? 
push_const(r.int_const) : AsmOp::BoolConst(v & 1); + } + r.val = ~0; + if (v & 1) { + r.val &= VarDescr::ConstTrue; + } + if (v & 2) { + r.val &= VarDescr::ConstZero; + } + if (v & 4) { + r.val &= VarDescr::ConstOne; + } + // std::cerr << "result: " << r << std::endl; + static const char* cmp_int_names[] = {"", "GTINT", "EQINT", "GTINT", "LESSINT", "NEQINT", "LESSINT"}; + static const char* cmp_names[] = {"", "GREATER", "EQUAL", "GEQ", "LESS", "NEQ", "LEQ", "CMP"}; + static int cmp_int_delta[] = {0, 0, 0, -1, 0, 0, 1}; + if (mode != 7) { + if (y.is_int_const() && y.int_const >= -128 && y.int_const <= 127) { + y.unused(); + return exec_arg_op(cmp_int_names[mode], y.int_const + cmp_int_delta[mode], 1); + } + if (x.is_int_const() && x.int_const >= -128 && x.int_const <= 127) { + x.unused(); + mode = ((mode & 4) >> 2) | (mode & 2) | ((mode & 1) << 2); + return exec_arg_op(cmp_int_names[mode], x.int_const + cmp_int_delta[mode], 1); + } + } + return exec_op(cmp_names[mode], 2); +} + +AsmOp compile_throw(std::vector& res, std::vector& args, SrcLocation) { + tolk_assert(res.empty() && args.size() == 1); + VarDescr& x = args[0]; + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return exec_arg_op("THROW", x.int_const, 0, 0); + } else { + return exec_op("THROWANY", 1, 0); + } +} + +AsmOp compile_throw_if_unless(std::vector& res, std::vector& args, SrcLocation) { + tolk_assert(res.empty() && args.size() == 3); + VarDescr &x = args[0], &y = args[1], &z = args[2]; + if (!z.always_true() && !z.always_false()) { + throw Fatal("invalid usage of built-in symbol"); + } + bool mode = z.always_true(); + z.unused(); + std::string suff = (mode ? "IF" : "IFNOT"); + bool skip_cond = false; + if (y.always_true() || y.always_false()) { + y.unused(); + skip_cond = true; + if (y.always_true() != mode) { + x.unused(); + return AsmOp::Nop(); + } + } + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return skip_cond ? 
exec_arg_op("THROW", x.int_const, 0, 0) : exec_arg_op("THROW"s + suff, x.int_const, 1, 0); + } else { + return skip_cond ? exec_op("THROWANY", 1, 0) : exec_op("THROWANY"s + suff, 2, 0); + } +} + +AsmOp compile_throw_arg(std::vector& res, std::vector& args, SrcLocation) { + tolk_assert(res.empty() && args.size() == 2); + VarDescr &x = args[1]; + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return exec_arg_op("THROWARG", x.int_const, 1, 0); + } else { + return exec_op("THROWARGANY", 2, 0); + } +} + +AsmOp compile_bool_const(std::vector& res, std::vector& args, bool val) { + tolk_assert(res.size() == 1 && args.empty()); + VarDescr& r = res[0]; + r.set_const(val ? -1 : 0); + return AsmOp::Const(val ? "TRUE" : "FALSE"); +} + +// fun loadInt (mutate s: slice, len: int): int asm(s len -> 1 0) "LDIX"; +// fun loadUint (mutate s: slice, len: int): int asm( -> 1 0) "LDUX"; +// fun preloadInt (s: slice, len: int): int asm "PLDIX"; +// fun preloadUint(s: slice, len: int): int asm "PLDUX"; +AsmOp compile_fetch_int(std::vector& res, std::vector& args, bool fetch, bool sgnd) { + tolk_assert(args.size() == 2 && res.size() == 1 + (unsigned)fetch); + auto &y = args[1], &r = res.back(); + r.val = (sgnd ? VarDescr::FiniteInt : VarDescr::FiniteUInt); + int v = -1; + if (y.is_int_const() && y.int_const >= 0 && y.int_const <= 256) { + v = (int)y.int_const->to_long(); + if (!v) { + r.val = VarDescr::ConstZero; + } + if (v == 1) { + r.val = (sgnd ? VarDescr::ValBool : VarDescr::ValBit); + } + if (v > 0) { + y.unused(); + return exec_arg_op((fetch ? "LD"s : "PLD"s) + (sgnd ? 'I' : 'U'), v, 1, 1 + (unsigned)fetch); + } + } + return exec_op((fetch ? "LD"s : "PLD"s) + (sgnd ? 
"IX" : "UX"), 2, 1 + (unsigned)fetch); +} + +// fun storeInt (mutate self: builder, x: int, len: int): self asm(x b len) "STIX"; +// fun storeUint (mutate self: builder, x: int, len: int): self asm(x b len) "STUX"; +AsmOp compile_store_int(std::vector& res, std::vector& args, bool sgnd) { + tolk_assert(args.size() == 3 && res.size() == 1); + auto& z = args[2]; + if (z.is_int_const() && z.int_const > 0 && z.int_const <= 256) { + z.unused(); + return exec_arg_op("ST"s + (sgnd ? 'I' : 'U'), z.int_const, 2, 1); + } + return exec_op("ST"s + (sgnd ? "IX" : "UX"), 3, 1); +} + +// fun loadBits (mutate self: slice, len: int): self asm(s len -> 1 0) "LDSLICEX" +// fun preloadBits(self: slice, len: int): slice asm(s len -> 1 0) "PLDSLICEX" +AsmOp compile_fetch_slice(std::vector& res, std::vector& args, bool fetch) { + tolk_assert(args.size() == 2 && res.size() == 1 + (unsigned)fetch); + auto& y = args[1]; + int v = -1; + if (y.is_int_const() && y.int_const > 0 && y.int_const <= 256) { + v = (int)y.int_const->to_long(); + if (v > 0) { + y.unused(); + return exec_arg_op(fetch ? "LDSLICE" : "PLDSLICE", v, 1, 1 + (unsigned)fetch); + } + } + return exec_op(fetch ? 
"LDSLICEX" : "PLDSLICEX", 2, 1 + (unsigned)fetch); +} + +// fun at(t: tuple, index: int): X asm "INDEXVAR"; +AsmOp compile_tuple_at(std::vector& res, std::vector& args, SrcLocation) { + tolk_assert(args.size() == 2 && res.size() == 1); + auto& y = args[1]; + if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) { + y.unused(); + return exec_arg_op("INDEX", y.int_const, 1, 1); + } + return exec_op("INDEXVAR", 2, 1); +} + +// fun __isNull(X arg): int +AsmOp compile_is_null(std::vector& res, std::vector& args, SrcLocation) { + tolk_assert(args.size() == 1 && res.size() == 1); + res[0].val = VarDescr::ValBool; + return exec_op("ISNULL", 1, 1); +} + + +void define_builtins() { + using namespace std::placeholders; + + TypeExpr* Unit = TypeExpr::new_unit(); + TypeExpr* Int = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* Slice = TypeExpr::new_atomic(TypeExpr::_Slice); + TypeExpr* Builder = TypeExpr::new_atomic(TypeExpr::_Builder); + TypeExpr* Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple); + TypeExpr* Int2 = TypeExpr::new_tensor({Int, Int}); + TypeExpr* Int3 = TypeExpr::new_tensor({Int, Int, Int}); + TypeExpr* TupleInt = TypeExpr::new_tensor({Tuple, Int}); + TypeExpr* SliceInt = TypeExpr::new_tensor({Slice, Int}); + TypeExpr* X = TypeExpr::new_var(0); + TypeExpr* arith_bin_op = TypeExpr::new_map(Int2, Int); + TypeExpr* arith_un_op = TypeExpr::new_map(Int, Int); + TypeExpr* impure_un_op = TypeExpr::new_map(Int, Unit); + TypeExpr* fetch_int_op_mutate = TypeExpr::new_map(SliceInt, SliceInt); + TypeExpr* prefetch_int_op = TypeExpr::new_map(SliceInt, Int); + TypeExpr* store_int_mutate = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit})); + TypeExpr* fetch_slice_op_mutate = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice})); + TypeExpr* prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice); + TypeExpr* throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); + + 
define_builtin_func("_+_", arith_bin_op, compile_add, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_-_", arith_bin_op, compile_sub, + SymValFunc::flagMarkedAsPure); + define_builtin_func("-_", arith_un_op, compile_unary_minus, + SymValFunc::flagMarkedAsPure); + define_builtin_func("+_", arith_un_op, compile_unary_plus, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_*_", arith_bin_op, compile_mul, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_<<_", arith_bin_op, compile_lshift, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("!_", arith_un_op, compile_logical_not, + SymValFunc::flagMarkedAsPure); + define_builtin_func("~_", arith_un_op, compile_bitwise_not, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_&_", arith_bin_op, compile_bitwise_and, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_|_", arith_bin_op, compile_bitwise_or, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_+=_", arith_bin_op, compile_add, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_-=_", arith_bin_op, compile_sub, 
+ SymValFunc::flagMarkedAsPure); + define_builtin_func("^_*=_", arith_bin_op, compile_mul, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_<<=_", arith_bin_op, compile_lshift, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7), + SymValFunc::flagMarkedAsPure); + define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0), + SymValFunc::flagMarkedAsPure); + 
define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2), + SymValFunc::flagMarkedAsPure); + define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true), + SymValFunc::flagMarkedAsPure); + define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false), + SymValFunc::flagMarkedAsPure); + define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL"), + SymValFunc::flagMarkedAsPure); + define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null, + SymValFunc::flagMarkedAsPure); + define_builtin_func("__throw", impure_un_op, compile_throw, + 0); + define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, + 0); + define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), compile_throw_if_unless, + 0); + define_builtin_func("loadInt", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, true), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + define_builtin_func("loadUint", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, false), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + define_builtin_func("loadBits", fetch_slice_op_mutate, std::bind(compile_fetch_slice, _1, _2, true), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true), + SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + 
define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false), + SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false), + SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + define_builtin_func("storeInt", store_int_mutate, std::bind(compile_store_int, _1, _2, true), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); + define_builtin_func("storeUint", store_int_mutate, std::bind(compile_store_int, _1, _2, false), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); + define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at, + SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + AsmOp::Custom("s0 DUMP DROP", 1, 1), + 0); + define_builtin_func("debugPrintString", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + AsmOp::Custom("STRDUMP DROP", 1, 1), + 0); + define_builtin_func("debugDumpStack", TypeExpr::new_map(Unit, Unit), + AsmOp::Custom("DUMPSTK", 0, 0), + 0); +} + +} // namespace tolk diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp new file mode 100644 index 000000000..9a90a3ed9 --- /dev/null +++ b/tolk/codegen.cpp @@ -0,0 +1,908 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "compiler-state.h" + +namespace tolk { + +/* + * + * GENERATE TVM STACK CODE + * + */ + +StackLayout Stack::vars() const { + StackLayout res; + res.reserve(s.size()); + for (auto x : s) { + res.push_back(x.first); + } + return res; +} + +int Stack::find(var_idx_t var, int from) const { + for (int i = from; i < depth(); i++) { + if (at(i).first == var) { + return i; + } + } + return -1; +} + +// finds var in [from .. to) +int Stack::find(var_idx_t var, int from, int to) const { + for (int i = from; i < depth() && i < to; i++) { + if (at(i).first == var) { + return i; + } + } + return -1; +} + +// finds var outside [from .. to) +int Stack::find_outside(var_idx_t var, int from, int to) const { + from = std::max(from, 0); + if (from >= to) { + return find(var); + } else { + int t = find(var, 0, from); + return t >= 0 ? 
t : find(var, to); + } +} + +int Stack::find_const(const_idx_t cst, int from) const { + for (int i = from; i < depth(); i++) { + if (at(i).second == cst) { + return i; + } + } + return -1; +} + +void Stack::forget_const() { + for (auto& vc : s) { + if (vc.second != not_const) { + vc.second = not_const; + } + } +} + +void Stack::issue_pop(int i) { + validate(i); + if (output_enabled()) { + o << AsmOp::Pop(i); + } + at(i) = get(0); + s.pop_back(); + modified(); +} + +void Stack::issue_push(int i) { + validate(i); + if (output_enabled()) { + o << AsmOp::Push(i); + } + s.push_back(get(i)); + modified(); +} + +void Stack::issue_xchg(int i, int j) { + validate(i); + validate(j); + if (i != j && get(i) != get(j)) { + if (output_enabled()) { + o << AsmOp::Xchg(i, j); + } + std::swap(at(i), at(j)); + modified(); + } +} + +int Stack::drop_vars_except(const VarDescrList& var_info, int excl_var) { + int dropped = 0, changes; + do { + changes = 0; + int n = depth(); + for (int i = 0; i < n; i++) { + var_idx_t idx = at(i).first; + if (((!var_info[idx] || var_info[idx]->is_unused()) && idx != excl_var) || find(idx, 0, i - 1) >= 0) { + // unneeded + issue_pop(i); + changes = 1; + break; + } + } + dropped += changes; + } while (changes); + return dropped; +} + +void Stack::show(int flags) { + std::ostringstream os; + for (auto i : s) { + os << ' '; + o.show_var_ext(os, i); + } + o << AsmOp::Comment(os.str()); + mode |= _Shown; +} + +void Stack::forget_var(var_idx_t idx) { + for (auto& x : s) { + if (x.first == idx) { + x = std::make_pair(_Garbage, not_const); + modified(); + } + } +} + +void Stack::push_new_var(var_idx_t idx) { + forget_var(idx); + s.emplace_back(idx, not_const); + modified(); +} + +void Stack::push_new_const(var_idx_t idx, const_idx_t cidx) { + forget_var(idx); + s.emplace_back(idx, cidx); + modified(); +} + +void Stack::assign_var(var_idx_t new_idx, var_idx_t old_idx) { + int i = find(old_idx); + tolk_assert(i >= 0 && "variable not found in stack"); + if (new_idx 
!= old_idx) { + at(i).first = new_idx; + modified(); + } +} + +void Stack::do_copy_var(var_idx_t new_idx, var_idx_t old_idx) { + int i = find(old_idx); + tolk_assert(i >= 0 && "variable not found in stack"); + if (find(old_idx, i + 1) < 0) { + issue_push(i); + tolk_assert(at(0).first == old_idx); + } + assign_var(new_idx, old_idx); +} + +void Stack::enforce_state(const StackLayout& req_stack) { + int k = (int)req_stack.size(); + for (int i = 0; i < k; i++) { + var_idx_t x = req_stack[i]; + if (i < depth() && s[i].first == x) { + continue; + } + while (depth() > 0 && std::find(req_stack.cbegin(), req_stack.cend(), get(0).first) == req_stack.cend()) { + // current TOS entry is unused in req_stack, drop it + issue_pop(0); + } + int j = find(x); + if (j >= depth() - i) { + issue_push(j); + j = 0; + } + issue_xchg(j, depth() - i - 1); + tolk_assert(s[i].first == x); + } + while (depth() > k) { + issue_pop(0); + } + tolk_assert(depth() == k); + for (int i = 0; i < k; i++) { + tolk_assert(s[i].first == req_stack[i]); + } +} + +void Stack::merge_const(const Stack& req_stack) { + tolk_assert(s.size() == req_stack.s.size()); + for (std::size_t i = 0; i < s.size(); i++) { + tolk_assert(s[i].first == req_stack.s[i].first); + if (s[i].second != req_stack.s[i].second) { + s[i].second = not_const; + } + } +} + +void Stack::merge_state(const Stack& req_stack) { + enforce_state(req_stack.vars()); + merge_const(req_stack); +} + +void Stack::rearrange_top(const StackLayout& top, std::vector last) { + while (last.size() < top.size()) { + last.push_back(false); + } + int k = (int)top.size(); + for (int i = 0; i < k; i++) { + for (int j = i + 1; j < k; j++) { + if (top[i] == top[j]) { + last[i] = false; + break; + } + } + } + int ss = 0; + for (int i = 0; i < k; i++) { + if (last[i]) { + ++ss; + } + } + for (int i = 0; i < k; i++) { + var_idx_t x = top[i]; + // find s(j) containing x with j not in [ss, ss+i) + int j = find_outside(x, ss, ss + i); + if (last[i]) { + // rearrange x to be 
at s(ss-1) + issue_xchg(--ss, j); + tolk_assert(get(ss).first == x); + } else { + // create a new copy of x + issue_push(j); + issue_xchg(0, ss); + tolk_assert(get(ss).first == x); + } + } + tolk_assert(!ss); +} + +void Stack::rearrange_top(var_idx_t top, bool last) { + int i = find(top); + if (last) { + issue_xchg(0, i); + } else { + issue_push(i); + } + tolk_assert(get(0).first == top); +} + +bool Op::generate_code_step(Stack& stack) { + stack.opt_show(); + stack.drop_vars_except(var_info); + stack.opt_show(); + bool inline_func = stack.mode & Stack::_InlineFunc; + switch (cl) { + case _Nop: + case _Import: + return true; + case _Return: { + stack.enforce_state(left); + if (stack.o.retalt_ && (stack.mode & Stack::_NeedRetAlt)) { + stack.o << "RETALT"; + } + stack.opt_show(); + return false; + } + case _IntConst: { + auto p = next->var_info[left[0]]; + if (!p || p->is_unused()) { + return true; + } + auto cidx = stack.o.register_const(int_const); + int i = stack.find_const(cidx); + if (i < 0) { + stack.o << push_const(int_const); + stack.push_new_const(left[0], cidx); + } else { + tolk_assert(stack.at(i).second == cidx); + stack.do_copy_var(left[0], stack[i]); + } + return true; + } + case _SliceConst: { + auto p = next->var_info[left[0]]; + if (!p || p->is_unused()) { + return true; + } + stack.o << AsmOp::Const("x{" + str_const + "} PUSHSLICE"); + stack.push_new_var(left[0]); + return true; + } + case _GlobVar: + if (dynamic_cast(fun_ref->value)) { + bool used = false; + for (auto i : left) { + auto p = next->var_info[i]; + if (p && !p->is_unused()) { + used = true; + } + } + if (!used || disabled()) { + return true; + } + std::string name = G.symbols.get_name(fun_ref->sym_idx); + stack.o << AsmOp::Custom(name + " GETGLOB", 0, 1); + if (left.size() != 1) { + tolk_assert(left.size() <= 15); + stack.o << AsmOp::UnTuple((int)left.size()); + } + for (auto i : left) { + stack.push_new_var(i); + } + return true; + } else { + tolk_assert(left.size() == 1); + auto p = 
next->var_info[left[0]]; + if (!p || p->is_unused() || disabled()) { + return true; + } + stack.o << "CONT:<{"; + stack.o.indent(); + auto func = dynamic_cast(fun_ref->value); + if (func) { + // TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly) + std::vector args0, res; + TypeExpr::remove_indirect(func->sym_type); + tolk_assert(func->get_type()->is_map()); + auto wr = func->get_type()->args.at(0)->get_width(); + auto wl = func->get_type()->args.at(1)->get_width(); + tolk_assert(wl >= 0 && wr >= 0); + for (int i = 0; i < wl; i++) { + res.emplace_back(0); + } + for (int i = 0; i < wr; i++) { + args0.emplace_back(0); + } + func->compile(stack.o, res, args0, where); // compile res := f (args0) + } else { + std::string name = G.symbols.get_name(fun_ref->sym_idx); + stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); + } + stack.o.undent(); + stack.o << "}>"; + stack.push_new_var(left.at(0)); + return true; + } + case _Let: { + tolk_assert(left.size() == right.size()); + int i = 0; + std::vector active; + active.reserve(left.size()); + for (std::size_t k = 0; k < left.size(); k++) { + var_idx_t y = left[k]; // "y" = "x" + auto p = next->var_info[y]; + active.push_back(p && !p->is_unused()); + } + for (std::size_t k = 0; k < left.size(); k++) { + if (!active[k]) { + continue; + } + var_idx_t x = right[k]; // "y" = "x" + bool is_last = true; + for (std::size_t l = k + 1; l < right.size(); l++) { + if (right[l] == x && active[l]) { + is_last = false; + } + } + if (is_last) { + auto info = var_info[x]; + is_last = (info && info->is_last()); + } + if (is_last) { + stack.assign_var(--i, x); + } else { + stack.do_copy_var(--i, x); + } + } + i = 0; + for (std::size_t k = 0; k < left.size(); k++) { + if (active[k]) { + stack.assign_var(left[k], --i); + } + } + return true; + } + case _Tuple: + case _UnTuple: { + if (disabled()) { + return true; + } + std::vector last; + for (var_idx_t x : 
right) { + last.push_back(var_info[x] && var_info[x]->is_last()); + } + stack.rearrange_top(right, std::move(last)); + stack.opt_show(); + int k = (int)stack.depth() - (int)right.size(); + tolk_assert(k >= 0); + if (cl == _Tuple) { + stack.o << AsmOp::Tuple((int)right.size()); + tolk_assert(left.size() == 1); + } else { + stack.o << AsmOp::UnTuple((int)left.size()); + tolk_assert(right.size() == 1); + } + stack.s.resize(k); + for (int i = 0; i < (int)left.size(); i++) { + stack.push_new_var(left.at(i)); + } + return true; + } + case _Call: + case _CallInd: { + if (disabled()) { + return true; + } + // fun_ref can be nullptr for Op::_CallInd (invoke a variable, not a function) + SymValFunc* func = (fun_ref ? dynamic_cast(fun_ref->value) : nullptr); + auto arg_order = (func ? func->get_arg_order() : nullptr); + auto ret_order = (func ? func->get_ret_order() : nullptr); + tolk_assert(!arg_order || arg_order->size() == right.size()); + tolk_assert(!ret_order || ret_order->size() == left.size()); + std::vector right1; + if (args.size()) { + tolk_assert(args.size() == right.size()); + for (int i = 0; i < (int)right.size(); i++) { + int j = arg_order ? 
arg_order->at(i) : i; + const VarDescr& arg = args.at(j); + if (!arg.is_unused()) { + tolk_assert(var_info[arg.idx] && !var_info[arg.idx]->is_unused()); + right1.push_back(arg.idx); + } + } + } else if (arg_order) { + for (int i = 0; i < (int)right.size(); i++) { + right1.push_back(right.at(arg_order->at(i))); + } + } else { + right1 = right; + } + std::vector last; + for (var_idx_t x : right1) { + last.push_back(var_info[x] && var_info[x]->is_last()); + } + stack.rearrange_top(right1, std::move(last)); + stack.opt_show(); + int k = (int)stack.depth() - (int)right1.size(); + tolk_assert(k >= 0); + for (int i = 0; i < (int)right1.size(); i++) { + if (stack.s[k + i].first != right1[i]) { + std::cerr << stack.o; + } + tolk_assert(stack.s[k + i].first == right1[i]); + } + auto exec_callxargs = [&](int args, int ret) { + if (args <= 15 && ret <= 15) { + stack.o << exec_arg2_op("CALLXARGS", args, ret, args + 1, ret); + } else { + tolk_assert(args <= 254 && ret <= 254); + stack.o << AsmOp::Const(PSTRING() << args << " PUSHINT"); + stack.o << AsmOp::Const(PSTRING() << ret << " PUSHINT"); + stack.o << AsmOp::Custom("CALLXVARARGS", args + 3, ret); + } + }; + if (cl == _CallInd) { + exec_callxargs((int)right.size() - 1, (int)left.size()); + } else if (auto asm_fv = dynamic_cast(fun_ref->value)) { + std::vector res; + res.reserve(left.size()); + for (var_idx_t i : left) { + res.emplace_back(i); + } + asm_fv->compile(stack.o, res, args, where); // compile res := f (args) + } else { + auto fv = dynamic_cast(fun_ref->value); + std::string name = G.symbols.get_name(fun_ref->sym_idx); + if (fv->is_inline() || fv->is_inline_ref()) { + stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); + } else if (fv->code && fv->code->require_callxargs) { + stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2); + exec_callxargs((int)right.size() + 1, (int)left.size()); + } else { + stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), 
(int)left.size()); + } + } + stack.s.resize(k); + for (int i = 0; i < (int)left.size(); i++) { + int j = ret_order ? ret_order->at(i) : i; + stack.push_new_var(left.at(j)); + } + return true; + } + case _SetGlob: { + tolk_assert(fun_ref && dynamic_cast(fun_ref->value)); + std::vector last; + for (var_idx_t x : right) { + last.push_back(var_info[x] && var_info[x]->is_last()); + } + stack.rearrange_top(right, std::move(last)); + stack.opt_show(); + int k = (int)stack.depth() - (int)right.size(); + tolk_assert(k >= 0); + for (int i = 0; i < (int)right.size(); i++) { + if (stack.s[k + i].first != right[i]) { + std::cerr << stack.o; + } + tolk_assert(stack.s[k + i].first == right[i]); + } + if (right.size() > 1) { + stack.o << AsmOp::Tuple((int)right.size()); + } + if (!right.empty()) { + std::string name = G.symbols.get_name(fun_ref->sym_idx); + stack.o << AsmOp::Custom(name + " SETGLOB", 1, 0); + } + stack.s.resize(k); + return true; + } + case _If: { + if (block0->is_empty() && block1->is_empty()) { + return true; + } + if (!next->noreturn() && (block0->noreturn() != block1->noreturn())) { + stack.o.retalt_ = true; + } + var_idx_t x = left[0]; + stack.rearrange_top(x, var_info[x] && var_info[x]->is_last()); + tolk_assert(stack[0] == x); + stack.opt_show(); + stack.s.pop_back(); + stack.modified(); + if (inline_func && (block0->noreturn() || block1->noreturn())) { + bool is0 = block0->noreturn(); + Op* block_noreturn = is0 ? block0.get() : block1.get(); + Op* block_other = is0 ? block1.get() : block0.get(); + stack.mode &= ~Stack::_InlineFunc; + stack.o << (is0 ? 
"IF:<{" : "IFNOT:<{"); + stack.o.indent(); + Stack stack_copy{stack}; + block_noreturn->generate_code_all(stack_copy); + stack.o.undent(); + stack.o << "}>ELSE<{"; + stack.o.indent(); + block_other->generate_code_all(stack); + if (!block_other->noreturn()) { + next->generate_code_all(stack); + } + stack.o.undent(); + stack.o << "}>"; + return false; + } + if (block1->is_empty() || block0->is_empty()) { + bool is0 = block1->is_empty(); + Op* block = is0 ? block0.get() : block1.get(); + // if (left) block0; ... + // if (!left) block1; ... + if (block->noreturn()) { + stack.o << (is0 ? "IFJMP:<{" : "IFNOTJMP:<{"); + stack.o.indent(); + Stack stack_copy{stack}; + stack_copy.mode &= ~Stack::_InlineFunc; + stack_copy.mode |= next->noreturn() ? 0 : Stack::_NeedRetAlt; + block->generate_code_all(stack_copy); + stack.o.undent(); + stack.o << "}>"; + return true; + } + stack.o << (is0 ? "IF:<{" : "IFNOT:<{"); + stack.o.indent(); + Stack stack_copy{stack}, stack_target{stack}; + stack_target.disable_output(); + stack_target.drop_vars_except(next->var_info); + stack_copy.mode &= ~Stack::_InlineFunc; + block->generate_code_all(stack_copy); + stack_copy.drop_vars_except(var_info); + stack_copy.opt_show(); + if ((is0 && stack_copy == stack) || (!is0 && stack_copy.vars() == stack.vars())) { + stack.o.undent(); + stack.o << "}>"; + if (!is0) { + stack.merge_const(stack_copy); + } + return true; + } + // stack_copy.drop_vars_except(next->var_info); + stack_copy.enforce_state(stack_target.vars()); + stack_copy.opt_show(); + if (stack_copy.vars() == stack.vars()) { + stack.o.undent(); + stack.o << "}>"; + stack.merge_const(stack_copy); + return true; + } + stack.o.undent(); + stack.o << "}>ELSE<{"; + stack.o.indent(); + stack.merge_state(stack_copy); + stack.opt_show(); + stack.o.undent(); + stack.o << "}>"; + return true; + } + if (block0->noreturn() || block1->noreturn()) { + bool is0 = block0->noreturn(); + Op* block_noreturn = is0 ? 
block0.get() : block1.get(); + Op* block_other = is0 ? block1.get() : block0.get(); + stack.o << (is0 ? "IFJMP:<{" : "IFNOTJMP:<{"); + stack.o.indent(); + Stack stack_copy{stack}; + stack_copy.mode &= ~Stack::_InlineFunc; + stack_copy.mode |= (block_other->noreturn() || next->noreturn()) ? 0 : Stack::_NeedRetAlt; + block_noreturn->generate_code_all(stack_copy); + stack.o.undent(); + stack.o << "}>"; + block_other->generate_code_all(stack); + return !block_other->noreturn(); + } + stack.o << "IF:<{"; + stack.o.indent(); + Stack stack_copy{stack}; + stack_copy.mode &= ~Stack::_InlineFunc; + block0->generate_code_all(stack_copy); + stack_copy.drop_vars_except(next->var_info); + stack_copy.opt_show(); + stack.o.undent(); + stack.o << "}>ELSE<{"; + stack.o.indent(); + stack.mode &= ~Stack::_InlineFunc; + block1->generate_code_all(stack); + stack.merge_state(stack_copy); + stack.opt_show(); + stack.o.undent(); + stack.o << "}>"; + return true; + } + case _Repeat: { + var_idx_t x = left[0]; + //stack.drop_vars_except(block0->var_info, x); + stack.rearrange_top(x, var_info[x] && var_info[x]->is_last()); + tolk_assert(stack[0] == x); + stack.opt_show(); + stack.s.pop_back(); + stack.modified(); + if (block0->noreturn()) { + stack.o.retalt_ = true; + } + if (true || !next->is_empty()) { + stack.o << "REPEAT:<{"; + stack.o.indent(); + stack.forget_const(); + if (block0->noreturn()) { + Stack stack_copy{stack}; + StackLayout layout1 = stack.vars(); + stack_copy.mode &= ~Stack::_InlineFunc; + stack_copy.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack_copy); + } else { + StackLayout layout1 = stack.vars(); + stack.mode &= ~Stack::_InlineFunc; + stack.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + } + stack.o.undent(); + stack.o << "}>"; + return true; + } else { + stack.o << "REPEATEND"; + stack.forget_const(); + StackLayout layout1 = stack.vars(); + block0->generate_code_all(stack); 
+ stack.enforce_state(std::move(layout1)); + stack.opt_show(); + return false; + } + } + case _Again: { + stack.drop_vars_except(block0->var_info); + stack.opt_show(); + if (block0->noreturn()) { + stack.o.retalt_ = true; + } + if (!next->is_empty() || inline_func) { + stack.o << "AGAIN:<{"; + stack.o.indent(); + stack.forget_const(); + StackLayout layout1 = stack.vars(); + stack.mode &= ~Stack::_InlineFunc; + stack.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + stack.o.undent(); + stack.o << "}>"; + return true; + } else { + stack.o << "AGAINEND"; + stack.forget_const(); + StackLayout layout1 = stack.vars(); + block0->generate_code_all(stack); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + return false; + } + } + case _Until: { + // stack.drop_vars_except(block0->var_info); + // stack.opt_show(); + if (block0->noreturn()) { + stack.o.retalt_ = true; + } + if (true || !next->is_empty()) { + stack.o << "UNTIL:<{"; + stack.o.indent(); + stack.forget_const(); + auto layout1 = stack.vars(); + stack.mode &= ~Stack::_InlineFunc; + stack.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack); + layout1.push_back(left[0]); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + stack.o.undent(); + stack.o << "}>"; + stack.s.pop_back(); + stack.modified(); + return true; + } else { + stack.o << "UNTILEND"; + stack.forget_const(); + StackLayout layout1 = stack.vars(); + block0->generate_code_all(stack); + layout1.push_back(left[0]); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + return false; + } + } + case _While: { + // while (block0 | left) block1; ...next + var_idx_t x = left[0]; + stack.drop_vars_except(block0->var_info); + stack.opt_show(); + StackLayout layout1 = stack.vars(); + bool next_empty = false && next->is_empty(); + if (block0->noreturn()) { + stack.o.retalt_ = true; + } + stack.o << "WHILE:<{"; + stack.o.indent(); + 
stack.forget_const(); + stack.mode &= ~Stack::_InlineFunc; + stack.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack); + stack.rearrange_top(x, !next->var_info[x] && !block1->var_info[x]); + stack.opt_show(); + stack.s.pop_back(); + stack.modified(); + stack.o.undent(); + Stack stack_copy{stack}; + stack.o << (next_empty ? "}>DO:" : "}>DO<{"); + if (!next_empty) { + stack.o.indent(); + } + stack_copy.opt_show(); + block1->generate_code_all(stack_copy); + stack_copy.enforce_state(std::move(layout1)); + stack_copy.opt_show(); + if (!next_empty) { + stack.o.undent(); + stack.o << "}>"; + return true; + } else { + return false; + } + } + case _TryCatch: { + if (block0->is_empty() && block1->is_empty()) { + return true; + } + if (block0->noreturn() || block1->noreturn()) { + stack.o.retalt_ = true; + } + Stack catch_stack{stack.o}; + std::vector catch_vars; + std::vector catch_last; + for (const VarDescr& var : block1->var_info.list) { + if (stack.find(var.idx) >= 0) { + catch_vars.push_back(var.idx); + catch_last.push_back(!block0->var_info[var.idx]); + } + } + const size_t block_size = 255; + for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) { + begin = end >= block_size ? 
end - block_size : 0; + for (size_t i = begin; i < end; ++i) { + catch_stack.push_new_var(catch_vars[i]); + } + } + catch_stack.push_new_var(left[0]); + catch_stack.push_new_var(left[1]); + stack.rearrange_top(catch_vars, catch_last); + stack.opt_show(); + stack.o << "c4 PUSH"; + stack.o << "c5 PUSH"; + stack.o << "c7 PUSH"; + stack.o << "<{"; + stack.o.indent(); + if (block1->noreturn()) { + catch_stack.mode |= Stack::_NeedRetAlt; + } + block1->generate_code_all(catch_stack); + catch_stack.drop_vars_except(next->var_info); + catch_stack.opt_show(); + stack.o.undent(); + stack.o << "}>CONT"; + stack.o << "c7 SETCONT"; + stack.o << "c5 SETCONT"; + stack.o << "c4 SETCONT"; + for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) { + begin = end >= block_size ? end - block_size : 0; + stack.o << std::to_string(end - begin) + " PUSHINT"; + stack.o << "-1 PUSHINT"; + stack.o << "SETCONTVARARGS"; + } + stack.s.erase(stack.s.end() - catch_vars.size(), stack.s.end()); + stack.modified(); + stack.o << "<{"; + stack.o.indent(); + if (block0->noreturn()) { + stack.mode |= Stack::_NeedRetAlt; + } + block0->generate_code_all(stack); + if (block0->noreturn()) { + stack.s = std::move(catch_stack.s); + } else if (!block1->noreturn()) { + stack.merge_state(catch_stack); + } + stack.opt_show(); + stack.o.undent(); + stack.o << "}>CONT"; + stack.o << "c1 PUSH"; + stack.o << "COMPOSALT"; + stack.o << "SWAP"; + stack.o << "TRY"; + return true; + } + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{where, "unknown operation in generate_code()"}; + } +} + +void Op::generate_code_all(Stack& stack) { + int saved_mode = stack.mode; + auto cont = generate_code_step(stack); + stack.mode = (stack.mode & ~Stack::_ModeSave) | (saved_mode & Stack::_ModeSave); + if (cont && next) { + next->generate_code_all(stack); + } +} + +void CodeBlob::generate_code(AsmOpList& out, int mode) { + Stack stack{out, mode}; + tolk_assert(ops && ops->cl == Op::_Import); + 
auto args = (int)ops->left.size(); + for (var_idx_t x : ops->left) { + stack.push_new_var(x); + } + ops->generate_code_all(stack); + stack.apply_wrappers(require_callxargs && (mode & Stack::_InlineAny) ? args : -1); +} + +void CodeBlob::generate_code(std::ostream& os, int mode, int indent) { + AsmOpList out_list(indent, &vars); + generate_code(out_list, mode); + if (G.settings.optimization_level >= 2) { + optimize_code(out_list); + } + out_list.out(os, mode); +} + +} // namespace tolk diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp new file mode 100644 index 000000000..fb70022fa --- /dev/null +++ b/tolk/compiler-state.cpp @@ -0,0 +1,56 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "compiler-state.h" +#include +#include + +namespace tolk { + +CompilerState G; // the only mutable global variable in tolk internals + +void ExperimentalOption::mark_deprecated(const char* deprecated_from_v, const char* deprecated_reason) { + this->deprecated_from_v = deprecated_from_v; + this->deprecated_reason = deprecated_reason; +} + +void CompilerSettings::enable_experimental_option(std::string_view name) { + ExperimentalOption* to_enable = nullptr; + + if (name == remove_unused_functions.name) { + to_enable = &remove_unused_functions; + } + + if (to_enable == nullptr) { + std::cerr << "unknown experimental option: " << name << std::endl; + } else if (to_enable->deprecated_from_v) { + std::cerr << "experimental option " << name << " " + << "is deprecated since Tolk v" << to_enable->deprecated_from_v + << ": " << to_enable->deprecated_reason << std::endl; + } else { + to_enable->enabled = true; + } +} + +void CompilerSettings::parse_experimental_options_cmd_arg(const std::string& cmd_arg) { + std::istringstream stream(cmd_arg); + std::string token; + while (std::getline(stream, token, ',')) { + enable_experimental_option(token); + } +} + +} // namespace tolk diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h new file mode 100644 index 000000000..aec1945e2 --- /dev/null +++ b/tolk/compiler-state.h @@ -0,0 +1,89 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "src-file.h" +#include "symtable.h" +#include "td/utils/Status.h" +#include +#include + +namespace tolk { + +// with cmd option -x, the user can pass experimental options to use +class ExperimentalOption { + friend struct CompilerSettings; + + const std::string_view name; + bool enabled = false; + const char* deprecated_from_v = nullptr; // when an option becomes deprecated (after the next compiler release), + const char* deprecated_reason = nullptr; // but the user still passes it, we'll warn to stderr + +public: + explicit ExperimentalOption(std::string_view name) : name(name) {} + + void mark_deprecated(const char* deprecated_from_v, const char* deprecated_reason); + + explicit operator bool() const { return enabled; } +}; + +// CompilerSettings contains settings that can be passed via cmd line or (partially) wasm envelope. +// They are filled once at start and are immutable since the compilation started. +struct CompilerSettings { + enum class FsReadCallbackKind { Realpath, ReadFile }; + + using FsReadCallback = std::function(FsReadCallbackKind, const char*)>; + + int verbosity = 0; + int optimization_level = 2; + bool stack_layout_comments = true; + + std::string output_filename; + std::string boc_output_filename; + std::string stdlib_folder; // a path to tolk-stdlib/; files imported via @stdlib/xxx are there + + FsReadCallback read_callback; + + ExperimentalOption remove_unused_functions{"remove-unused-functions"}; + + void enable_experimental_option(std::string_view name); + void parse_experimental_options_cmd_arg(const std::string& cmd_arg); +}; + +// CompilerState contains a mutable state that is changed while the compilation is going on. +// It's a "global state" of all compilation. +// Historically, in FunC, this global state was spread along many global C++ variables. 
+// Now, no global C++ variables except `CompilerState G` are present. +struct CompilerState { + CompilerSettings settings; + + SymTable symbols; + int scope_level = 0; + SymDef* sym_def[SymTable::SIZE_PRIME + 1]{}; + SymDef* global_sym_def[SymTable::SIZE_PRIME + 1]{}; + std::vector> symbol_stack; + std::vector scope_opened_at; + + std::vector all_code_functions, all_global_vars, all_get_methods, all_constants; + AllRegisteredSrcFiles all_src_files; + + bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } +}; + +extern CompilerState G; + +} // namespace tolk diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp new file mode 100644 index 000000000..fb085ae9c --- /dev/null +++ b/tolk/gen-abscode.cpp @@ -0,0 +1,429 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" +#include "compiler-state.h" + +using namespace std::literals::string_literals; + +namespace tolk { + +/* + * + * EXPRESSIONS + * + */ + +Expr* Expr::copy() const { + auto res = new Expr{*this}; + for (auto& arg : res->args) { + arg = arg->copy(); + } + return res; +} + +Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { + sym = lookup_symbol(name_idx); + if (!sym) { + } +} + +void Expr::deduce_type() { + if (e_type) { + return; + } + switch (cls) { + case _Apply: { + if (!sym) { + return; + } + SymValFunc* sym_val = dynamic_cast(sym->value); + if (!sym_val || !sym_val->get_type()) { + return; + } + std::vector arg_types; + arg_types.reserve(args.size()); + for (const Expr* arg : args) { + arg_types.push_back(arg->e_type); + } + TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_tensor(arg_types), TypeExpr::new_hole()); + try { + unify(fun_type, sym_val->sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " + << fun_type->args[0] << ": " << ue; + throw ParseError(here, os.str()); + } + e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + return; + } + case _VarApply: { + tolk_assert(args.size() == 2); + TypeExpr* fun_type = TypeExpr::new_map(args[1]->e_type, TypeExpr::new_hole()); + try { + unify(fun_type, args[0]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type + << ": " << ue; + throw ParseError(here, os.str()); + } + e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + return; + } + case _GrabMutatedVars: { + tolk_assert(args.size() == 2 && args[0]->cls == _Apply && sym); + SymValFunc* called_f = dynamic_cast(sym->value); + tolk_assert(called_f->has_mutate_params()); + TypeExpr* sym_type = 
called_f->get_type(); + if (sym_type->constr == TypeExpr::te_ForAll) { + TypeExpr::remove_forall(sym_type); + } + tolk_assert(sym_type->args[1]->constr == TypeExpr::te_Tensor); + e_type = sym_type->args[1]->args[sym_type->args[1]->args.size() - 1]; + TypeExpr::remove_indirect(e_type); + return; + } + case _ReturnSelf: { + tolk_assert(args.size() == 2 && sym); + Expr* this_arg = args[1]; + e_type = this_arg->e_type; + TypeExpr::remove_indirect(e_type); + return; + } + case _Letop: { + tolk_assert(args.size() == 2); + try { + // std::cerr << "in assignment: " << args[0]->e_type << " from " << args[1]->e_type << std::endl; + unify(args[0]->e_type, args[1]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " + << args[0]->e_type << ": " << ue; + throw ParseError(here, os.str()); + } + e_type = args[0]->e_type; + TypeExpr::remove_indirect(e_type); + return; + } + case _CondExpr: { + tolk_assert(args.size() == 3); + auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(args[0]->e_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; + throw ParseError(here, os.str()); + } + try { + unify(args[1]->e_type, args[2]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " + << args[2]->e_type << " : " << ue; + throw ParseError(here, os.str()); + } + e_type = args[1]->e_type; + TypeExpr::remove_indirect(e_type); + return; + } + default: + throw Fatal("unexpected cls=" + std::to_string(cls) + " in Expr::deduce_type()"); + } +} + +void Expr::define_new_vars(CodeBlob& code) { + switch (cls) { + case _Tensor: + case _MkTuple: { + for (Expr* item : args) { + item->define_new_vars(code); + } + break; + } + case _Var: + 
if (val < 0) { + val = code.create_var(e_type, sym->sym_idx, here); + sym->value->idx = val; + } + break; + case _Hole: + if (val < 0) { + val = code.create_tmp_var(e_type, here); + } + break; + default: + break; + } +} + +void Expr::predefine_vars() { + switch (cls) { + case _Tensor: + case _MkTuple: { + for (Expr* item : args) { + item->predefine_vars(); + } + break; + } + case _Var: + if (!sym) { + tolk_assert(val < 0 && here.is_defined()); + sym = define_symbol(~val, false, here); + // std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl; + if (!sym) { + throw ParseError{here, std::string{"redefined variable `"} + G.symbols.get_name(~val) + "`"}; + } + sym->value = new SymValVariable(-1, e_type); + if (is_immutable()) { + dynamic_cast(sym->value)->flags |= SymValVariable::flagImmutable; + } + } + break; + default: + break; + } +} + +var_idx_t Expr::new_tmp(CodeBlob& code) const { + return code.create_tmp_var(e_type, here); +} + +void add_set_globs(CodeBlob& code, std::vector>& globs, SrcLocation here) { + for (const auto& p : globs) { + auto& op = code.emplace_back(here, Op::_SetGlob, std::vector{}, std::vector{ p.second }, p.first); + op.set_impure(code); + } +} + +std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) { + if (lhs->is_mktuple()) { + if (rhs->is_mktuple()) { + return pre_compile_let(code, lhs->args.at(0), rhs->args.at(0), here); + } + auto right = rhs->pre_compile(code); + TypeExpr::remove_indirect(rhs->e_type); + auto unpacked_type = rhs->e_type->args.at(0); + std::vector tmp{code.create_tmp_var(unpacked_type, rhs->here)}; + code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); + auto tvar = new Expr{Expr::_Var, lhs->here}; + tvar->set_val(tmp[0]); + tvar->set_location(rhs->here); + tvar->e_type = unpacked_type; + pre_compile_let(code, lhs->args.at(0), tvar, here); + return tmp; + } + auto right = rhs->pre_compile(code); + std::vector> globs; + auto left = lhs->pre_compile(code, 
&globs); + for (var_idx_t v : left) { + code.on_var_modification(v, here); + } + code.emplace_back(here, Op::_Let, std::move(left), right); + add_set_globs(code, globs, here); + return right; +} + +std::vector pre_compile_tensor(const std::vector& args, CodeBlob &code, + std::vector> *lval_globs) { + const size_t n = args.size(); + if (n == 0) { // just `()` + return {}; + } + if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments + return args[0]->pre_compile(code, lval_globs); + } + std::vector> res_lists(n); + + struct ModifiedVar { + size_t i, j; + std::unique_ptr* cur_ops; // `LET tmp = v_ij` will be inserted before this + }; + std::vector modified_vars; + for (size_t i = 0; i < n; ++i) { + res_lists[i] = args[i]->pre_compile(code, lval_globs); + for (size_t j = 0; j < res_lists[i].size(); ++j) { + TmpVar& var = code.vars.at(res_lists[i][j]); + if (!lval_globs && !var.is_unnamed()) { + var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable { + if (!done) { + done = true; + modified_vars.push_back({i, j, cur_ops}); + } + }); + } else { + var.on_modification.push_back([](SrcLocation) { + }); + } + } + } + for (const auto& list : res_lists) { + for (var_idx_t v : list) { + tolk_assert(!code.vars.at(v).on_modification.empty()); + code.vars.at(v).on_modification.pop_back(); + } + } + for (size_t idx = modified_vars.size(); idx--; ) { + const ModifiedVar &m = modified_vars[idx]; + var_idx_t orig_v = res_lists[m.i][m.j]; + var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where); + std::unique_ptr op = std::make_unique(code.vars[orig_v].where, Op::_Let); + op->left = {tmp_v}; + op->right = {orig_v}; + op->next = std::move((*m.cur_ops)); + *m.cur_ops = std::move(op); + res_lists[m.i][m.j] = tmp_v; + } + std::vector res; + for (const auto& list : res_lists) { + res.insert(res.end(), list.cbegin(), list.cend()); + } + return res; +} + 
+std::vector Expr::pre_compile(CodeBlob& code, std::vector>* lval_globs) const { + if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _GlobVar)) { + std::cerr << "lvalue expression constructor is " << cls << std::endl; + throw Fatal{"cannot compile lvalue expression with unknown constructor"}; + } + switch (cls) { + case _Tensor: { + return pre_compile_tensor(args, code, lval_globs); + } + case _Apply: { + tolk_assert(sym); + std::vector res = pre_compile_tensor(args, code, lval_globs);; + auto rvect = new_tmp_vect(code); + auto& op = code.emplace_back(here, Op::_Call, rvect, res, sym); + if (flags & _IsImpure) { + op.set_impure(code); + } + return rvect; + } + case _GrabMutatedVars: { + SymValFunc* func_val = dynamic_cast(sym->value); + tolk_assert(func_val && func_val->has_mutate_params()); + tolk_assert(args.size() == 2 && args[0]->cls == _Apply && args[1]->cls == _Tensor); + auto right = args[0]->pre_compile(code); // apply (returning function result and mutated) + std::vector> local_globs; + if (!lval_globs) { + lval_globs = &local_globs; + } + auto left = args[1]->pre_compile(code, lval_globs); // mutated (lvalue) + auto rvect = new_tmp_vect(code); + left.push_back(rvect[0]); + for (var_idx_t v : left) { + code.on_var_modification(v, here); + } + code.emplace_back(here, Op::_Let, std::move(left), std::move(right)); + add_set_globs(code, local_globs, here); + return rvect; + } + case _ReturnSelf: { + tolk_assert(args.size() == 2 && sym); + Expr* this_arg = args[1]; + auto right = args[0]->pre_compile(code); + return this_arg->pre_compile(code); + } + case _Var: + case _Hole: + if (val < 0) { + throw ParseError{here, "unexpected variable definition"}; + } + return {val}; + case _VarApply: + if (args[0]->cls == _GlobFunc) { + auto res = args[1]->pre_compile(code); + auto rvect = new_tmp_vect(code); + auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), args[0]->sym); + if (args[0]->flags & _IsImpure) { + 
op.set_impure(code); + } + return rvect; + } else { + auto res = args[1]->pre_compile(code); + auto tfunc = args[0]->pre_compile(code); + if (tfunc.size() != 1) { + throw Fatal{"stack tuple used as a function"}; + } + res.push_back(tfunc[0]); + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_CallInd, rvect, std::move(res)); + return rvect; + } + case _Const: { + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_IntConst, rvect, intval); + return rvect; + } + case _GlobFunc: + case _GlobVar: { + if (auto fun_ref = dynamic_cast(sym->value)) { + fun_ref->flags |= SymValFunc::flagUsedAsNonCall; + if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { + throw ParseError(here, "saving `" + sym->name() + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); + } + if (fun_ref->has_mutate_params()) { + throw ParseError(here, "saving `" + sym->name() + "` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly"); + } + } + auto rvect = new_tmp_vect(code); + if (lval_globs) { + lval_globs->push_back({ sym, rvect[0] }); + return rvect; + } else { + code.emplace_back(here, Op::_GlobVar, rvect, std::vector{}, sym); + return rvect; + } + } + case _Letop: { + return pre_compile_let(code, args.at(0), args.at(1), here); + } + case _MkTuple: { + auto left = new_tmp_vect(code); + auto right = args[0]->pre_compile(code); + code.emplace_back(here, Op::_Tuple, left, std::move(right)); + return left; + } + case _CondExpr: { + auto cond = args[0]->pre_compile(code); + tolk_assert(cond.size() == 1); + auto rvect = new_tmp_vect(code); + Op& if_op = code.emplace_back(here, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(here, Op::_Let, rvect, args[1]->pre_compile(code)); + code.close_pop_cur(args[1]->here); + code.push_set_cur(if_op.block1); + code.emplace_back(here, Op::_Let, rvect, args[2]->pre_compile(code)); + 
code.close_pop_cur(args[2]->here); + return rvect; + } + case _SliceConst: { + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_SliceConst, rvect, strval); + return rvect; + } + default: + std::cerr << "expression constructor is " << cls << std::endl; + throw Fatal{"cannot compile expression with unknown constructor"}; + } +} + +} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp new file mode 100644 index 000000000..17eb4544c --- /dev/null +++ b/tolk/lexer.cpp @@ -0,0 +1,609 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "lexer.h" +#include "compiler-state.h" +#include "symtable.h" +#include + +namespace tolk { + +// By 'chunk' in lexer I mean a token or a list of tokens parsed simultaneously. +// E.g., when we meet "str", ChunkString is called, it emits tok_string. +// E.g., when we meet "str"x, ChunkString emits not only tok_string, but tok_string_modifier. +// E.g., when we meet //, ChunkInlineComment is called, it emits nothing (just skips a line). +// We store all valid chunks lexers in a prefix tree (LexingTrie), see below. 
+struct ChunkLexerBase { + ChunkLexerBase(const ChunkLexerBase&) = delete; + ChunkLexerBase &operator=(const ChunkLexerBase&) = delete; + ChunkLexerBase() = default; + + virtual bool parse(Lexer* lex) const = 0; + virtual ~ChunkLexerBase() = default; +}; + +template +static T* singleton() { + static T obj; + return &obj; +} + +// LexingTrie is a prefix tree storing all available Tolk language constructs. +// It's effectively a map of a prefix to ChunkLexerBase. +class LexingTrie { + LexingTrie** next{nullptr}; // either nullptr or [256] + ChunkLexerBase* val{nullptr}; // non-null for leafs + + GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_next_allocated() { + if (next == nullptr) { + next = new LexingTrie*[256]; + std::memset(next, 0, 256 * sizeof(LexingTrie*)); + } + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_symbol_allocated(uint8_t symbol) const { + if (next[symbol] == nullptr) { + next[symbol] = new LexingTrie; + } + } + +public: + // Maps a prefix onto a chunk lexer. + // E.g. " -> ChunkString + // E.g. """ -> ChunkMultilineString + void add_prefix(const char* s, ChunkLexerBase* val) { + LexingTrie* cur = this; + + for (; *s; ++s) { + uint8_t symbol = static_cast(*s); + cur->ensure_next_allocated(); + cur->ensure_symbol_allocated(symbol); + cur = cur->next[symbol]; + } + +#ifdef TOLK_DEBUG + assert(!cur->val); +#endif + cur->val = val; + } + + // Maps a pattern onto a chunk lexer. + // E.g. -[0-9] -> ChunkNegativeNumber + // Internally, it expands the pattern to all possible prefixes: -0, -1, etc. 
+ // (for example, [0-9][a-z_$] gives 10*28=280 prefixes) + void add_pattern(const char* pattern, ChunkLexerBase* val) { + std::vector all_possible_trie{this}; + + for (const char* c = pattern; *c; ++c) { + std::string to_append; + if (*c == '[') { + c++; + while (*c != ']') { // assume that input is corrent, no out-of-string checks + if (*(c + 1) == '-') { + char l = *c, r = *(c + 2); + for (char symbol = l; symbol <= r; ++symbol) { + to_append += symbol; + } + c += 3; + } else { + to_append += *c; + c++; + } + } + } else { + to_append += *c; + } + + std::vector next_all_possible_trie; + next_all_possible_trie.reserve(all_possible_trie.size() * to_append.size()); + for (LexingTrie* cur : all_possible_trie) { + cur->ensure_next_allocated(); + for (uint8_t symbol : to_append) { + cur->ensure_symbol_allocated(symbol); + next_all_possible_trie.emplace_back(cur->next[symbol]); + } + } + all_possible_trie = std::move(next_all_possible_trie); + } + + for (LexingTrie* trie : all_possible_trie) { + trie->val = val; + } + } + + // Looks up a chunk lexer given a string (in practice, s points to cur position in the middle of the file). + // It returns the deepest case: pointing to ", it will return ChunkMultilineString if """, or ChunkString otherwize. + ChunkLexerBase* get_deepest(const char* s) const { + const LexingTrie* best = this; + + for (const LexingTrie* cur = this; cur && cur->next; ++s) { + cur = cur->next[static_cast(*s)]; // if s reaches \0, cur will just become nullptr, and loop will end + if (cur && cur->val) { + best = cur; + } + } + + return best->val; + } +}; + +// +// ---------------------------------------------------------------------- +// A list of valid parsed chunks. +// + +// An inline comment, starting from '//' +struct ChunkInlineComment final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + lex->skip_line(); + return true; + } +}; + +// A multiline comment, starting from '/*' +// Note, that nested comments are not supported. 
+struct ChunkMultilineComment final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + while (!lex->is_eof()) { + if (lex->char_at() == '*' && lex->char_at(1) == '/') { + lex->skip_chars(2); + return true; + } + lex->skip_chars(1); + } + return true; // it's okay if comment extends past end of file + } +}; + +// A string, starting from " +// Note, that there are no escape symbols inside: the purpose of strings in Tolk just doesn't need it. +// After a closing quote, a string modifier may be present, like "Ef8zMzMzMzMzMzMzMzMzMzM0vF"a. +// If present, it emits a separate tok_string_modifier. +struct ChunkString final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(1); + while (!lex->is_eof() && lex->char_at() != '"' && lex->char_at() != '\n') { + lex->skip_chars(1); + } + if (lex->char_at() != '"') { + lex->error("string extends past end of line"); + } + + std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); + lex->skip_chars(1); + lex->add_token(tok_string_const, str_val); + + if (std::isalpha(lex->char_at())) { + std::string_view modifier_val(lex->c_str(), 1); + lex->skip_chars(1); + lex->add_token(tok_string_modifier, modifier_val); + } + + return true; + } +}; + +// A string starting from """ +// Used for multiline asm constructions. Can not have a postfix modifier. 
+struct ChunkMultilineString final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(3); + while (!lex->is_eof()) { + if (lex->char_at() == '"' && lex->char_at(1) == '"' && lex->char_at(2) == '"') { + break; + } + lex->skip_chars(1); + } + if (lex->is_eof()) { + lex->error("string extends past end of file"); + } + + std::string_view str_val(str_begin + 3, lex->c_str() - str_begin - 3); + lex->skip_chars(3); + lex->add_token(tok_string_const, str_val); + return true; + } +}; + +// An annotation for a function (in the future, for vars also): +// @inline and others +struct ChunkAnnotation final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(1); + while (std::isalnum(lex->char_at()) || lex->char_at() == '_') { + lex->skip_chars(1); + } + + std::string_view str_val(str_begin, lex->c_str() - str_begin); + lex->add_token(tok_annotation_at, str_val); + return true; + } +}; + +// A number, may be a hex one. +struct ChunkNumber final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + bool hex = false; + if (lex->char_at() == '0' && lex->char_at(1) == 'x') { + lex->skip_chars(2); + hex = true; + } + if (lex->is_eof()) { + return false; + } + while (!lex->is_eof()) { + char c = lex->char_at(); + if (c >= '0' && c <= '9') { + lex->skip_chars(1); + continue; + } + if (!hex) { + break; + } + c |= 0x20; + if (c < 'a' || c > 'f') { + break; + } + lex->skip_chars(1); + } + + std::string_view str_val(str_begin, lex->c_str() - str_begin); + lex->add_token(tok_int_const, str_val); + return true; + } +}; + +// Tokens like !=, &, etc. emit just a simple TokenType. +// Since they are stored in trie, "parsing" them is just skipping len chars. 
+struct ChunkSimpleToken final : ChunkLexerBase { + TokenType tp; + int len; + + ChunkSimpleToken(TokenType tp, int len) : tp(tp), len(len) {} + + bool parse(Lexer* lex) const override { + std::string_view str_val(lex->c_str(), len); + lex->add_token(tp, str_val); + lex->skip_chars(len); + return true; + } +}; + +// Spaces and other space-like symbols are just skipped. +struct ChunkSkipWhitespace final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + lex->skip_chars(1); + lex->skip_spaces(); + return true; + } +}; + +// Here we handle corner cases of grammar that are requested on demand. +// E.g., for 'tolk >0.5.0', '0.5.0' should be parsed specially to emit tok_semver. +// See TolkLanguageGrammar::parse_next_chunk_special(). +struct ChunkSpecialParsing { + static bool parse_semver(Lexer* lex) { + const char* str_begin = lex->c_str(); + while (std::isdigit(lex->char_at()) || lex->char_at() == '.') { + lex->skip_chars(1); + } + + std::string_view str_val(str_begin, lex->c_str() - str_begin); + if (str_val.empty()) { + return false; + } + lex->add_token(tok_semver, str_val); + return true; + } +}; + +// Anything starting from a valid identifier beginning symbol is parsed as an identifier. +// But if a resulting string is a keyword, a corresponding token is emitted instead of tok_identifier. +struct ChunkIdentifierOrKeyword final : ChunkLexerBase { + // having parsed str up to the valid end, look up whether it's a valid keyword + // in the future, this could be a bit more effective than just comparing strings (e.g. 
gperf), + // but nevertheless, performance of the naive code below is reasonably good + static TokenType maybe_keyword(std::string_view str) { + switch (str.size()) { + case 1: + if (str == "_") return tok_underscore; + break; + case 2: + if (str == "do") return tok_do; + if (str == "if") return tok_if; + break; + case 3: + if (str == "int") return tok_int; + if (str == "var") return tok_var; + if (str == "fun") return tok_fun; + if (str == "asm") return tok_asm; + if (str == "get") return tok_get; + if (str == "try") return tok_try; + if (str == "val") return tok_val; + break; + case 4: + if (str == "else") return tok_else; + if (str == "true") return tok_true; + if (str == "cell") return tok_cell; + if (str == "null") return tok_null; + if (str == "void") return tok_void; + if (str == "bool") return tok_bool; + if (str == "auto") return tok_auto; + if (str == "self") return tok_self; + if (str == "tolk") return tok_tolk; + if (str == "type") return tok_type; + if (str == "enum") return tok_enum; + break; + case 5: + if (str == "slice") return tok_slice; + if (str == "tuple") return tok_tuple; + if (str == "const") return tok_const; + if (str == "false") return tok_false; + if (str == "redef") return tok_redef; + if (str == "while") return tok_while; + if (str == "break") return tok_break; + if (str == "throw") return tok_throw; + if (str == "catch") return tok_catch; + if (str == "infix") return tok_infix; + break; + case 6: + if (str == "return") return tok_return; + if (str == "assert") return tok_assert; + if (str == "import") return tok_import; + if (str == "global") return tok_global; + if (str == "mutate") return tok_mutate; + if (str == "repeat") return tok_repeat; + if (str == "struct") return tok_struct; + if (str == "export") return tok_export; + break; + case 7: + if (str == "builder") return tok_builder; + if (str == "builtin") return tok_builtin; + break; + case 8: + if (str == "continue") return tok_continue; + if (str == "operator") return 
tok_operator; + break; + case 12: + if (str == "continuation") return tok_continuation; + break; + default: + break; + } + return tok_empty; + } + + bool parse(Lexer* lex) const override { + const char* sym_begin = lex->c_str(); + lex->skip_chars(1); + while (!lex->is_eof()) { + char c = lex->char_at(); + bool allowed_in_identifier = std::isalnum(c) || c == '_' || c == '$'; + if (!allowed_in_identifier) { + break; + } + lex->skip_chars(1); + } + + std::string_view str_val(sym_begin, lex->c_str() - sym_begin); + if (TokenType kw_tok = maybe_keyword(str_val)) { + lex->add_token(kw_tok, str_val); + } else { + G.symbols.lookup_add(str_val); + lex->add_token(tok_identifier, str_val); + } + return true; + } +}; + +// Like in Kotlin, `backticks` can be used to wrap identifiers (both in declarations/usage, both for vars/functions). +// E.g.: function `do`() { var `with spaces` = 1; } +// This could be useful to use reserved names as identifiers (in a probable codegen from TL, for example). +struct ChunkIdentifierInBackticks final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(1); + while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') { + if (std::isspace(lex->char_at())) { // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx + lex->error("an identifier can't have a space in its name (even inside backticks)"); + } + lex->skip_chars(1); + } + if (lex->char_at() != '`') { + lex->error("unclosed backtick `"); + } + + std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); + lex->skip_chars(1); + G.symbols.lookup_add(str_val); + lex->add_token(tok_identifier, str_val); + return true; + } +}; + +// +// ---------------------------------------------------------------------- +// Here we define a grammar of Tolk. +// All valid chunks prefixes are stored in trie. 
+// + +struct TolkLanguageGrammar { + static LexingTrie trie; + + static bool parse_next_chunk(Lexer* lex) { + const ChunkLexerBase* best = trie.get_deepest(lex->c_str()); + return best && best->parse(lex); + } + + static bool parse_next_chunk_special(Lexer* lex, TokenType parse_next_as) { + switch (parse_next_as) { + case tok_semver: + return ChunkSpecialParsing::parse_semver(lex); + default: + assert(false); + return false; + } + } + + static void register_token(const char* str, int len, TokenType tp) { + trie.add_prefix(str, new ChunkSimpleToken(tp, len)); + } + + static void init() { + trie.add_prefix("//", singleton()); + trie.add_prefix("/*", singleton()); + trie.add_prefix(R"(")", singleton()); + trie.add_prefix(R"(""")", singleton()); + trie.add_prefix("@", singleton()); + trie.add_prefix(" ", singleton()); + trie.add_prefix("\t", singleton()); + trie.add_prefix("\r", singleton()); + trie.add_prefix("\n", singleton()); + + trie.add_pattern("[0-9]", singleton()); + trie.add_pattern("[a-zA-Z_$]", singleton()); + trie.add_prefix("`", singleton()); + + register_token("+", 1, tok_plus); + register_token("-", 1, tok_minus); + register_token("*", 1, tok_mul); + register_token("/", 1, tok_div); + register_token("%", 1, tok_mod); + register_token("?", 1, tok_question); + register_token(":", 1, tok_colon); + register_token(",", 1, tok_comma); + register_token(";", 1, tok_semicolon); + register_token("(", 1, tok_oppar); + register_token(")", 1, tok_clpar); + register_token("[", 1, tok_opbracket); + register_token("]", 1, tok_clbracket); + register_token("{", 1, tok_opbrace); + register_token("}", 1, tok_clbrace); + register_token("=", 1, tok_assign); + register_token("<", 1, tok_lt); + register_token(">", 1, tok_gt); + register_token("!", 1, tok_logical_not); + register_token("&", 1, tok_bitwise_and); + register_token("|", 1, tok_bitwise_or); + register_token("^", 1, tok_bitwise_xor); + register_token("~", 1, tok_bitwise_not); + register_token(".", 1, tok_dot); + 
register_token("==", 2, tok_eq); + register_token("!=", 2, tok_neq); + register_token("<=", 2, tok_leq); + register_token(">=", 2, tok_geq); + register_token("<<", 2, tok_lshift); + register_token(">>", 2, tok_rshift); + register_token("&&", 2, tok_logical_and); + register_token("||", 2, tok_logical_or); + register_token("~/", 2, tok_divR); + register_token("^/", 2, tok_divC); + register_token("+=", 2, tok_set_plus); + register_token("-=", 2, tok_set_minus); + register_token("*=", 2, tok_set_mul); + register_token("/=", 2, tok_set_div); + register_token("%=", 2, tok_set_mod); + register_token("&=", 2, tok_set_bitwise_and); + register_token("|=", 2, tok_set_bitwise_or); + register_token("^=", 2, tok_set_bitwise_xor); + register_token("->", 2, tok_arrow); + register_token("<=>", 3, tok_spaceship); + register_token("~>>", 3, tok_rshiftR); + register_token("^>>", 3, tok_rshiftC); + register_token("<<=", 3, tok_set_lshift); + register_token(">>=", 3, tok_set_rshift); + } +}; + +LexingTrie TolkLanguageGrammar::trie; + +// +// ---------------------------------------------------------------------- +// The Lexer class is to be used outside (by parser, which constructs AST from tokens). +// It's streaming. It means, that `next()` parses a next token on demand +// (instead of parsing all file contents to vector and iterating over it). +// Parsing on demand uses effectively less memory. +// Note, that chunks, being parsed, call `add_token()`, and a chunk may add multiple tokens at once. +// That's why a small cirlular buffer for tokens is used. +// `last_token_idx` actually means a number of total tokens added. +// `cur_token_idx` is a number of returned by `next()`. +// It's assumed that an input file has already been loaded, its contents is present and won't be deleted +// (`start`, `cur` and `end`, as well as every Token str_val, points inside file->text). 
+// + +Lexer::Lexer(const SrcFile* file) + : file(file) + , p_start(file->text.data()) + , p_end(p_start + file->text.size()) + , p_next(p_start) + , location(file) { + next(); +} + +void Lexer::next() { + while (cur_token_idx == last_token_idx && !is_eof()) { + update_location(); + if (!TolkLanguageGrammar::parse_next_chunk(this)) { + error("failed to parse"); + } + } + if (is_eof()) { + add_token(tok_eof, file->text); + } + cur_token = tokens_circularbuf[++cur_token_idx & 7]; +} + +void Lexer::next_special(TokenType parse_next_as, const char* str_expected) { + assert(cur_token_idx == last_token_idx); + skip_spaces(); + update_location(); + if (!TolkLanguageGrammar::parse_next_chunk_special(this, parse_next_as)) { + error(std::string(str_expected) + " expected"); + } + cur_token = tokens_circularbuf[++cur_token_idx & 7]; +} + +void Lexer::error(const std::string& err_msg) const { + throw ParseError(cur_location(), err_msg); +} + +void Lexer::unexpected(const char* str_expected) const { + throw ParseError(cur_location(), "expected " + std::string(str_expected) + ", got `" + std::string(cur_str()) + "`"); +} + +void lexer_init() { + TolkLanguageGrammar::init(); +} + +// todo #ifdef TOLK_PROFILING +// As told above, `next()` produces tokens on demand, while AST is being generated. +// Hence, it's difficult to measure Lexer performance separately. +// This function can be called just to tick Lexer performance, it just scans all input files. +// There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful. +void lexer_measure_performance(const AllSrcFiles& files_to_just_parse) { + for (const SrcFile* file : files_to_just_parse) { + Lexer lex(file); + while (!lex.is_eof()) { + lex.next(); + } + } +} + +} // namespace tolk diff --git a/tolk/lexer.h b/tolk/lexer.h new file mode 100644 index 000000000..8a25f9526 --- /dev/null +++ b/tolk/lexer.h @@ -0,0 +1,234 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "platform-utils.h" +#include "src-file.h" +#include + +namespace tolk { + +enum TokenType { + tok_empty, + + tok_fun, + tok_get, + tok_type, + tok_enum, + tok_struct, + tok_operator, + tok_infix, + + tok_global, + tok_const, + tok_var, + tok_val, + tok_redef, + tok_mutate, + tok_self, + + tok_annotation_at, + tok_colon, + tok_asm, + tok_builtin, + + tok_int_const, + tok_string_const, + tok_string_modifier, + tok_true, + tok_false, + tok_null, + + tok_identifier, + tok_dot, + + tok_plus, + tok_minus, + tok_mul, + tok_div, + tok_mod, + tok_question, + tok_comma, + tok_semicolon, + tok_oppar, + tok_clpar, + tok_opbracket, + tok_clbracket, + tok_opbrace, + tok_clbrace, + tok_assign, + tok_underscore, + tok_lt, + tok_gt, + tok_logical_not, + tok_logical_and, + tok_logical_or, + tok_bitwise_and, + tok_bitwise_or, + tok_bitwise_xor, + tok_bitwise_not, + + tok_eq, + tok_neq, + tok_leq, + tok_geq, + tok_spaceship, + tok_lshift, + tok_rshift, + tok_rshiftR, + tok_rshiftC, + tok_divR, + tok_divC, + tok_set_plus, + tok_set_minus, + tok_set_mul, + tok_set_div, + tok_set_mod, + tok_set_lshift, + tok_set_rshift, + tok_set_bitwise_and, + tok_set_bitwise_or, + tok_set_bitwise_xor, + + tok_return, + tok_repeat, + tok_do, + tok_while, + tok_break, + tok_continue, + tok_try, + tok_catch, + tok_throw, + 
tok_assert, + tok_if, + tok_else, + + tok_int, + tok_cell, + tok_bool, + tok_slice, + tok_builder, + tok_continuation, + tok_tuple, + tok_auto, + tok_void, + tok_arrow, + + tok_tolk, + tok_semver, + tok_import, + tok_export, + + tok_eof +}; + +// All tolk language is parsed into tokens. +// Lexer::next() returns a Token. +struct Token { + TokenType type = tok_empty; + std::string_view str_val; + + Token() = default; + Token(TokenType type, std::string_view str_val): type(type), str_val(str_val) {} +}; + +// Lexer::next() is a method to be used externally (while parsing tolk file to AST). +// It's streaming: `next()` parses a token on demand. +// For comments, see lexer.cpp, a comment above Lexer constructor. +class Lexer { + Token tokens_circularbuf[8]{}; + int last_token_idx = -1; + int cur_token_idx = -1; + Token cur_token; // = tokens_circularbuf[cur_token_idx & 7] + + const SrcFile* file; + const char *p_start, *p_end, *p_next; + SrcLocation location; + + void update_location() { + location.char_offset = static_cast(p_next - p_start); + } + +public: + + explicit Lexer(const SrcFile* file); + Lexer(const Lexer&) = delete; + Lexer &operator=(const Lexer&) = delete; + + void add_token(TokenType type, std::string_view str) { + tokens_circularbuf[++last_token_idx & 7] = Token(type, str); + } + + void skip_spaces() { + while (std::isspace(*p_next)) { + ++p_next; + } + } + + void skip_line() { + while (p_next < p_end && *p_next != '\n' && *p_next != '\r') { + ++p_next; + } + while (*p_next == '\n' || *p_next == '\r') { + ++p_next; + } + } + + void skip_chars(int n) { + p_next += n; + } + + bool is_eof() const { + return p_next >= p_end; + } + + char char_at() const { return *p_next; } + char char_at(int shift) const { return *(p_next + shift); } + const char* c_str() const { return p_next; } + + TokenType tok() const { return cur_token.type; } + std::string_view cur_str() const { return cur_token.str_val; } + SrcLocation cur_location() const { return location; } + 
const SrcFile* cur_file() const { return file; } + + void next(); + void next_special(TokenType parse_next_as, const char* str_expected); + + void check(TokenType next_tok, const char* str_expected) const { + if (cur_token.type != next_tok) { + unexpected(str_expected); // unlikely path, not inlined + } + } + void expect(TokenType next_tok, const char* str_expected) { + if (cur_token.type != next_tok) { + unexpected(str_expected); + } + next(); + } + + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void unexpected(const char* str_expected) const; + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void error(const std::string& err_msg) const; +}; + +void lexer_init(); + +// todo #ifdef TOLK_PROFILING +void lexer_measure_performance(const AllSrcFiles& files_to_just_parse); + +} // namespace tolk diff --git a/tolk/optimize.cpp b/tolk/optimize.cpp new file mode 100644 index 000000000..76d756386 --- /dev/null +++ b/tolk/optimize.cpp @@ -0,0 +1,652 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * PEEPHOLE OPTIMIZER + * + */ + +void Optimizer::set_code(AsmOpConsList code) { + code_ = std::move(code); + unpack(); +} + +void Optimizer::unpack() { + int i = 0, j = 0; + for (AsmOpCons *p = code_.get(); p && i < optimize_depth; p = p->cdr.get(), ++j) { + if (p->car->is_very_custom()) { + break; + } + if (p->car->is_comment()) { + continue; + } + op_cons_[i] = p; + op_[i] = std::move(p->car); + offs_[i] = j; + ++i; + } + l_ = i; + indent_ = (i ? op_[0]->indent : 0); +} + +void Optimizer::pack() { + for (int i = 0; i < l_; i++) { + op_cons_[i]->car = std::move(op_[i]); + op_cons_[i] = nullptr; + } + l_ = 0; +} + +void Optimizer::apply() { + if (!p_ && !q_) { + return; + } + tolk_assert(p_ > 0 && p_ <= l_ && q_ >= 0 && q_ <= optimize_depth && l_ <= optimize_depth); + for (int i = p_; i < l_; i++) { + tolk_assert(op_[i]); + op_cons_[i]->car = std::move(op_[i]); + op_cons_[i] = nullptr; + } + for (int c = offs_[p_ - 1]; c >= 0; --c) { + code_ = std::move(code_->cdr); + } + for (int j = q_ - 1; j >= 0; j--) { + tolk_assert(oq_[j]); + oq_[j]->indent = indent_; + code_ = AsmOpCons::cons(std::move(oq_[j]), std::move(code_)); + } + l_ = 0; +} + +AsmOpConsList Optimizer::extract_code() { + pack(); + return std::move(code_); +} + +void Optimizer::show_head() const { + if (!debug_) { + return; + } + std::cerr << "optimizing"; + for (int i = 0; i < l_; i++) { + if (op_[i]) { + std::cerr << ' ' << *op_[i] << ' '; + } else { + std::cerr << " (null) "; + } + } + std::cerr << std::endl; +} + +void Optimizer::show_left() const { + if (!debug_) { + return; + } + std::cerr << "// *** rewriting"; + for (int i = 0; i < p_; i++) { + if (op_[i]) { + std::cerr << ' ' << *op_[i] << ' '; + } else { + std::cerr << " (null) "; + } + } +} + +void Optimizer::show_right() const { + if (!debug_) { + return; + } + std::cerr << "->"; + for (int i = 0; i < q_; i++) { + if (oq_[i]) { + std::cerr << ' ' << *oq_[i] << ' '; + } else { + std::cerr 
<< " (null) "; + } + } + std::cerr << std::endl; +} + +bool Optimizer::say(std::string str) const { + if (debug_) { + std::cerr << str << std::endl; + } + return true; +} + +bool Optimizer::find_const_op(int* op_idx, int cst) { + for (int i = 0; i < l2_; i++) { + if (op_[i]->is_gconst() && tr_[i].get(0) == cst) { + *op_idx = i; + return true; + } + } + return false; +} + +bool Optimizer::is_push_const(int* i, int* c) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_push_const(i, c); +} + +// PUSHCONST c ; PUSH s(i+1) ; SWAP -> PUSH s(i) ; PUSHCONST c +bool Optimizer::rewrite_push_const(int i, int c) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[1] = std::move(op_[idx]); + oq_[0] = std::move(op_[!idx]); + *oq_[0] = AsmOp::Push(i); + show_right(); + return true; +} + +bool Optimizer::is_const_rot(int* c) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_const_rot(c); +} + +bool Optimizer::rewrite_const_rot(int c) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[0] = std::move(op_[idx]); + oq_[1] = std::move(op_[!idx]); + *oq_[1] = AsmOp::Custom("ROT", 3, 3); + show_right(); + return true; +} + +bool Optimizer::is_const_pop(int* c, int* i) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_const_pop(c, i); +} + +bool Optimizer::rewrite_const_pop(int c, int i) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[0] = std::move(op_[idx]); + oq_[1] = std::move(op_[!idx]); + *oq_[1] = AsmOp::Pop(i); + show_right(); + return true; +} + +bool Optimizer::is_const_push_xchgs() { + if (!(pb_ >= 2 && pb_ <= l2_ && op_[0]->is_gconst())) { + return false; + } + StackTransform t; + int pos = 0, i; + for (i = 1; i < pb_; i++) { + int a, b; + if (op_[i]->is_xchg(&a, &b)) { + if (pos == a) { + 
pos = b; + } else if (pos == b) { + pos = a; + } else { + t.apply_xchg(a - (a > pos), b - (b > pos)); + } + } else if (op_[i]->is_push(&a)) { + if (pos == a) { + return false; + } + t.apply_push(a - (a > pos)); + ++pos; + } else { + return false; + } + } + if (pos) { + return false; + } + t.apply_push_newconst(); + if (t <= tr_[i - 1]) { + p_ = i; + return true; + } else { + return false; + } +} + +bool Optimizer::rewrite_const_push_xchgs() { + if (!p_) { + return false; + } + show_left(); + auto c_op = std::move(op_[0]); + tolk_assert(c_op->is_gconst()); + StackTransform t; + q_ = 0; + int pos = 0; + for (int i = 1; i < p_; i++) { + int a, b; + if (op_[i]->is_xchg(&a, &b)) { + if (a == pos) { + pos = b; + } else if (b == pos) { + pos = a; + } else { + oq_[q_] = std::move(op_[i]); + if (a > pos) { + oq_[q_]->a = a - 1; + } + if (b > pos) { + oq_[q_]->b = b - 1; + } + tolk_assert(apply_op(t, *oq_[q_])); + ++q_; + } + } else { + tolk_assert(op_[i]->is_push(&a)); + tolk_assert(a != pos); + oq_[q_] = std::move(op_[i]); + if (a > pos) { + oq_[q_]->a = a - 1; + } + tolk_assert(apply_op(t, *oq_[q_])); + ++q_; + ++pos; + } + } + tolk_assert(!pos); + t.apply_push_newconst(); + tolk_assert(t <= tr_[p_ - 1]); + oq_[q_++] = std::move(c_op); + show_right(); + return true; +} + +bool Optimizer::rewrite(int p, AsmOp&& new_op) { + tolk_assert(p > 0 && p <= l_); + p_ = p; + q_ = 1; + show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op; + show_right(); + return true; +} + +bool Optimizer::rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2) { + tolk_assert(p > 1 && p <= l_); + p_ = p; + q_ = 2; + show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op1; + oq_[1] = std::move(op_[1]); + *oq_[1] = new_op2; + show_right(); + return true; +} + +bool Optimizer::rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3) { + tolk_assert(p > 2 && p <= l_); + p_ = p; + q_ = 3; + show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op1; + oq_[1] = std::move(op_[1]); + 
*oq_[1] = new_op2; + oq_[2] = std::move(op_[2]); + *oq_[2] = new_op3; + show_right(); + return true; +} + +bool Optimizer::rewrite_nop() { + tolk_assert(p_ > 0 && p_ <= l_); + q_ = 0; + show_left(); + show_right(); + return true; +} + +bool Optimizer::is_pred(const std::function& pred, int min_p) { + min_p = std::max(min_p, pb_); + for (int p = l2_; p >= min_p; p--) { + if (pred(tr_[p - 1])) { + p_ = p; + return true; + } + } + return false; +} + +bool Optimizer::is_same_as(const StackTransform& trans, int min_p) { + return is_pred([&trans](const auto& t) { return t >= trans; }, min_p); +} + +// s1 s3 XCHG ; s0 s2 XCHG -> 2SWAP +bool Optimizer::is_2swap() { + static const StackTransform t_2swap{2, 3, 0, 1, 4}; + return is_same_as(t_2swap); +} + +// s3 PUSH ; s3 PUSH -> 2OVER +bool Optimizer::is_2over() { + static const StackTransform t_2over{2, 3, 0}; + return is_same_as(t_2over); +} + +bool Optimizer::is_2dup() { + static const StackTransform t_2dup{0, 1, 0}; + return is_same_as(t_2dup); +} + +bool Optimizer::is_tuck() { + static const StackTransform t_tuck{0, 1, 0, 2}; + return is_same_as(t_tuck); +} + +bool Optimizer::is_2drop() { + static const StackTransform t_2drop{2}; + return is_same_as(t_2drop); +} + +bool Optimizer::is_rot() { + return is_pred([](const auto& t) { return t.is_rot(); }); +} + +bool Optimizer::is_rotrev() { + return is_pred([](const auto& t) { return t.is_rotrev(); }); +} + +bool Optimizer::is_nop() { + return is_pred([](const auto& t) { return t.is_id(); }, 1); +} + +bool Optimizer::is_xchg(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xchg(i, j) && ((*i < 16 && *j < 16) || (!*i && *j < 256)); }); +} + +bool Optimizer::is_xchg_xchg(int* i, int* j, int* k, int* l) { + return is_pred([i, j, k, l](const auto& t) { + return t.is_xchg_xchg(i, j, k, l) && (*i < 2 && *j < (*i ? 16 : 256) && *k < 2 && *l < (*k ? 
16 : 256)); + }) && + (!(p_ == 2 && op_[0]->is_xchg(*i, *j) && op_[1]->is_xchg(*k, *l))); +} + +bool Optimizer::is_push(int* i) { + return is_pred([i](const auto& t) { return t.is_push(i) && *i < 256; }); +} + +bool Optimizer::is_pop(int* i) { + return is_pred([i](const auto& t) { return t.is_pop(i) && *i < 256; }); +} + +bool Optimizer::is_pop_pop(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_pop_pop(i, j) && *i < 256 && *j < 256; }, 3); +} + +bool Optimizer::is_push_rot(int* i) { + return is_pred([i](const auto& t) { return t.is_push_rot(i) && *i < 16; }, 3); +} + +bool Optimizer::is_push_rotrev(int* i) { + return is_pred([i](const auto& t) { return t.is_push_rotrev(i) && *i < 16; }, 3); +} + +bool Optimizer::is_push_xchg(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_push_xchg(i, j, k) && *i < 16 && *j < 16 && *k < 16; }) && + !(p_ == 2 && op_[0]->is_push() && op_[1]->is_xchg()); +} + +bool Optimizer::is_xchg2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xchg2(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_xcpu(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xcpu(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_puxc(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_puxc(i, j) && *i < 16 && *j < 15; }); +} + +bool Optimizer::is_push2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_push2(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_xchg3(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xchg3(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_xc2pu(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xc2pu(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_xcpuxc(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xcpuxc(i, j, k) && *i < 16 && *j < 16 
&& *k < 15; }); +} + +bool Optimizer::is_xcpu2(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xcpu2(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_puxc2(int* i, int* j, int* k) { + return is_pred( + [i, j, k](const auto& t) { return t.is_puxc2(i, j, k) && *i < 16 && *j < 15 && *k < 15 && *j + *k != -1; }); +} + +bool Optimizer::is_puxcpu(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_puxcpu(i, j, k) && *i < 16 && *j < 15 && *k < 15; }); +} + +bool Optimizer::is_pu2xc(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_pu2xc(i, j, k) && *i < 16 && *j < 15 && *k < 14; }); +} + +bool Optimizer::is_push3(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_push3(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_blkswap(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkswap(i, j) && *i > 0 && *j > 0 && *i <= 16 && *j <= 16; }); +} + +bool Optimizer::is_blkpush(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkpush(i, j) && *i > 0 && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_blkdrop(int* i) { + return is_pred([i](const auto& t) { return t.is_blkdrop(i) && *i > 0 && *i < 16; }); +} + +bool Optimizer::is_blkdrop2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkdrop2(i, j) && *i > 0 && *i < 16 && *j > 0 && *j < 16; }); +} + +bool Optimizer::is_reverse(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_reverse(i, j) && *i >= 2 && *i <= 17 && *j < 16; }); +} + +bool Optimizer::is_nip_seq(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_nip_seq(i, j) && *i >= 3 && *i <= 15; }); +} + +bool Optimizer::is_pop_blkdrop(int* i, int* k) { + return is_pred([i, k](const auto& t) { return t.is_pop_blkdrop(i, k) && *i >= *k && *k >= 2 && *k <= 15; }, 3); +} + +bool 
Optimizer::is_2pop_blkdrop(int* i, int* j, int* k) { + return is_pred( + [i, j, k](const auto& t) { return t.is_2pop_blkdrop(i, j, k) && *i >= *k && *j >= *k && *k >= 2 && *k <= 15; }, + 3); +} + +bool Optimizer::compute_stack_transforms() { + StackTransform trans; + for (int i = 0; i < l_; i++) { + if (!apply_op(trans, *op_[i])) { + l2_ = i; + return true; + } + tr_[i] = trans; + } + l2_ = l_; + return true; +} + +bool Optimizer::show_stack_transforms() const { + show_head(); + // slow version + /* + StackTransform trans2; + std::cerr << "id = " << trans2 << std::endl; + for (int i = 0; i < l_; i++) { + StackTransform op; + if (!apply_op(op, *op_[i])) { + std::cerr << "* (" << *op_[i] << " = invalid)\n"; + break; + } + trans2 *= op; + std::cerr << "* " << *op_[i] << " = " << op << " -> " << trans2 << std::endl; + } + */ + // fast version + StackTransform trans; + for (int i = 0; i < l_; i++) { + std::cerr << trans << std::endl << *op_[i] << " -> "; + if (!apply_op(trans, *op_[i])) { + std::cerr << " " << std::endl; + return true; + } + } + std::cerr << trans << std::endl; + return true; +} + +bool Optimizer::find_at_least(int pb) { + p_ = q_ = 0; + pb_ = pb; + // show_stack_transforms(); + int i, j, k, l, c; + return (is_push_const(&i, &c) && rewrite_push_const(i, c)) || (is_nop() && rewrite_nop()) || + (!(mode_ & 1) && is_const_rot(&c) && rewrite_const_rot(c)) || + (is_const_push_xchgs() && rewrite_const_push_xchgs()) || (is_const_pop(&c, &i) && rewrite_const_pop(c, i)) || + (is_xchg(&i, &j) && rewrite(AsmOp::Xchg(i, j))) || (is_push(&i) && rewrite(AsmOp::Push(i))) || + (is_pop(&i) && rewrite(AsmOp::Pop(i))) || (is_pop_pop(&i, &j) && rewrite(AsmOp::Pop(i), AsmOp::Pop(j))) || + (is_xchg_xchg(&i, &j, &k, &l) && rewrite(AsmOp::Xchg(i, j), AsmOp::Xchg(k, l))) || + (!(mode_ & 1) && + ((is_rot() && rewrite(AsmOp::Custom("ROT", 3, 3))) || (is_rotrev() && rewrite(AsmOp::Custom("-ROT", 3, 3))) || + (is_2dup() && rewrite(AsmOp::Custom("2DUP", 2, 4))) || + (is_2swap() && 
rewrite(AsmOp::Custom("2SWAP", 2, 4))) || + (is_2over() && rewrite(AsmOp::Custom("2OVER", 2, 4))) || + (is_tuck() && rewrite(AsmOp::Custom("TUCK", 2, 3))) || + (is_2drop() && rewrite(AsmOp::Custom("2DROP", 2, 0))) || (is_xchg2(&i, &j) && rewrite(AsmOp::Xchg2(i, j))) || + (is_xcpu(&i, &j) && rewrite(AsmOp::XcPu(i, j))) || (is_puxc(&i, &j) && rewrite(AsmOp::PuXc(i, j))) || + (is_push2(&i, &j) && rewrite(AsmOp::Push2(i, j))) || (is_blkswap(&i, &j) && rewrite(AsmOp::BlkSwap(i, j))) || + (is_blkpush(&i, &j) && rewrite(AsmOp::BlkPush(i, j))) || (is_blkdrop(&i) && rewrite(AsmOp::BlkDrop(i))) || + (is_push_rot(&i) && rewrite(AsmOp::Push(i), AsmOp::Custom("ROT"))) || + (is_push_rotrev(&i) && rewrite(AsmOp::Push(i), AsmOp::Custom("-ROT"))) || + (is_push_xchg(&i, &j, &k) && rewrite(AsmOp::Push(i), AsmOp::Xchg(j, k))) || + (is_reverse(&i, &j) && rewrite(AsmOp::BlkReverse(i, j))) || + (is_blkdrop2(&i, &j) && rewrite(AsmOp::BlkDrop2(i, j))) || + (is_nip_seq(&i, &j) && rewrite(AsmOp::Xchg(i, j), AsmOp::BlkDrop(i))) || + (is_pop_blkdrop(&i, &k) && rewrite(AsmOp::Pop(i), AsmOp::BlkDrop(k))) || + (is_2pop_blkdrop(&i, &j, &k) && (k >= 3 && k <= 13 && i != j + 1 && i <= 15 && j <= 14 + ? 
rewrite(AsmOp::Xchg2(j + 1, i), AsmOp::BlkDrop(k + 2)) + : rewrite(AsmOp::Pop(i), AsmOp::Pop(j), AsmOp::BlkDrop(k)))) || + (is_xchg3(&i, &j, &k) && rewrite(AsmOp::Xchg3(i, j, k))) || + (is_xc2pu(&i, &j, &k) && rewrite(AsmOp::Xc2Pu(i, j, k))) || + (is_xcpuxc(&i, &j, &k) && rewrite(AsmOp::XcPuXc(i, j, k))) || + (is_xcpu2(&i, &j, &k) && rewrite(AsmOp::XcPu2(i, j, k))) || + (is_puxc2(&i, &j, &k) && rewrite(AsmOp::PuXc2(i, j, k))) || + (is_puxcpu(&i, &j, &k) && rewrite(AsmOp::PuXcPu(i, j, k))) || + (is_pu2xc(&i, &j, &k) && rewrite(AsmOp::Pu2Xc(i, j, k))) || + (is_push3(&i, &j, &k) && rewrite(AsmOp::Push3(i, j, k))))); +} + +bool Optimizer::find() { + if (!compute_stack_transforms()) { + return false; + } + for (int pb = l_; pb > 0; --pb) { + if (find_at_least(pb)) { + return true; + } + } + return false; +} + +bool Optimizer::optimize() { + bool f = false; + while (find()) { + f = true; + apply(); + unpack(); + } + return f; +} + +AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) { + Optimizer opt(std::move(op_list), false, mode); + opt.optimize(); + return opt.extract_code(); +} + +AsmOpConsList optimize_code(AsmOpConsList op_list, int mode) { + std::vector> v; + while (op_list) { + if (!op_list->car->is_comment()) { + op_list = optimize_code_head(std::move(op_list), mode); + } + if (op_list) { + v.push_back(std::move(op_list->car)); + op_list = std::move(op_list->cdr); + } + } + for (auto it = v.rbegin(); it < v.rend(); ++it) { + op_list = AsmOpCons::cons(std::move(*it), std::move(op_list)); + } + return std::move(op_list); +} + +void optimize_code(AsmOpList& ops) { + AsmOpConsList op_list; + for (auto it = ops.list_.rbegin(); it < ops.list_.rend(); ++it) { + op_list = AsmOpCons::cons(std::make_unique(std::move(*it)), std::move(op_list)); + } + for (int mode : {1, 1, 1, 1, 0, 0, 0, 0}) { + op_list = optimize_code(std::move(op_list), mode); + } + ops.list_.clear(); + while (op_list) { + ops.list_.push_back(std::move(*(op_list->car))); + op_list = 
std::move(op_list->cdr); + } +} + +} // namespace tolk diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp new file mode 100644 index 000000000..7257bfb07 --- /dev/null +++ b/tolk/pipe-ast-to-legacy.cpp @@ -0,0 +1,1355 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "compiler-state.h" +#include "common/refint.h" +#include "openssl/digest.hpp" +#include "crypto/common/util.h" +#include "td/utils/crypto.h" +#include "ton/ton-types.h" + +/* + * In this module, we convert modern AST representation to legacy representation + * (global state, Expr, CodeBlob, etc.) to make the rest of compiling process remain unchanged for now. + * Since time goes, I'll gradually get rid of legacy, since most of the code analysis + * should be done at AST level. 
+ */ + +namespace tolk { + +static int calc_sym_idx(std::string_view sym_name) { + return G.symbols.lookup(sym_name); +} + +void Expr::fire_error_rvalue_expected() const { + // generally, almost all vertices are rvalue, that's why code leading to "not rvalue" + // should be very strange, like `var x = _` + throw ParseError(here, "rvalue expected"); +} + +void Expr::fire_error_lvalue_expected(const std::string& details) const { + // "lvalue expected" is when a user modifies something unmodifiable + // example: `f() = 32` + // example: `loadUint(c.beginParse(), 32)` (since `loadUint()` mutates the first argument) + throw ParseError(here, "lvalue expected (" + details + ")"); +} + +void Expr::fire_error_modifying_immutable(const std::string& details) const { + // "modifying immutable variable" is when a user assigns to a variable declared `val` + // example: `immutable_val = 32` + // example: `(regular_var, immutable_val) = f()` + // for better error message, try to print out variable name if possible + std::string variable_name; + if (cls == _Var || cls == _Const) { + variable_name = sym->name(); + } else if (cls == _Tensor || cls == _MkTuple) { + for (const Expr* arg : (cls == _Tensor ? 
args : args[0]->args)) { + if (arg->is_immutable() && (arg->cls == _Var || arg->cls == _Const)) { + variable_name = arg->sym->name(); + break; + } + } + } + + if (variable_name == "self") { + throw ParseError(here, "modifying `self` (" + details + "), which is immutable by default; probably, you want to declare `mutate self`"); + } else if (!variable_name.empty()) { + throw ParseError(here, "modifying an immutable variable `" + variable_name + "` (" + details + ")"); + } else { + throw ParseError(here, "modifying an immutable variable (" + details + ")"); + } +} + +GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN +static void fire_error_invalid_mutate_arg_passed(SrcLocation loc, const SymDef* func_sym, const SymDef* param_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { + std::string func_name = func_sym->name(); + std::string arg_str(arg_expr->type == ast_identifier ? arg_expr->as()->name : "obj"); + const SymValFunc* func_val = dynamic_cast(func_sym->value); + const SymValVariable* param_val = dynamic_cast(param_sym->value); + + // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` + if (param_val->is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && param_val->idx == 0 && func_val->does_accept_self()) { + throw ParseError(loc, "`" + func_name + "` is a mutating method; consider calling `" + arg_str + "." + func_name + "()`, not `" + func_name + "(" + arg_str + ")`"); + } + // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method + if (param_val->is_mutate_parameter() && called_as_method && param_val->idx == 0 && !func_val->does_accept_self()) { + throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; consider calling `" + func_name + "(mutate " + arg_str + ")`, not `" + arg_str + "." 
+ func_name + "`(); alternatively, rename parameter to `self` to make it a method"); + } + // case: `mutating_function(arg)`; suggest: `mutate arg` + if (param_val->is_mutate_parameter() && !arg_passed_as_mutate) { + throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; you need to specify `mutate` when passing an argument, like `mutate " + arg_str + "`"); + } + // case: `usual_function(mutate arg)` + if (!param_val->is_mutate_parameter() && arg_passed_as_mutate) { + throw ParseError(loc, "incorrect `mutate`, since `" + func_name + "` does not mutate this parameter"); + } + throw Fatal("unreachable"); +} + +// parse address like "EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5" +// based on unpack_std_smc_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_friendly_address(const char packed[48], ton::WorkchainId& workchain, ton::StdSmcAddress& addr) { + unsigned char buffer[36]; + if (!td::buff_base64_decode(td::MutableSlice{buffer, 36}, td::Slice{packed, 48}, true)) { + return false; + } + td::uint16 crc = td::crc16(td::Slice{buffer, 34}); + if (buffer[34] != (crc >> 8) || buffer[35] != (crc & 0xff) || (buffer[0] & 0x3f) != 0x11) { + return false; + } + workchain = (td::int8)buffer[1]; + std::memcpy(addr.data(), buffer + 2, 32); + return true; +} + +// parse address like "0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8" +// based on StdAddress::parse_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_raw_address(const std::string& acc_string, int& workchain, ton::StdSmcAddress& addr) { + size_t pos = acc_string.find(':'); + if (pos != std::string::npos) { + td::Result r_wc = td::to_integer_safe(acc_string.substr(0, pos)); + if (r_wc.is_error()) { + return false; + } + workchain = r_wc.move_as_ok(); + pos++; + } else { + pos = 0; + } + if (acc_string.size() != pos + 64) { + return false; + } + + for 
(int i = 0; i < 64; ++i) { // loop through each hex digit + char c = acc_string[pos + i]; + int x; + if (c >= '0' && c <= '9') { + x = c - '0'; + } else if (c >= 'a' && c <= 'z') { + x = c - 'a' + 10; + } else if (c >= 'A' && c <= 'Z') { + x = c - 'A' + 10; + } else { + return false; + } + + if ((i & 1) == 0) { + addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0x0F) | (x << 4)); + } else { + addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0xF0) | x); + } + } + return true; +} + +static Expr* create_expr_apply(SrcLocation loc, SymDef* sym, std::vector&& args) { + Expr* apply = new Expr(Expr::_Apply, sym, std::move(args)); + apply->here = loc; + apply->flags = Expr::_IsRvalue; + apply->deduce_type(); + return apply; +} + +static Expr* create_expr_int_const(SrcLocation loc, int int_val) { + Expr* int_const = new Expr(Expr::_Const, loc); + int_const->intval = td::make_refint(int_val); + int_const->flags = Expr::_IsRvalue; + int_const->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + return int_const; +} + +namespace blk_fl { +enum { end = 1, ret = 2, empty = 4 }; +typedef int val; +constexpr val init = end | empty; +void combine(val& x, const val y) { + x |= y & ret; + x &= y | ~(end | empty); +} +void combine_parallel(val& x, const val y) { + x &= y | ~(ret | empty); + x |= y & end; +} +} // namespace blk_fl + +Expr* process_expr(AnyV v, CodeBlob& code); +blk_fl::val process_statement(AnyV v, CodeBlob& code); + +static void check_global_func(SrcLocation loc, sym_idx_t func_name) { + SymDef* sym_def = lookup_symbol(func_name); + if (!sym_def) { + throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`"); + } +} + +static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_sym) { + if (!v_usage->loc.is_symbol_from_same_or_builtin_file(used_sym->loc)) { + const SrcFile* declared_in = used_sym->loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportStatement& import_stmt : 
v_usage->loc.get_src_file()->imports) { + if (import_stmt.imported_file == declared_in) { + has_import = true; + } + } + if (!has_import) { + v_usage->error("Using a non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + +static Expr* create_new_local_variable(SrcLocation loc, std::string_view var_name, TypeExpr* var_type, bool is_immutable) { + SymDef* sym = lookup_symbol(calc_sym_idx(var_name)); + if (sym) { // creating a new variable, but something found in symtable + if (sym->level != G.scope_level) { + sym = nullptr; // declaring a new variable with the same name, but in another scope + } else { + throw ParseError(loc, "redeclaration of local variable `" + static_cast(var_name) + "`"); + } + } + Expr* x = new Expr{Expr::_Var, loc}; + x->val = ~calc_sym_idx(var_name); + x->e_type = var_type; + x->flags = Expr::_IsLvalue | (is_immutable ? Expr::_IsImmutable : 0); + return x; +} + +static Expr* create_new_underscore_variable(SrcLocation loc, TypeExpr* var_type) { + Expr* x = new Expr{Expr::_Hole, loc}; + x->val = -1; + x->flags = Expr::_IsLvalue; + x->e_type = var_type; + return x; +} + +static Expr* process_expr(V v, CodeBlob& code) { + TokenType t = v->tok; + std::string operator_name = static_cast(v->operator_name); + + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || + t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { + Expr* x = process_expr(v->get_lhs(), code); + x->chk_rvalue(); + if (!x->is_lvalue()) { + x->fire_error_lvalue_expected("left side of assignment"); + } + if (x->is_immutable()) { + x->fire_error_modifying_immutable("left side of assignment"); + } + SymDef* sym = lookup_symbol(calc_sym_idx("^_" + operator_name + "_")); + Expr* y = process_expr(v->get_rhs(), code); + y->chk_rvalue(); + Expr* z = create_expr_apply(v->loc, sym, {x, y}); + Expr* 
res = new Expr{Expr::_Letop, {x->copy(), z}}; + res->here = v->loc; + res->flags = x->flags | Expr::_IsRvalue; + res->deduce_type(); + return res; + } + if (t == tok_assign) { + Expr* x = process_expr(v->get_lhs(), code); + if (!x->is_lvalue()) { + x->fire_error_lvalue_expected("left side of assignment"); + } + if (x->is_immutable()) { + x->fire_error_modifying_immutable("left side of assignment"); + } + Expr* y = process_expr(v->get_rhs(), code); + y->chk_rvalue(); + x->predefine_vars(); + x->define_new_vars(code); + Expr* res = new Expr{Expr::_Letop, {x, y}}; + res->here = v->loc; + res->flags = x->flags | Expr::_IsRvalue; + res->deduce_type(); + return res; + } + if (t == tok_minus || t == tok_plus || + t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || + t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || + t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || + t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { + Expr* res = process_expr(v->get_lhs(), code); + res->chk_rvalue(); + SymDef* sym = lookup_symbol(calc_sym_idx("_" + operator_name + "_")); + Expr* x = process_expr(v->get_rhs(), code); + x->chk_rvalue(); + res = create_expr_apply(v->loc, sym, {res, x}); + return res; + } + if (t == tok_logical_and || t == tok_logical_or) { + // do the following transformations: + // a && b -> a ? (b != 0) : 0 + // a || b -> a ? 1 : (b != 0) + SymDef* sym_neq = lookup_symbol(calc_sym_idx("_!=_")); + Expr* lhs = process_expr(v->get_lhs(), code); + Expr* rhs = process_expr(v->get_rhs(), code); + Expr* e_neq0 = create_expr_apply(v->loc, sym_neq, {rhs, create_expr_int_const(v->loc, 0)}); + Expr* e_when_true = t == tok_logical_and ? e_neq0 : create_expr_int_const(v->loc, -1); + Expr* e_when_false = t == tok_logical_and ? 
create_expr_int_const(v->loc, 0) : e_neq0; + Expr* e_ternary = new Expr(Expr::_CondExpr, {lhs, e_when_true, e_when_false}); + e_ternary->here = v->loc; + e_ternary->flags = Expr::_IsRvalue; + e_ternary->deduce_type(); + return e_ternary; + } + + v->error("unsupported binary operator"); +} + +static Expr* process_expr(V v, CodeBlob& code) { + TokenType t = v->tok; + SymDef* sym = lookup_symbol(calc_sym_idx(static_cast(v->operator_name) + "_")); + Expr* x = process_expr(v->get_rhs(), code); + x->chk_rvalue(); + + // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) + // without this, everything still works, but Tolk looses some vars/stack knowledge for now (to be fixed later) + // in FunC, it was: + // `var fst = -1;` // is constantly 1 + // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on + // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), + // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here + if (t == tok_minus && x->cls == Expr::_Const) { + x->intval = -x->intval; + if (!x->intval->signed_fits_bits(257)) { + v->error("integer overflow"); + } + return x; + } + if (t == tok_plus && x->cls == Expr::_Const) { + return x; + } + + return create_expr_apply(v->loc, sym, {x}); +} + +static Expr* process_expr(V v, CodeBlob& code) { + Expr* cond = process_expr(v->get_cond(), code); + cond->chk_rvalue(); + Expr* x = process_expr(v->get_when_true(), code); + x->chk_rvalue(); + Expr* y = process_expr(v->get_when_false(), code); + y->chk_rvalue(); + Expr* res = new Expr{Expr::_CondExpr, {cond, x, y}}; + res->here = v->loc; + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +static Expr* process_function_arguments(SymDef* func_sym, V v, Expr* lhs_of_dot_call, CodeBlob& code) { + SymValFunc* func_val = dynamic_cast(func_sym->value); + int 
delta_self = lhs_of_dot_call ? 1 : 0; + int n_arguments = static_cast(v->get_arguments().size()) + delta_self; + int n_parameters = static_cast(func_val->parameters.size()); + + // Tolk doesn't have optional parameters currently, so just compare counts + if (n_parameters < n_arguments) { + v->error("too many arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + if (n_arguments < n_parameters) { + v->error("too few arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + + std::vector apply_args; + apply_args.reserve(n_arguments); + if (lhs_of_dot_call) { + apply_args.push_back(lhs_of_dot_call); + } + for (int i = delta_self; i < n_arguments; ++i) { + auto v_arg = v->get_arg(i - delta_self); + if (SymDef* param_sym = func_val->parameters[i]) { // can be null (for underscore parameter) + SymValVariable* param_val = dynamic_cast(param_sym->value); + if (param_val->is_mutate_parameter() != v_arg->passed_as_mutate) { + fire_error_invalid_mutate_arg_passed(v_arg->loc, func_sym, param_sym, false, v_arg->passed_as_mutate, v_arg->get_expr()); + } + } + + Expr* arg = process_expr(v_arg->get_expr(), code); + arg->chk_rvalue(); + apply_args.push_back(arg); + } + + Expr* apply = new Expr{Expr::_Apply, func_sym, std::move(apply_args)}; + apply->flags = Expr::_IsRvalue | (!func_val->is_marked_as_pure() * Expr::_IsImpure); + apply->here = v->loc; + apply->deduce_type(); + + return apply; +} + +static Expr* process_function_call(V v, CodeBlob& code) { + // special error for "null()" which is a FunC syntax + if (v->get_called_f()->type == ast_null_keyword) { + v->error("null is not a function: use `null`, not `null()`"); + } + + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + Expr* lhs = 
process_expr(v->get_called_f(), code); + if (lhs->cls != Expr::_GlobFunc) { + Expr* tensor_arg = new Expr(Expr::_Tensor, v->loc); + std::vector type_list; + type_list.reserve(v->get_num_args()); + for (int i = 0; i < v->get_num_args(); ++i) { + auto v_arg = v->get_arg(i); + if (v_arg->passed_as_mutate) { + v_arg->error("`mutate` used for non-mutate argument"); + } + Expr* arg = process_expr(v_arg->get_expr(), code); + arg->chk_rvalue(); + tensor_arg->pb_arg(arg); + type_list.push_back(arg->e_type); + } + tensor_arg->flags = Expr::_IsRvalue; + tensor_arg->e_type = TypeExpr::new_tensor(std::move(type_list)); + + Expr* var_apply = new Expr{Expr::_VarApply, {lhs, tensor_arg}}; + var_apply->here = v->loc; + var_apply->flags = Expr::_IsRvalue; + var_apply->deduce_type(); + return var_apply; + } + + Expr* apply = process_function_arguments(lhs->sym, v->get_arg_list(), nullptr, code); + + if (dynamic_cast(apply->sym->value)->has_mutate_params()) { + const std::vector& args = apply->args; + SymValFunc* func_val = dynamic_cast(apply->sym->value); + tolk_assert(func_val->parameters.size() == args.size()); + Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); + std::vector type_list; + for (int i = 0; i < static_cast(args.size()); ++i) { + SymDef* param_def = func_val->parameters[i]; + if (param_def && dynamic_cast(param_def->value)->is_mutate_parameter()) { + if (!args[i]->is_lvalue()) { + args[i]->fire_error_lvalue_expected("call a mutating function"); + } + if (args[i]->is_immutable()) { + args[i]->fire_error_modifying_immutable("call a mutating function"); + } + grabbed_vars->pb_arg(args[i]->copy()); + type_list.emplace_back(args[i]->e_type); + } + } + grabbed_vars->flags = Expr::_IsRvalue; + Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, apply->sym, {apply, grabbed_vars}); + grab_mutate->here = v->loc; + grab_mutate->flags = apply->flags; + grab_mutate->deduce_type(); + return grab_mutate; + } + + return apply; +} + +static Expr* process_dot_method_call(V v, 
CodeBlob& code) { + sym_idx_t name_idx = calc_sym_idx(v->method_name); + check_global_func(v->loc, name_idx); + SymDef* func_sym = lookup_symbol(name_idx); + SymValFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val != nullptr); + + Expr* obj = process_expr(v->get_obj(), code); + obj->chk_rvalue(); + + if (func_val->parameters.empty()) { + v->error("`" + func_sym->name() + "` has no parameters and can not be called as method"); + } + if (!func_val->does_accept_self() && func_val->parameters[0] && dynamic_cast(func_val->parameters[0]->value)->is_mutate_parameter()) { + fire_error_invalid_mutate_arg_passed(v->loc, func_sym, func_val->parameters[0], true, false, v->get_obj()); + } + + Expr* apply = process_function_arguments(func_sym, v->get_arg_list(), obj, code); + + Expr* obj_lval = apply->args[0]; + if (!obj_lval->is_lvalue()) { + if (obj_lval->cls == Expr::_ReturnSelf) { + obj_lval = obj_lval->args[1]; + } else { + Expr* tmp_var = create_new_underscore_variable(v->loc, obj_lval->e_type); + tmp_var->define_new_vars(code); + Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, obj_lval}); + assign_to_tmp_var->here = v->loc; + assign_to_tmp_var->flags = Expr::_IsRvalue; + assign_to_tmp_var->deduce_type(); + apply->args[0] = assign_to_tmp_var; + obj_lval = tmp_var; + } + } + + if (func_val->has_mutate_params()) { + tolk_assert(func_val->parameters.size() == apply->args.size()); + Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); + std::vector type_list; + for (int i = 0; i < static_cast(apply->args.size()); ++i) { + SymDef* param_sym = func_val->parameters[i]; + if (param_sym && dynamic_cast(param_sym->value)->is_mutate_parameter()) { + Expr* ith_arg = apply->args[i]; + if (ith_arg->is_immutable()) { + ith_arg->fire_error_modifying_immutable("call a mutating method"); + } + + Expr* var_to_mutate = nullptr; + if (ith_arg->is_lvalue()) { + var_to_mutate = ith_arg->copy(); + } else if (i == 0) { + var_to_mutate = obj_lval; + } else { + 
ith_arg->fire_error_lvalue_expected("call a mutating method"); + } + tolk_assert(var_to_mutate->is_lvalue() && !var_to_mutate->is_immutable()); + grabbed_vars->pb_arg(var_to_mutate); + type_list.emplace_back(var_to_mutate->e_type); + } + } + grabbed_vars->flags = Expr::_IsRvalue; + + Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, func_sym, {apply, grabbed_vars}); + grab_mutate->here = v->loc; + grab_mutate->flags = apply->flags; + grab_mutate->deduce_type(); + + apply = grab_mutate; + } + + if (func_val->does_return_self()) { + Expr* self_arg = obj_lval; + tolk_assert(self_arg->is_lvalue()); + + Expr* return_self = new Expr(Expr::_ReturnSelf, func_sym, {apply, self_arg}); + return_self->here = v->loc; + return_self->flags = Expr::_IsRvalue; + return_self->deduce_type(); + + apply = return_self; + } + + return apply; +} + +static Expr* process_expr(V v, CodeBlob& code) { + if (v->empty()) { + Expr* res = new Expr{Expr::_Tensor, {}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_unit(); + return res; + } + + Expr* res = process_expr(v->get_item(0), code); + std::vector type_list; + type_list.push_back(res->e_type); + int f = res->flags; + res = new Expr{Expr::_Tensor, {res}}; + for (int i = 1; i < v->size(); ++i) { + Expr* x = process_expr(v->get_item(i), code); + res->pb_arg(x); + f &= (x->flags | Expr::_IsImmutable); + f |= (x->flags & Expr::_IsImmutable); + type_list.push_back(x->e_type); + } + res->here = v->loc; + res->flags = f; + res->e_type = TypeExpr::new_tensor(std::move(type_list)); + return res; +} + +static Expr* process_expr(V v, CodeBlob& code) { + if (v->empty()) { + Expr* res = new Expr{Expr::_Tensor, {}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_unit(); + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + return res; + } + + Expr* res = 
process_expr(v->get_item(0), code); + std::vector type_list; + type_list.push_back(res->e_type); + int f = res->flags; + res = new Expr{Expr::_Tensor, {res}}; + for (int i = 1; i < v->size(); ++i) { + Expr* x = process_expr(v->get_item(i), code); + res->pb_arg(x); + f &= (x->flags | Expr::_IsImmutable); + f |= (x->flags & Expr::_IsImmutable); + type_list.push_back(x->e_type); + } + res->here = v->loc; + res->flags = f; + res->e_type = TypeExpr::new_tensor(std::move(type_list), false); + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = f; + res->here = v->loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + return res; +} + +static Expr* process_expr(V v) { + Expr* res = new Expr{Expr::_Const, v->loc}; + res->flags = Expr::_IsRvalue; + res->intval = td::string_to_int256(static_cast(v->int_val)); + if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { + v->error("invalid integer constant"); + } + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + return res; +} + +static Expr* process_expr(V v) { + std::string str = static_cast(v->str_val); + Expr* res; + switch (v->modifier) { + case 0: + case 's': + case 'a': + res = new Expr{Expr::_SliceConst, v->loc}; + res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); + break; + case 'u': + case 'h': + case 'H': + case 'c': + res = new Expr{Expr::_Const, v->loc}; + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + break; + default: + v->error("invalid string modifier '" + std::string(1, v->modifier) + "'"); + } + res->flags = Expr::_IsRvalue; + switch (v->modifier) { + case 0: { + res->strval = td::hex_encode(str); + break; + } + case 's': { + res->strval = str; + unsigned char buff[128]; + int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); + if (bits < 0) { + v->error("invalid hex bitstring constant '" + str + "'"); + } + break; + } + case 'a': { // MsgAddress + int workchain; + ton::StdSmcAddress addr; + bool correct 
= (str.size() == 48 && parse_friendly_address(str.data(), workchain, addr)) || + (str.size() != 48 && parse_raw_address(str, workchain, addr)); + if (!correct) { + v->error("invalid standard address '" + str + "'"); + } + if (workchain < -128 || workchain >= 128) { + v->error("anycast addresses not supported"); + } + + unsigned char data[3 + 8 + 256]; // addr_std$10 anycast:(Maybe Anycast) workchain_id:int8 address:bits256 = MsgAddressInt; + td::bitstring::bits_store_long_top(data, 0, static_cast(4) << (64 - 3), 3); + td::bitstring::bits_store_long_top(data, 3, static_cast(workchain) << (64 - 8), 8); + td::bitstring::bits_memcpy(data, 3 + 8, addr.bits().ptr, 0, addr.size()); + res->strval = td::BitSlice{data, sizeof(data)}.to_hex(); + break; + } + case 'u': { + res->intval = td::hex_string_to_int256(td::hex_encode(str)); + if (str.empty()) { + v->error("empty integer ascii-constant"); + } + if (res->intval.is_null()) { + v->error("too long integer ascii-constant"); + } + break; + } + case 'h': + case 'H': { + unsigned char hash[32]; + digest::hash_str(hash, str.data(), str.size()); + res->intval = td::bits_to_refint(hash, (v->modifier == 'h') ? 32 : 256, false); + break; + } + case 'c': { + res->intval = td::make_refint(td::crc32(td::Slice{str})); + break; + } + default: + tolk_assert(false); + } + return res; +} + +static Expr* process_expr(V v) { + SymDef* builtin_sym = lookup_symbol(calc_sym_idx(v->bool_val ? 
"__true" : "__false")); + return create_expr_apply(v->loc, builtin_sym, {}); +} + +static Expr* process_expr(V v) { + SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__null")); + return create_expr_apply(v->loc, builtin_sym, {}); +} + +static Expr* process_expr(V v, CodeBlob& code) { + if (!code.func_val->does_accept_self()) { + v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + } + SymDef* sym = lookup_symbol(calc_sym_idx("self")); + tolk_assert(sym); + SymValVariable* sym_val = dynamic_cast(sym->value); + Expr* res = new Expr(Expr::_Var, v->loc); + res->sym = sym; + res->val = sym_val->idx; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (sym_val->is_immutable() ? Expr::_IsImmutable : 0); + res->e_type = sym_val->get_type(); + return res; +} + +static Expr* process_identifier(V v) { + SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + Expr* res = new Expr{Expr::_GlobVar, v->loc}; + res->e_type = sym->value->get_type(); + res->sym = sym; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; + return res; + } + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + auto val = dynamic_cast(sym->value); + Expr* res = nullptr; + if (val->get_kind() == SymValConst::IntConst) { + res = new Expr{Expr::_Const, v->loc}; + res->intval = val->get_int_value(); + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + } else if (val->get_kind() == SymValConst::SliceConst) { + res = new Expr{Expr::_SliceConst, v->loc}; + res->strval = val->get_str_value(); + res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); + } else { + v->error("invalid symbolic constant type"); + } + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImmutable; + res->sym = sym; + return res; + } + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + } + Expr* res = new 
Expr{Expr::_Var, v->loc}; + if (!sym) { + check_global_func(v->loc, calc_sym_idx(v->name)); + sym = lookup_symbol(calc_sym_idx(v->name)); + tolk_assert(sym); + } + res->sym = sym; + bool impure = false; + bool immutable = false; + if (const SymValFunc* func_val = dynamic_cast(sym->value)) { + res->e_type = func_val->get_type(); + res->cls = Expr::_GlobFunc; + impure = !func_val->is_marked_as_pure(); + } else if (const SymValVariable* var_val = dynamic_cast(sym->value)) { + tolk_assert(var_val->idx >= 0) + res->val = var_val->idx; + res->e_type = var_val->get_type(); + immutable = var_val->is_immutable(); + // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; + } else { + v->error("undefined identifier '" + static_cast(v->name) + "'"); + } + // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0) | (immutable ? 
Expr::_IsImmutable : 0); + res->deduce_type(); + return res; +} + +Expr* process_expr(AnyV v, CodeBlob& code) { + switch (v->type) { + case ast_binary_operator: + return process_expr(v->as(), code); + case ast_unary_operator: + return process_expr(v->as(), code); + case ast_ternary_operator: + return process_expr(v->as(), code); + case ast_function_call: + return process_function_call(v->as(), code); + case ast_dot_method_call: + return process_dot_method_call(v->as(), code); + case ast_parenthesized_expr: + return process_expr(v->as()->get_expr(), code); + case ast_tensor: + return process_expr(v->as(), code); + case ast_tensor_square: + return process_expr(v->as(), code); + case ast_int_const: + return process_expr(v->as()); + case ast_string_const: + return process_expr(v->as()); + case ast_bool_const: + return process_expr(v->as()); + case ast_null_keyword: + return process_expr(v->as()); + case ast_self_keyword: + return process_expr(v->as(), code); + case ast_identifier: + return process_identifier(v->as()); + case ast_underscore: + return create_new_underscore_variable(v->loc, TypeExpr::new_hole()); + default: + throw UnexpectedASTNodeType(v, "process_expr"); + } +} + +static Expr* process_local_vars_lhs(AnyV v, CodeBlob& code) { + switch (v->type) { + case ast_local_var: { + auto v_var = v->as(); + if (v_var->marked_as_redef) { + Expr* redef_var = process_identifier(v_var->get_identifier()->as()); + if (redef_var->is_immutable()) { + redef_var->fire_error_modifying_immutable("left side of assignment"); + } + return redef_var; + } + TypeExpr* var_type = v_var->declared_type ? 
v_var->declared_type : TypeExpr::new_hole(); + if (auto v_ident = v->as()->get_identifier()->try_as()) { + return create_new_local_variable(v->loc, v_ident->name, var_type, v_var->is_immutable); + } else { + return create_new_underscore_variable(v->loc, var_type); + } + } + case ast_parenthesized_expr: + return process_local_vars_lhs(v->as()->get_expr(), code); + case ast_tensor: { + std::vector type_list; + Expr* res = new Expr{Expr::_Tensor, v->loc}; + for (AnyV item : v->as()->get_items()) { + Expr* x = process_local_vars_lhs(item, code); + res->pb_arg(x); + res->flags |= x->flags; + type_list.push_back(x->e_type); + } + res->e_type = TypeExpr::new_tensor(std::move(type_list)); + return res; + } + case ast_tensor_square: { + std::vector type_list; + Expr* res = new Expr{Expr::_Tensor, v->loc}; + for (AnyV item : v->as()->get_items()) { + Expr* x = process_local_vars_lhs(item, code); + res->pb_arg(x); + res->flags |= x->flags; + type_list.push_back(x->e_type); + } + res->e_type = TypeExpr::new_tensor(std::move(type_list)); + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = res->args.at(0)->flags; + res->here = v->loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + return res; + } + default: + throw UnexpectedASTNodeType(v, "process_local_vars_lhs"); + } +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* x = process_local_vars_lhs(v->get_lhs(), code); + Expr* y = process_expr(v->get_assigned_val(), code); + y->chk_rvalue(); + x->predefine_vars(); + x->define_new_vars(code); + Expr* res = new Expr{Expr::_Letop, {x, y}}; + res->here = v->loc; + res->flags = x->flags | Expr::_IsRvalue; + res->deduce_type(); + res->chk_rvalue(); + res->pre_compile(code); + return blk_fl::end; +} + +static bool is_expr_valid_as_return_self(Expr* return_expr) { + // `return self` + if (return_expr->cls == Expr::_Var && return_expr->val == 0) { + return true; + } + if (return_expr->cls == Expr::_ReturnSelf) { + return 
is_expr_valid_as_return_self(return_expr->args[1]); + } + if (return_expr->cls == Expr::_CondExpr) { + return is_expr_valid_as_return_self(return_expr->args[1]) && is_expr_valid_as_return_self(return_expr->args[2]); + } + return false; +} + +// for mutating functions, having `return expr`, transform it to `return (modify_var1, ..., expr)` +static Expr* wrap_return_value_with_mutate_params(SrcLocation loc, CodeBlob& code, Expr* return_expr) { + Expr* tmp_var; + if (return_expr->cls != Expr::_Var) { + // `return complex_expr` - extract this into temporary variable (eval it before return) + // this is mandatory if it assigns to one of modified vars + tmp_var = create_new_underscore_variable(loc, return_expr->e_type); + tmp_var->predefine_vars(); + tmp_var->define_new_vars(code); + Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, return_expr}); + assign_to_tmp_var->here = loc; + assign_to_tmp_var->flags = tmp_var->flags | Expr::_IsRvalue; + assign_to_tmp_var->deduce_type(); + assign_to_tmp_var->pre_compile(code); + } else { + tmp_var = return_expr; + } + + Expr* ret_tensor = new Expr(Expr::_Tensor, loc); + std::vector type_list; + for (SymDef* p_sym: code.func_val->parameters) { + if (p_sym && dynamic_cast(p_sym->value)->is_mutate_parameter()) { + Expr* p_expr = new Expr{Expr::_Var, p_sym->loc}; + p_expr->sym = p_sym; + p_expr->val = p_sym->value->idx; + p_expr->flags = Expr::_IsRvalue; + p_expr->e_type = p_sym->value->get_type(); + ret_tensor->pb_arg(p_expr); + type_list.emplace_back(p_expr->e_type); + } + } + ret_tensor->pb_arg(tmp_var); + type_list.emplace_back(tmp_var->e_type); + ret_tensor->flags = Expr::_IsRvalue; + ret_tensor->e_type = TypeExpr::new_tensor(std::move(type_list)); + return ret_tensor; +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_return_value(), code); + if (code.func_val->does_return_self()) { + if (!is_expr_valid_as_return_self(expr)) { + v->error("invalid return from `self` 
function"); + } + Expr* var_self = new Expr(Expr::_Var, v->loc); + var_self->flags = Expr::_IsRvalue | Expr::_IsLvalue; + var_self->e_type = code.func_val->parameters[0]->value->get_type(); + Expr* assign_to_self = new Expr(Expr::_Letop, {var_self, expr}); + assign_to_self->here = v->loc; + assign_to_self->flags = Expr::_IsRvalue; + assign_to_self->deduce_type(); + assign_to_self->pre_compile(code); + Expr* empty_tensor = new Expr(Expr::_Tensor, {}); + empty_tensor->here = v->loc; + empty_tensor->flags = Expr::_IsRvalue; + empty_tensor->e_type = TypeExpr::new_tensor({}); + expr = empty_tensor; + } + if (code.func_val->has_mutate_params()) { + expr = wrap_return_value_with_mutate_params(v->loc, code, expr); + } + expr->chk_rvalue(); + try { + unify(expr->e_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; + v->error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); + return blk_fl::ret; +} + +static void append_implicit_ret_stmt(SrcLocation loc_end, CodeBlob& code) { + Expr* expr = new Expr{Expr::_Tensor, {}}; + expr->flags = Expr::_IsRvalue; + expr->here = loc_end; + expr->e_type = TypeExpr::new_unit(); + if (code.func_val->does_return_self()) { + throw ParseError(loc_end, "missing return; forgot `return self`?"); + } + if (code.func_val->has_mutate_params()) { + expr = wrap_return_value_with_mutate_params(loc_end, code, expr); + } + try { + unify(expr->e_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with implicit end-of-block return type " << expr->e_type << ": " << ue; + throw ParseError(loc_end, os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + code.emplace_back(loc_end, 
Op::_Return, std::move(tmp_vars)); +} + +static blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { + if (!no_new_scope) { + open_scope(v->loc); + } + blk_fl::val res = blk_fl::init; + bool warned = false; + for (AnyV item : v->get_items()) { + if (!(res & blk_fl::end) && !warned) { + item->loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, process_statement(item, code)); + } + if (!no_new_scope) { + close_scope(); + } + return res; +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + v->get_cond()->error("repeat count value is not a singleton"); + } + Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); + code.push_set_cur(repeat_op.block0); + blk_fl::val res = process_vertex(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); + return res | blk_fl::end; +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + Op& while_op = code.emplace_back(v->loc, Op::_While); + code.push_set_cur(while_op.block0); + while_op.left = expr->pre_compile(code); + code.close_pop_cur(v->get_body()->loc); + if (while_op.left.size() != 1) { + v->get_cond()->error("while condition value is not a singleton"); + } + 
code.push_set_cur(while_op.block1); + blk_fl::val res1 = process_vertex(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); + return res1 | blk_fl::end; +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + Op& until_op = code.emplace_back(v->loc, Op::_Until); + code.push_set_cur(until_op.block0); + open_scope(v->loc); + blk_fl::val res = process_vertex(v->get_body(), code, true); + + // in TVM, there is only "do until", but in Tolk, we want "do while" + // so the loop condition is negated here and passed on to the legacy Op::_Until + // common comparisons are negated directly as a hardcoded "optimization": !(a<b) becomes (a>=b), !(a>b) becomes (a<=b), and so on + // todo these hardcoded conditions should be removed from this place in the future + AnyV cond = v->get_cond(); + AnyV until_cond; + if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { + until_cond = v_not->get_rhs(); + } else if (auto v_eq = cond->try_as(); v_eq && v_eq->tok == tok_eq) { + until_cond = createV(cond->loc, "!=", tok_neq, v_eq->get_lhs(), v_eq->get_rhs()); + } else if (auto v_neq = cond->try_as(); v_neq && v_neq->tok == tok_neq) { + until_cond = createV(cond->loc, "==", tok_eq, v_neq->get_lhs(), v_neq->get_rhs()); + } else if (auto v_leq = cond->try_as(); v_leq && v_leq->tok == tok_leq) { + until_cond = createV(cond->loc, ">", tok_gt, v_leq->get_lhs(), v_leq->get_rhs()); + } else if (auto v_lt = cond->try_as(); v_lt && v_lt->tok == tok_lt) { + until_cond = createV(cond->loc, ">=", tok_geq, v_lt->get_lhs(), v_lt->get_rhs()); + } else if (auto v_geq = cond->try_as(); v_geq && v_geq->tok == tok_geq) { + until_cond = createV(cond->loc, "<", tok_lt, v_geq->get_lhs(), v_geq->get_rhs()); + } else if (auto v_gt = cond->try_as(); v_gt && v_gt->tok == tok_gt) { + until_cond = createV(cond->loc, "<=", tok_leq, v_gt->get_lhs(), v_gt->get_rhs()); + } else { + until_cond = createV(cond->loc, "!", tok_logical_not, cond); + } + + Expr* expr = process_expr(until_cond, code); + 
expr->chk_rvalue(); + close_scope(); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`while` condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + until_op.left = expr->pre_compile(code); + code.close_pop_cur(v->get_body()->loc_end); + if (until_op.left.size() != 1) { + v->get_cond()->error("`while` condition value is not a singleton"); + } + return res & ~blk_fl::empty; +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + std::vector args; + SymDef* builtin_sym; + if (v->has_thrown_arg()) { + builtin_sym = lookup_symbol(calc_sym_idx("__throw_arg")); + args.push_back(process_expr(v->get_thrown_arg(), code)); + args.push_back(process_expr(v->get_thrown_code(), code)); + } else { + builtin_sym = lookup_symbol(calc_sym_idx("__throw")); + args.push_back(process_expr(v->get_thrown_code(), code)); + } + + Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); + apply->flags |= Expr::_IsImpure; + apply->pre_compile(code); + return blk_fl::end; +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + std::vector args(3); + if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { + args[0] = process_expr(v->get_thrown_code(), code); + args[1] = process_expr(v->get_cond()->as()->get_rhs(), code); + args[2] = process_expr(createV(v->loc, true), code); + } else { + args[0] = process_expr(v->get_thrown_code(), code); + args[1] = process_expr(v->get_cond(), code); + args[2] = process_expr(createV(v->loc, false), code); + } + + SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__throw_if_unless")); + Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); + apply->flags |= Expr::_IsImpure; + apply->pre_compile(code); + return blk_fl::end; +} + +static Expr* process_catch_variable(AnyV catch_var, TypeExpr* var_type) { + if (auto 
v_ident = catch_var->try_as()) { + return create_new_local_variable(catch_var->loc, v_ident->name, var_type, true); + } + return create_new_underscore_variable(catch_var->loc, var_type); +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + code.require_callxargs = true; + Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); + code.push_set_cur(try_catch_op.block0); + blk_fl::val res0 = process_vertex(v->get_try_body(), code); + code.close_pop_cur(v->get_try_body()->loc_end); + code.push_set_cur(try_catch_op.block1); + open_scope(v->get_catch_expr()->loc); + + // transform catch (excNo, arg) into TVM-catch (arg, excNo), where arg is untyped and thus almost useless now + TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); + const std::vector& catch_items = v->get_catch_expr()->get_items(); + tolk_assert(catch_items.size() == 2); + Expr* e_catch = new Expr{Expr::_Tensor, v->get_catch_expr()->loc}; + e_catch->pb_arg(process_catch_variable(catch_items[1], tvm_error_type->args[0])); + e_catch->pb_arg(process_catch_variable(catch_items[0], tvm_error_type->args[1])); + e_catch->flags = Expr::_IsLvalue; + e_catch->e_type = tvm_error_type; + e_catch->predefine_vars(); + e_catch->define_new_vars(code); + try_catch_op.left = e_catch->pre_compile(code); + tolk_assert(try_catch_op.left.size() == 2); + + blk_fl::val res1 = process_vertex(v->get_catch_body(), code); + close_scope(); + code.close_pop_cur(v->get_catch_body()->loc_end); + blk_fl::combine_parallel(res0, res1); + return res0; +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + 
std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + v->get_cond()->error("condition value is not a singleton"); + } + Op& if_op = code.emplace_back(v->loc, Op::_If, tmp_vars); + code.push_set_cur(if_op.block0); + blk_fl::val res1 = process_vertex(v->get_if_body(), code); + blk_fl::val res2 = blk_fl::init; + code.close_pop_cur(v->get_if_body()->loc_end); + code.push_set_cur(if_op.block1); + res2 = process_vertex(v->get_else_body(), code); + code.close_pop_cur(v->get_else_body()->loc_end); + if (v->is_ifnot) { + std::swap(if_op.block0, if_op.block1); + } + blk_fl::combine_parallel(res1, res2); + return res1; +} + +blk_fl::val process_statement(AnyV v, CodeBlob& code) { + switch (v->type) { + case ast_local_vars_declaration: + return process_vertex(v->as(), code); + case ast_return_statement: + return process_vertex(v->as(), code); + case ast_sequence: + return process_vertex(v->as(), code); + case ast_empty: + return blk_fl::init; + case ast_repeat_statement: + return process_vertex(v->as(), code); + case ast_if_statement: + return process_vertex(v->as(), code); + case ast_do_while_statement: + return process_vertex(v->as(), code); + case ast_while_statement: + return process_vertex(v->as(), code); + case ast_throw_statement: + return process_vertex(v->as(), code); + case ast_assert_statement: + return process_vertex(v->as(), code); + case ast_try_catch_statement: + return process_vertex(v->as(), code); + default: { + Expr* expr = process_expr(v, code); + expr->chk_rvalue(); + expr->pre_compile(code); + return blk_fl::end; + } + } +} + +static FormalArg process_vertex(V v, SymDef* param_sym) { + if (!param_sym) { + return std::make_tuple(v->param_type, nullptr, v->loc); + } + SymDef* new_sym_def = define_symbol(calc_sym_idx(v->get_identifier()->name), true, v->loc); + if (!new_sym_def || new_sym_def->value) { + v->error("redefined parameter"); + } + const SymValVariable* param_val = dynamic_cast(param_sym->value); + new_sym_def->value = 
new SymValVariable(*param_val); + return std::make_tuple(v->param_type, new_sym_def, v->loc); +} + +static void convert_function_body_to_CodeBlob(V v, V v_body) { + SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); + SymValCodeFunc* sym_val = dynamic_cast(sym_def->value); + tolk_assert(sym_val != nullptr); + + open_scope(v->loc); + CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, sym_val, v->ret_type}; + if (v->marked_as_pure) { + blob->flags |= CodeBlob::_ForbidImpure; + } + FormalArgList legacy_arg_list; + for (int i = 0; i < v->get_num_params(); ++i) { + legacy_arg_list.emplace_back(process_vertex(v->get_param(i), sym_val->parameters[i])); + } + blob->import_params(std::move(legacy_arg_list)); + + blk_fl::val res = blk_fl::init; + bool warned = false; + for (AnyV item : v_body->get_items()) { + if (!(res & blk_fl::end) && !warned) { + item->loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, process_statement(item, *blob)); + } + if (res & blk_fl::end) { + append_implicit_ret_stmt(v_body->loc_end, *blob); + } + + blob->close_blk(v_body->loc_end); + close_scope(); + sym_val->set_code(blob); +} + +static void convert_asm_body_to_AsmOp(V v, V v_body) { + SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); + SymValAsmFunc* sym_val = dynamic_cast(sym_def->value); + tolk_assert(sym_val != nullptr); + + int cnt = v->get_num_params(); + int width = v->ret_type->get_width(); + std::vector asm_ops; + for (AnyV v_child : v_body->get_asm_commands()) { + std::string_view ops = v_child->as()->str_val; // \n\n... 
+ std::string op; + for (char c : ops) { + if (c == '\n' || c == '\r') { + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + op.clear(); + } + } else { + op.push_back(c); + } + } + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + } + } + + sym_val->set_code(std::move(asm_ops)); +} + + +void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) { + for (const SrcFile* file : all_src_files) { + tolk_assert(file->ast); + + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + if (v_func->is_asm_function()) { + convert_asm_body_to_AsmOp(v_func, v_func->get_body()->as()); + } else if (!v_func->marked_as_builtin) { + convert_function_body_to_CodeBlob(v_func, v_func->get_body()->as()); + } + } + } + } +} + +} // namespace tolk diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp new file mode 100644 index 000000000..a8445ae95 --- /dev/null +++ b/tolk/pipe-discover-parse-sources.cpp @@ -0,0 +1,64 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>. + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. 
+ You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-from-tokens.h" +#include "compiler-state.h" + +namespace tolk { + +AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { + G.all_src_files.locate_and_register_source_file(stdlib_filename, {}); + G.all_src_files.locate_and_register_source_file(entrypoint_filename, {}); + + while (SrcFile* file = G.all_src_files.get_next_unparsed_file()) { + tolk_assert(!file->ast); + + file->ast = parse_src_file_to_ast(file); + // file->ast->debug_print(); + + for (AnyV v_toplevel : file->ast->as()->get_toplevel_declarations()) { + if (auto v_import = v_toplevel->try_as()) { + std::string imported_str = v_import->get_file_name(); + size_t cur_slash_pos = file->rel_filename.rfind('/'); + std::string rel_filename = cur_slash_pos == std::string::npos || imported_str[0] == '@' + ? 
std::move(imported_str) + : file->rel_filename.substr(0, cur_slash_pos + 1) + imported_str; + + SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); + file->imports.push_back(SrcFile::ImportStatement{imported}); + v_import->mutate_set_src_file(imported); + } + } + } + + // todo #ifdef TOLK_PROFILING + // lexer_measure_performance(G.all_src_files.get_all_files()); + + return G.all_src_files.get_all_files(); +} + +} // namespace tolk diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp new file mode 100644 index 000000000..f83579f4e --- /dev/null +++ b/tolk/pipe-find-unused-symbols.cpp @@ -0,0 +1,88 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>. + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "src-file.h" +#include "compiler-state.h" + +/* + * Here we find unused symbols (global functions and variables) to strip them off codegen. + * Note that currently it's implemented as a standalone step after AST has been transformed to legacy Expr/Op. + * The reason why it's not done on AST level is that symbol resolving is done too late. For instance, + * having `beginCell()` there is not enough information in AST to tell whether it points to a global function + * or is a local variable application. + * In the future, this should be done on AST level. + */ + +namespace tolk { + +static void mark_function_used_dfs(const std::unique_ptr& op); + +static void mark_function_used(SymValCodeFunc* func_val) { + if (!func_val->code || func_val->is_really_used) { // already handled + return; + } + + func_val->is_really_used = true; + mark_function_used_dfs(func_val->code->ops); +} + +static void mark_global_var_used(SymValGlobVar* glob_val) { + glob_val->is_really_used = true; +} + +static void mark_function_used_dfs(const std::unique_ptr& op) { + if (!op) { + return; + } + // op->fun_ref, despite its name, may actually ref global var + // note, that for non-calls, e.g. 
`var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar + // (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also) + if (op->fun_ref) { + if (auto* func_val = dynamic_cast(op->fun_ref->value)) { + mark_function_used(func_val); + } else if (auto* glob_val = dynamic_cast(op->fun_ref->value)) { + mark_global_var_used(glob_val); + } else if (auto* asm_val = dynamic_cast(op->fun_ref->value)) { + } else { + tolk_assert(false); + } + } + mark_function_used_dfs(op->next); + mark_function_used_dfs(op->block0); + mark_function_used_dfs(op->block1); +} + +void pipeline_find_unused_symbols() { + for (SymDef* func_sym : G.all_code_functions) { + auto* func_val = dynamic_cast(func_sym->value); + std::string name = G.symbols.get_name(func_sym->sym_idx); + if (func_val->method_id.not_null() || func_val->is_entrypoint()) { + mark_function_used(func_val); + } + } +} + +} // namespace tolk diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp new file mode 100644 index 000000000..91a99f96a --- /dev/null +++ b/tolk/pipe-generate-fif-output.cpp @@ -0,0 +1,206 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>. + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. 
+ You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "compiler-state.h" + +namespace tolk { + +bool SymValCodeFunc::does_need_codegen() const { + // when a function is declared, but not referenced from code in any way, don't generate its body + if (!is_really_used && G.settings.remove_unused_functions) { + return false; + } + // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist + if (flags & flagUsedAsNonCall) { + return true; + } + // currently, there is no inlining, all functions are codegenerated + // (but actually, unused ones are later removed by Fift) + // in the future, we may want to implement a true AST inlining for "simple" functions + return true; +} + +void SymValCodeFunc::set_code(CodeBlob* code) { + this->code = code; +} + +void SymValAsmFunc::set_code(std::vector code) { + this->ext_compile = make_ext_compile(std::move(code)); +} + + +static void generate_output_func(SymDef* func_sym) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val); + std::string name = G.symbols.get_name(func_sym->sym_idx); + if (G.is_verbosity(2)) { + std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; + } + if (!func_val->code) { + throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented"); + } else { + CodeBlob& code = *(func_val->code); + if (G.is_verbosity(3)) { + code.print(std::cerr, 9); + } + code.simplify_var_types(); + if 
(G.is_verbosity(5)) { + std::cerr << "after simplify_var_types: \n"; + code.print(std::cerr, 0); + } + code.prune_unreachable_code(); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code.print(std::cerr, 0); + } + code.split_vars(true); + if (G.is_verbosity(5)) { + std::cerr << "after split_vars: \n"; + code.print(std::cerr, 0); + } + for (int i = 0; i < 8; i++) { + code.compute_used_code_vars(); + if (G.is_verbosity(4)) { + std::cerr << "after compute_used_vars: \n"; + code.print(std::cerr, 6); + } + code.fwd_analyze(); + if (G.is_verbosity(5)) { + std::cerr << "after fwd_analyze: \n"; + code.print(std::cerr, 6); + } + code.prune_unreachable_code(); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code.print(std::cerr, 6); + } + } + code.mark_noreturn(); + if (G.is_verbosity(3)) { + code.print(std::cerr, 15); + } + if (G.is_verbosity(2)) { + std::cerr << "\n---------- resulting code for " << name << " -------------\n"; + } + const char* modifier = ""; + if (func_val->is_inline()) { + modifier = "INLINE"; + } else if (func_val->is_inline_ref()) { + modifier = "REF"; + } + std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; + int mode = 0; + if (G.settings.stack_layout_comments) { + mode |= Stack::_StkCmt | Stack::_CptStkCmt; + } + if (func_val->is_inline() && code.ops->noreturn()) { + mode |= Stack::_InlineFunc; + } + if (func_val->is_inline() || func_val->is_inline_ref()) { + mode |= Stack::_InlineAny; + } + code.generate_code(std::cout, mode, 2); + std::cout << std::string(2, ' ') << "}>\n"; + if (G.is_verbosity(2)) { + std::cerr << "--------------\n"; + } + } +} + +void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) { + std::cout << "\"Asm.fif\" include\n"; + std::cout << "// automatically generated from "; + bool need_comma = false; + for (const SrcFile* file : all_src_files) { + if (!file->is_stdlib_file()) { + if (need_comma) { + std::cout << ", "; + } + 
std::cout << file->rel_filename; + need_comma = true; + } + } + std::cout << std::endl; + std::cout << "PROGRAM{\n"; + + bool has_main_procedure = false; + for (SymDef* func_sym : G.all_code_functions) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val); + if (!func_val->does_need_codegen()) { + if (G.is_verbosity(2)) { + std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n"; + } + continue; + } + + std::string name = G.symbols.get_name(func_sym->sym_idx); + if (func_val->is_entrypoint() && (name == "main" || name == "onInternalMessage")) { + has_main_procedure = true; + } + + std::cout << std::string(2, ' '); + if (func_val->method_id.is_null()) { + std::cout << "DECLPROC " << name << "\n"; + } else { + std::cout << func_val->method_id << " DECLMETHOD " << name << "\n"; + } + } + + if (!has_main_procedure) { + throw Fatal("the contract has no entrypoint; forgot `fun onInternalMessage(...)`?"); + } + + for (SymDef* gvar_sym : G.all_global_vars) { + auto* glob_val = dynamic_cast(gvar_sym->value); + tolk_assert(glob_val); + if (!glob_val->is_really_used && G.settings.remove_unused_functions) { + if (G.is_verbosity(2)) { + std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n"; + } + continue; + } + std::string name = G.symbols.get_name(gvar_sym->sym_idx); + std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; + } + + for (SymDef* func_sym : G.all_code_functions) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + if (!func_val->does_need_codegen()) { + continue; + } + generate_output_func(func_sym); + } + + std::cout << "}END>c\n"; + if (!G.settings.boc_output_filename.empty()) { + std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n"; + } +} + +} // namespace tolk diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp new file mode 100644 index 000000000..569d434aa --- /dev/null +++ 
b/tolk/pipe-register-symbols.cpp @@ -0,0 +1,388 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "platform-utils.h" +#include "src-file.h" +#include "ast.h" +#include "compiler-state.h" +#include "td/utils/crypto.h" +#include + +namespace tolk { + +Expr* process_expr(AnyV v, CodeBlob& code); + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_redefinition_of_symbol(V v_ident, SymDef* existing) { + if (existing->loc.is_stdlib()) { + v_ident->error("redefinition of a symbol from stdlib"); + } else if (existing->loc.is_defined()) { + v_ident->error("redefinition of symbol, previous was at: " + existing->loc.to_string()); + } else { + v_ident->error("redefinition of built-in symbol"); + } +} + +static int calc_sym_idx(std::string_view sym_name) { + return G.symbols.lookup_add(sym_name); +} + +static td::RefInt256 calculate_method_id_for_entrypoint(std::string_view func_name) { + if (func_name == "main" || func_name == "onInternalMessage") { + return td::make_refint(0); + } + if (func_name == "onExternalMessage") { + return td::make_refint(-1); + } + if (func_name == "onRunTickTock") { + return td::make_refint(-2); + } + if (func_name == "onSplitPrepare") { + return td::make_refint(-3); + } + if (func_name == "onSplitInstall") { + return td::make_refint(-4); + } + tolk_assert(false); +} + +static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { + unsigned int crc = td::crc16(static_cast(func_name)); + return td::make_refint((crc & 0xffff) | 0x10000); +} + +static void calc_arg_ret_order_of_asm_function(V v_body, V param_list, TypeExpr* ret_type, + std::vector& arg_order, std::vector& ret_order) { + int cnt = param_list->size(); + int width = ret_type->get_width(); + if (width < 0 || width > 16) { + v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); + } + if (cnt > 16) { + v_body->error("assembler built-in function must have at most 16 arguments"); + } + std::vector cum_arg_width; + cum_arg_width.push_back(0); + int tot_width = 0; + for 
(int i = 0; i < cnt; ++i) { + V v_param = param_list->get_param(i); + int arg_width = v_param->param_type->get_width(); + if (arg_width < 0 || arg_width > 16) { + v_param->error("parameters of an assembler built-in function must have a well-defined fixed width"); + } + cum_arg_width.push_back(tot_width += arg_width); + } + if (!v_body->arg_order.empty()) { + if (static_cast(v_body->arg_order.size()) != cnt) { + v_body->error("arg_order of asm function must specify all parameters"); + } + std::vector visited(cnt, false); + for (int i = 0; i < cnt; ++i) { + int j = v_body->arg_order[i]; + if (visited[j]) { + v_body->error("arg_order of asm function contains duplicates"); + } + visited[j] = true; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); + } + } + tolk_assert(arg_order.size() == (unsigned)tot_width); + } + if (!v_body->ret_order.empty()) { + if (static_cast(v_body->ret_order.size()) != width) { + v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width)); + } + std::vector visited(width, false); + for (int i = 0; i < width; ++i) { + int j = v_body->ret_order[i]; + if (j < 0 || j >= width || visited[j]) { + v_body->error("ret_order contains invalid integer, not in range 0 .. 
width-1"); + } + visited[j] = true; + } + ret_order = v_body->ret_order; + } +} + +static void register_constant(V v) { + AnyV init_value = v->get_init_value(); + SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); + if (sym_def->value) { + fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); + } + + // todo currently, constant value calculation is dirty and roughly: init_value is evaluated to fif code + // and waited to be a single expression + // although it works, of course it should be later rewritten using AST calculations, as well as lots of other parts + CodeBlob code("tmp", v->loc, nullptr, nullptr); + Expr* x = process_expr(init_value, code); + if (!x->is_rvalue()) { + v->get_init_value()->error("expression is not strictly Rvalue"); + } + if (v->declared_type && !v->declared_type->equals_to(x->e_type)) { + v->error("expression type does not match declared type"); + } + SymValConst* sym_val = nullptr; + if (x->cls == Expr::_Const) { // Integer constant + sym_val = new SymValConst(static_cast(G.all_constants.size()), x->intval); + } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) + sym_val = new SymValConst(static_cast(G.all_constants.size()), x->strval); + } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) + code.emplace_back(v->loc, Op::_Import, std::vector()); + auto tmp_vars = x->pre_compile(code); + code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); + code.emplace_back(v->loc, Op::_Nop); + // It is REQUIRED to execute "optimizations" as in tolk.cpp + code.simplify_var_types(); + code.prune_unreachable_code(); + code.split_vars(true); + for (int i = 0; i < 16; i++) { + code.compute_used_code_vars(); + code.fwd_analyze(); + code.prune_unreachable_code(); + } + code.mark_noreturn(); + AsmOpList out_list(0, &code.vars); + code.generate_code(out_list); + if (out_list.list_.size() != 1) { + init_value->error("precompiled expression must result in 
single operation"); + } + auto op = out_list.list_[0]; + if (!op.is_const()) { + init_value->error("precompiled expression must result in compilation time constant"); + } + if (op.origin.is_null() || !op.origin->is_valid()) { + init_value->error("precompiled expression did not result in a valid integer constant"); + } + sym_val = new SymValConst(static_cast(G.all_constants.size()), op.origin); + } else { + init_value->error("integer or slice literal or constant expected"); + } + + sym_def->value = sym_val; +#ifdef TOLK_DEBUG + sym_def->value->sym_name = v->get_identifier()->name; +#endif + G.all_constants.push_back(sym_def); +} + +static void register_global_var(V v) { + SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); + if (sym_def->value) { + fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); + } + + sym_def->value = new SymValGlobVar(static_cast(G.all_global_vars.size()), v->declared_type); +#ifdef TOLK_DEBUG + sym_def->value->sym_name = v->get_identifier()->name; +#endif + G.all_global_vars.push_back(sym_def); +} + +static SymDef* register_parameter(V v, int idx) { + if (v->is_underscore()) { + return nullptr; + } + SymDef* sym_def = define_parameter(calc_sym_idx(v->get_identifier()->name), v->loc); + if (sym_def->value) { + // todo always false now, how to detect similar parameter names? 
(remember about underscore) + v->error("redefined parameter"); + } + + SymValVariable* sym_val = new SymValVariable(idx, v->param_type); + if (v->declared_as_mutate) { + sym_val->flags |= SymValVariable::flagMutateParameter; + } + if (!v->declared_as_mutate && idx == 0 && v->get_identifier()->name == "self") { + sym_val->flags |= SymValVariable::flagImmutable; + } + sym_def->value = sym_val; +#ifdef TOLK_DEBUG + sym_def->value->sym_name = v->get_identifier()->name; +#endif + return sym_def; +} + +static void register_function(V v) { + std::string_view func_name = v->get_identifier()->name; + + // calculate TypeExpr of a function: it's a map (params -> ret), probably surrounded by forall + TypeExpr* params_tensor_type = nullptr; + int n_params = v->get_num_params(); + int n_mutate_params = 0; + std::vector parameters_syms; + if (n_params) { + std::vector param_tensor_items; + param_tensor_items.reserve(n_params); + parameters_syms.reserve(n_params); + for (int i = 0; i < n_params; ++i) { + auto v_param = v->get_param(i); + n_mutate_params += static_cast(v_param->declared_as_mutate); + param_tensor_items.emplace_back(v_param->param_type); + parameters_syms.emplace_back(register_parameter(v_param, i)); + } + params_tensor_type = TypeExpr::new_tensor(std::move(param_tensor_items)); + } else { + params_tensor_type = TypeExpr::new_unit(); + } + + TypeExpr* function_type = TypeExpr::new_map(params_tensor_type, v->ret_type); + if (v->genericsT_list) { + std::vector type_vars; + type_vars.reserve(v->genericsT_list->size()); + for (int idx = 0; idx < v->genericsT_list->size(); ++idx) { + type_vars.emplace_back(v->genericsT_list->get_item(idx)->created_type); + } + function_type = TypeExpr::new_forall(std::move(type_vars), function_type); + } + if (v->marked_as_builtin) { + const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); + const SymValFunc* func_val = builtin_func ? 
dynamic_cast(builtin_func->value) : nullptr; + if (!func_val || !func_val->is_builtin()) { + v->error("`builtin` used for non-builtin function"); + } +#ifdef TOLK_DEBUG + // in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility + if (!func_val->sym_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { + v->error("declaration for `builtin` function doesn't match an actual one"); + } +#endif + return; + } + + SymDef* sym_def = define_global_symbol(calc_sym_idx(func_name), v->loc); + if (sym_def->value) { + fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); + } + if (G.is_verbosity(1)) { + std::cerr << "fun " << func_name << " : " << function_type << std::endl; + } + if (v->marked_as_pure && v->ret_type->get_width() == 0) { + v->error("a pure function should return something, otherwise it will be optimized out anyway"); + } + + SymValFunc* sym_val = nullptr; + if (const auto* v_seq = v->get_body()->try_as()) { + sym_val = new SymValCodeFunc(std::move(parameters_syms), static_cast(G.all_code_functions.size()), function_type); + } else if (const auto* v_asm = v->get_body()->try_as()) { + std::vector arg_order, ret_order; + calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, arg_order, ret_order); + sym_val = new SymValAsmFunc(std::move(parameters_syms), function_type, std::move(arg_order), std::move(ret_order), 0); + } else { + v->error("Unexpected function body statement"); + } + + if (v->method_id) { + sym_val->method_id = td::string_to_int256(static_cast(v->method_id->int_val)); + if (sym_val->method_id.is_null()) { + v->method_id->error("invalid integer constant"); + } + } else if (v->marked_as_get_method) { + sym_val->method_id = calculate_method_id_by_func_name(func_name); + for (const SymDef* other : G.all_get_methods) { + if (!td::cmp(dynamic_cast(other->value)->method_id, sym_val->method_id)) { + v->error(PSTRING() << "GET methods hash 
collision: `" << other->name() << "` and `" << static_cast(func_name) << "` produce the same hash. Consider renaming one of these functions."); + } + } + } else if (v->is_entrypoint) { + sym_val->method_id = calculate_method_id_for_entrypoint(func_name); + } + if (v->marked_as_pure) { + sym_val->flags |= SymValFunc::flagMarkedAsPure; + } + if (v->marked_as_inline) { + sym_val->flags |= SymValFunc::flagInline; + } + if (v->marked_as_inline_ref) { + sym_val->flags |= SymValFunc::flagInlineRef; + } + if (v->marked_as_get_method) { + sym_val->flags |= SymValFunc::flagGetMethod; + } + if (v->is_entrypoint) { + sym_val->flags |= SymValFunc::flagIsEntrypoint; + } + if (n_mutate_params) { + sym_val->flags |= SymValFunc::flagHasMutateParams; + } + if (v->accepts_self) { + sym_val->flags |= SymValFunc::flagAcceptsSelf; + } + if (v->returns_self) { + sym_val->flags |= SymValFunc::flagReturnsSelf; + } + + sym_def->value = sym_val; +#ifdef TOLK_DEBUG + sym_def->value->sym_name = func_name; +#endif + if (dynamic_cast(sym_val)) { + G.all_code_functions.push_back(sym_def); + } + if (sym_val->is_get_method()) { + G.all_get_methods.push_back(sym_def); + } +} + +static void iterate_through_file_symbols(const SrcFile* file) { + static std::unordered_set seen; + if (!seen.insert(file).second) { + return; + } + tolk_assert(file && file->ast); + + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + switch (v->type) { + case ast_import_statement: + // on `import "another-file.tolk"`, register symbols from that file at first + // (for instance, it can calculate constants, which are used in init_val of constants in current file below import) + iterate_through_file_symbols(v->as()->file); + break; + + case ast_constant_declaration: + register_constant(v->as()); + break; + case ast_global_var_declaration: + register_global_var(v->as()); + break; + case ast_function_declaration: + register_function(v->as()); + break; + default: + break; + } + } +} + +void 
pipeline_register_global_symbols(const AllSrcFiles& all_src_files) { + for (const SrcFile* file : all_src_files) { + iterate_through_file_symbols(file); + } +} + +} // namespace tolk diff --git a/tolk/pipeline.h b/tolk/pipeline.h new file mode 100644 index 000000000..fdfd2b996 --- /dev/null +++ b/tolk/pipeline.h @@ -0,0 +1,41 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#pragma once + +#include "src-file.h" +#include + +namespace tolk { + +AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); + +void pipeline_register_global_symbols(const AllSrcFiles&); +void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); + +void pipeline_find_unused_symbols(); +void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles&); + +} // namespace tolk diff --git a/tolk/platform-utils.h b/tolk/platform-utils.h new file mode 100644 index 000000000..7b16226e7 --- /dev/null +++ b/tolk/platform-utils.h @@ -0,0 +1,44 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#pragma once + +#if __GNUC__ +#define GNU_ATTRIBUTE_COLD [[gnu::cold]] +#define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]] +#define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]] +#else +#define GNU_ATTRIBUTE_COLD +#define GNU_ATTRIBUTE_NORETURN [[noreturn]] +#define GNU_ATTRIBUTE_ALWAYS_INLINE +#endif + +#if defined(__GNUC__) +#define LIKELY(x) __builtin_expect(x, true) +#define UNLIKELY(x) __builtin_expect(x, false) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp new file mode 100644 index 000000000..e5533f697 --- /dev/null +++ b/tolk/src-file.cpp @@ -0,0 +1,209 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "src-file.h" +#include "compiler-state.h" +#include +#include + +namespace tolk { + +static_assert(sizeof(SrcLocation) == 8); + +SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { + for (SrcFile* file : all_src_files) { + if (file->file_id == file_id) { + return file; + } + } + return nullptr; +} + +SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { + for (SrcFile* file : all_src_files) { + if (file->abs_filename == abs_filename) { + return file; + } + } + return nullptr; +} + +SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) { + td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); + if (path.is_error()) { + if (included_from.is_defined()) { + throw ParseError(included_from, "Failed to import: " + path.move_as_error().message().str()); + } + throw Fatal("Failed to locate " + rel_filename + ": " + path.move_as_error().message().str()); + } + + std::string abs_filename = path.move_as_ok(); + if (SrcFile* file = find_file(abs_filename)) { + return file; + } + + td::Result text = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str()); + if (text.is_error()) { + if (included_from.is_defined()) { + throw ParseError(included_from, "Failed to import: " + text.move_as_error().message().str()); + } + throw Fatal("Failed to read " + rel_filename + ": " + text.move_as_error().message().str()); + } + + SrcFile* created = new SrcFile(++last_registered_file_id, rel_filename, std::move(abs_filename), text.move_as_ok()); + if (G.is_verbosity(1)) { + std::cerr << "register file_id " << created->file_id << " " << created->abs_filename << std::endl; + } + all_src_files.push_back(created); + return created; +} + +SrcFile* AllRegisteredSrcFiles::get_next_unparsed_file() { + if (last_parsed_file_id >= last_registered_file_id) { + return nullptr; + } + return 
all_src_files[++last_parsed_file_id]; +} + +AllSrcFiles AllRegisteredSrcFiles::get_all_files() const { + AllSrcFiles src_files_immutable; + src_files_immutable.reserve(all_src_files.size()); + for (const SrcFile* file : all_src_files) { + src_files_immutable.push_back(file); + } + return src_files_immutable; +} + +bool SrcFile::is_stdlib_file() const { + std::string_view rel(rel_filename); + return rel.size() > 10 && rel.substr(0, 8) == "@stdlib/"; // common.tolk, tvm-dicts.tolk, etc +} + +bool SrcFile::is_offset_valid(int offset) const { + return offset >= 0 && offset < static_cast(text.size()); +} + +SrcFile::SrcPosition SrcFile::convert_offset(int offset) const { + if (!is_offset_valid(offset)) { + return SrcPosition{offset, -1, -1, "invalid offset"}; + } + + int line_idx = 0; + int char_idx = 0; + int line_offset = 0; + for (int i = 0; i < offset; ++i) { + char c = text[i]; + if (c == '\n') { + line_idx++; + char_idx = 0; + line_offset = i + 1; + } else { + char_idx++; + } + } + + size_t line_len = text.size() - line_offset; + for (int i = line_offset; i < static_cast(text.size()); ++i) { + if (text[i] == '\n') { + line_len = i - line_offset; + break; + } + } + + std::string_view line_str(text.data() + line_offset, line_len); + return SrcPosition{offset, line_idx + 1, char_idx + 1, line_str}; +} + + +std::ostream& operator<<(std::ostream& os, const SrcFile* src_file) { + return os << (src_file ? 
src_file->rel_filename : "unknown-location"); +} + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal) { + return os << fatal.what(); +} + +const SrcFile* SrcLocation::get_src_file() const { + return G.all_src_files.find_file(file_id); +} + +void SrcLocation::show(std::ostream& os) const { + const SrcFile* src_file = get_src_file(); + os << src_file; + if (src_file && src_file->is_offset_valid(char_offset)) { + SrcFile::SrcPosition pos = src_file->convert_offset(char_offset); + os << ':' << pos.line_no << ':' << pos.char_no; + } +} + +void SrcLocation::show_context(std::ostream& os) const { + const SrcFile* src_file = get_src_file(); + if (!src_file || !src_file->is_offset_valid(char_offset)) { + return; + } + SrcFile::SrcPosition pos = src_file->convert_offset(char_offset); + os << " " << pos.line_str << "\n"; + + os << " "; + for (int i = 1; i < pos.char_no; ++i) { + os << ' '; + } + os << '^' << "\n"; +} + +std::string SrcLocation::to_string() const { + std::ostringstream os; + show(os); + return os.str(); +} + +std::ostream& operator<<(std::ostream& os, SrcLocation loc) { + loc.show(os); + return os; +} + +void SrcLocation::show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const { + show(os); + if (!err_type.empty()) { + os << ": " << err_type; + } + os << ": " << message << std::endl; + show_context(os); +} + +void SrcLocation::show_note(const std::string& err_msg) const { + show_general_error(std::cerr, err_msg, "note"); +} + +void SrcLocation::show_warning(const std::string& err_msg) const { + show_general_error(std::cerr, err_msg, "warning"); +} + +void SrcLocation::show_error(const std::string& err_msg) const { + show_general_error(std::cerr, err_msg, "error"); +} + +std::ostream& operator<<(std::ostream& os, const ParseError& error) { + error.show(os); + return os; +} + +void ParseError::show(std::ostream& os) const { + os << where << ": error: " << message << std::endl; + where.show_context(os); +} 
+ +} // namespace tolk diff --git a/tolk/src-file.h b/tolk/src-file.h new file mode 100644 index 000000000..815dccbed --- /dev/null +++ b/tolk/src-file.h @@ -0,0 +1,142 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include +#include + +namespace tolk { + +struct ASTNodeBase; + +struct SrcFile { + struct SrcPosition { + int offset; + int line_no; + int char_no; + std::string_view line_str; + }; + + struct ImportStatement { + const SrcFile* imported_file; + }; + + int file_id; // an incremental counter through all parsed files + std::string rel_filename; // relative to cwd + std::string abs_filename; // absolute from root + std::string text; // file contents loaded into memory, every Token::str_val points inside it + const ASTNodeBase* ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here + std::vector imports; // to check strictness (can't use a symbol without importing its file) + + SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text) + : file_id(file_id) + , rel_filename(std::move(rel_filename)) + , abs_filename(std::move(abs_filename)) + , text(std::move(text)) { } + + SrcFile(const SrcFile& other) = delete; + SrcFile &operator=(const SrcFile&) = delete; + + bool is_stdlib_file() const; + + bool is_offset_valid(int offset) 
const; + SrcPosition convert_offset(int offset) const; +}; + + +// SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile. +// Note, that instead of storing src_file, line_no, etc., only 2 ints are stored. +// The purpose is: sizeof(SrcLocation) == 8, so it's just passed/stored without pointers/refs, just like int64_t. +// When decoding SrcLocation into human-readable format, it's converted to SrcFile::SrcPosition via offset. +class SrcLocation { + friend class Lexer; + + int file_id = -1; // = SrcFile::file_id (note, that get_src_file() does linear search) + int char_offset = -1; // offset from SrcFile::text + +public: + + SrcLocation() = default; + explicit SrcLocation(const SrcFile* src_file) : file_id(src_file->file_id) { + } + + bool is_defined() const { return file_id != -1; } + bool is_stdlib() const { return file_id == 0; } + const SrcFile* get_src_file() const; + + // similar to `this->get_src_file() == symbol->get_src_file() || symbol->get_src_file()->is_stdlib()` + // (but effectively, avoiding linear search) + bool is_symbol_from_same_or_builtin_file(SrcLocation symbol_loc) const { + return file_id == symbol_loc.file_id || symbol_loc.file_id < 1; + } + + void show(std::ostream& os) const; + void show_context(std::ostream& os) const; + std::string to_string() const; + + void show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const; + void show_note(const std::string& err_msg) const; + void show_warning(const std::string& err_msg) const; + void show_error(const std::string& err_msg) const; +}; + +std::ostream& operator<<(std::ostream& os, SrcLocation loc); + +using AllSrcFiles = std::vector; + +class AllRegisteredSrcFiles { + std::vector all_src_files; + int last_registered_file_id = -1; + int last_parsed_file_id = -1; + +public: + SrcFile *find_file(int file_id) const; + SrcFile* find_file(const std::string& abs_filename) const; + + SrcFile* locate_and_register_source_file(const 
std::string& rel_filename, SrcLocation included_from); + SrcFile* get_next_unparsed_file(); + + AllSrcFiles get_all_files() const; +}; + +struct Fatal final : std::exception { + std::string message; + + explicit Fatal(std::string _msg) : message(std::move(_msg)) { + } + const char* what() const noexcept override { + return message.c_str(); + } +}; + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal); + +struct ParseError : std::exception { + SrcLocation where; + std::string message; + ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) { + } + + const char* what() const noexcept override { + return message.c_str(); + } + void show(std::ostream& os) const; +}; + +std::ostream& operator<<(std::ostream& os, const ParseError& error); + +} // namespace tolk diff --git a/tolk/stack-transform.cpp b/tolk/stack-transform.cpp new file mode 100644 index 000000000..fe5735e5c --- /dev/null +++ b/tolk/stack-transform.cpp @@ -0,0 +1,1054 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * GENERIC STACK TRANSFORMATIONS + * + */ + +StackTransform::StackTransform(std::initializer_list list) { + *this = list; +} + +StackTransform &StackTransform::operator=(std::initializer_list list) { + if (list.size() > 255) { + invalidate(); + return *this; + } + set_id(); + if (!list.size()) { + return *this; + } + int m = (int)list.size(); + d = list.begin()[m - 1] - (m - 1); + if (d >= 128 || d < -128) { + invalidate(); + return *this; + } + for (int i = 0; i < m - 1; i++) { + int x = d + i; + int y = list.begin()[i]; + if (y != x) { + if (x != (short)x || y != (short)y || n == max_n) { + invalidate(); + return *this; + } + dp = std::max(dp, std::max(x, y) + 1); + A[n++] = std::make_pair((short)x, (short)y); + } + } + return *this; +} + +bool StackTransform::assign(const StackTransform &other) { + if (!other.is_valid() || (unsigned)other.n > max_n) { + return invalidate(); + } + d = other.d; + n = other.n; + dp = other.dp; + c = other.c; + invalid = false; + for (int i = 0; i < n; i++) { + A[i] = other.A[i]; + } + return true; +} + +int StackTransform::get(int x) const { + if (!is_valid()) { + return -1; + } + if (x <= c_start) { + return x - c; + } + x += d; + int i; + for (i = 0; i < n && A[i].first < x; i++) { + } + if (i < n && A[i].first == x) { + return A[i].second; + } else { + return x; + } +} + +bool StackTransform::set(int x, int y, bool relaxed) { + if (!is_valid()) { + return false; + } + if (x < 0) { + return (relaxed && y == x + d) || invalidate(); + } + if (!relaxed) { + touch(x); + } + x += d; + int i; + for (i = 0; i < n && A[i].first < x; i++) { + } + if (i < n && A[i].first == x) { + if (x != y) { + if (y != (short)y) { + return invalidate(); + } + A[i].second = (short)y; + } else { + --n; + for (; i < n; i++) { + A[i] = A[i + 1]; + } + } + } else { + if (x != y) { + if (x != (short)x || y != (short)y || n == max_n) { + return invalidate(); + } + for (int j = n++; j > i; j--) { + A[j] = 
A[j - 1]; + } + A[i].first = (short)x; + A[i].second = (short)y; + touch(x - d); + touch(y); + } + } + return true; +} + +// f(x') = x' + d for all x' >= x ? +bool StackTransform::is_trivial_after(int x) const { + return is_valid() && (!n || A[n - 1].first < x + d); +} + +// card f^{-1}(y) +int StackTransform::preimage_count(int y) const { + if (!is_valid()) { + return -1; + } + int count = (y >= d); + for (const auto &pair : A) { + if (pair.second == y) { + ++count; + } else if (pair.first == y) { + --count; + } + } + return count; +} + +// f^{-1}(y) +std::vector StackTransform::preimage(int y) const { + if (!is_valid()) { + return {}; + } + std::vector res; + bool f = (y >= d); + for (const auto &pair : A) { + if (pair.first > y && f) { + res.push_back(y - d); + f = false; + } + if (pair.first == y) { + f = false; + } else if (pair.second == y) { + res.push_back(pair.first - d); + } + } + return res; +} + +// is f:N->N bijective ? +bool StackTransform::is_permutation() const { + if (!is_valid() || d) { + return false; + } + tolk_assert(n <= max_n); + std::array X, Y; + for (int i = 0; i < n; i++) { + X[i] = A[i].first; + Y[i] = A[i].second; + if (Y[i] < 0) { + return false; + } + } + std::sort(Y.begin(), Y.begin() + n); + for (int i = 0; i < n; i++) { + if (X[i] != Y[i]) { + return false; + } + } + return true; +} + +bool StackTransform::remove_negative() { + int s = 0; + while (s < n && A[s].first < d) { + ++s; + } + if (s) { + n -= s; + for (int i = 0; i < n; i++) { + A[i] = A[i + s]; + } + } + return true; +} + +int StackTransform::try_load(int &i, int offs) const { + return i < n ? 
A[i++].first + offs : inf_x; +} + +bool StackTransform::try_store(int x, int y) { + if (x == y || x < d) { + return true; + } + if (n == max_n || x != (short)x || y != (short)y) { + return invalidate(); + } + A[n].first = (short)x; + A[n++].second = (short)y; + return true; +} + +// c := a * b +bool StackTransform::compose(const StackTransform &a, const StackTransform &b, StackTransform &c) { + if (!a.is_valid() || !b.is_valid()) { + return c.invalidate(); + } + c.d = a.d + b.d; + c.n = 0; + c.dp = std::max(a.dp, b.dp + a.d); + c.c = a.c + b.c; + c.invalid = false; + int i = 0, j = 0; + int x1 = a.try_load(i); + int x2 = b.try_load(j, a.d); + while (true) { + if (x1 < x2) { + int y = a.A[i - 1].second; + if (!c.try_store(x1, y)) { + return false; + } + x1 = a.try_load(i); + } else if (x2 < inf_x) { + if (x1 == x2) { + x1 = a.try_load(i); + } + int y = b.A[j - 1].second; + if (!c.try_store(x2, a(y))) { + return false; + } + x2 = b.try_load(j, a.d); + } else { + return true; + } + } +} + +// this = this * other +bool StackTransform::apply(const StackTransform &other) { + StackTransform res; + if (!compose(*this, other, res)) { + return invalidate(); + } + return assign(res); +} + +// this = other * this +bool StackTransform::preapply(const StackTransform &other) { + StackTransform res; + if (!compose(other, *this, res)) { + return invalidate(); + } + return assign(res); +} + +StackTransform StackTransform::operator*(const StackTransform &b) const & { + StackTransform res; + compose(*this, b, res); + return res; +} + +// this = this * other +StackTransform &StackTransform::operator*=(const StackTransform &other) { + StackTransform res; + (compose(*this, other, res) && assign(res)) || invalidate(); + return *this; +} + +bool StackTransform::apply_xchg(int i, int j, bool relaxed) { + if (!is_valid() || i < 0 || j < 0) { + return invalidate(); + } + if (i == j) { + return relaxed || touch(i); + } + int u = touch_get(i), v = touch_get(j); + return set(i, v) && set(j, u); 
+} + +bool StackTransform::apply_push(int i) { + if (!is_valid() || i < 0) { + return invalidate(); + } + int u = touch_get(i); + return shift(-1) && set(0, u); +} + +bool StackTransform::apply_push_newconst() { + if (!is_valid()) { + return false; + } + return shift(-1) && set(0, c_start - c++); +} + +bool StackTransform::apply_pop(int i) { + if (!is_valid() || i < 0) { + return invalidate(); + } + if (!i) { + return touch(0) && shift(1); + } else { + return set(i, get(0)) && shift(1); + } +} + +bool StackTransform::apply_blkpop(int k) { + if (!is_valid() || k < 0) { + return invalidate(); + } + return !k || (touch(k - 1) && shift(k)); +} + +bool StackTransform::equal(const StackTransform &other, bool relaxed) const { + if (!is_valid() || !other.is_valid()) { + return false; + } + if (!(n == other.n && d == other.d)) { + return false; + } + for (int i = 0; i < n; i++) { + if (A[i] != other.A[i]) { + return false; + } + } + return relaxed || dp == other.dp; +} + +StackTransform StackTransform::Xchg(int i, int j, bool relaxed) { + StackTransform t; + t.apply_xchg(i, j, relaxed); + return t; +} + +StackTransform StackTransform::Push(int i) { + StackTransform t; + t.apply_push(i); + return t; +} + +StackTransform StackTransform::Pop(int i) { + StackTransform t; + t.apply_pop(i); + return t; +} + +bool StackTransform::is_xchg(int i, int j) const { + if (i == j) { + return is_id(); + } + return is_valid() && !d && n == 2 && i >= 0 && j >= 0 && get(i) == j && get(j) == i; +} + +bool StackTransform::is_xchg(int *i, int *j) const { + if (!is_valid() || d || n > 2 || !dp) { + return false; + } + if (!n) { + *i = *j = 0; + return true; + } + if (n != 2) { + return false; + } + int a = A[0].first, b = A[1].first; + if (A[0].second != b || A[1].second != a) { + return false; + } + *i = std::min(a, b); + *j = std::max(a, b); + return true; +} + +bool StackTransform::is_xchg_xchg(int i, int j, int k, int l) const { + if (is_valid() && !d && n <= 4 && (i | j | k | l) >= 0) { + 
StackTransform t; + return t.apply_xchg(i, j) && t.apply_xchg(k, l) && t <= *this; + } else { + return false; + } +} + +bool StackTransform::is_xchg_xchg(int *i, int *j, int *k, int *l) const { + if (!is_valid() || d || n > 4 || !dp || !is_permutation()) { + return false; + } + if (!n) { + *i = *j = *k = *l = 0; + return true; + } + if (n <= 2) { + *k = *l = 0; + return is_xchg(i, j); + } + if (n == 3) { + // rotation: a -> b -> c -> a + int a = A[0].first; + int b = A[0].second; + int s = (b == A[2].first ? 2 : 1); + int c = A[s].second; + if (b != A[s].first || c != A[3 - s].first || a != A[3 - s].second) { + return false; + } + // implement as XCHG s(a),s(c) ; XCHG s(a),s(b) + *i = *k = a; + *j = c; + *l = b; + return is_xchg_xchg(*i, *j, *k, *l); + } + *i = A[0].first; + *j = A[0].second; + if (get(*j) != *i) { + return false; + } + for (int s = 1; s < 4; s++) { + if (A[s].first != *j) { + *k = A[s].first; + *l = A[s].second; + return get(*l) == *k && is_xchg_xchg(*i, *j, *k, *l); + } + } + return false; +} + +bool StackTransform::is_push(int i) const { + return is_valid() && d == -1 && n == 1 && A[0].first == -1 && A[0].second == i; +} + +bool StackTransform::is_push(int *i) const { + if (is_valid() && d == -1 && n == 1 && A[0].first == -1 && A[0].second >= 0) { + *i = A[0].second; + return true; + } else { + return false; + } +} + +// 1 2 3 4 .. = pop0 +// 0 2 3 4 .. = pop1 +// 1 0 3 4 .. = pop2 +// 1 2 0 4 .. = pop3 +// POP s(i) : 1 2 ... i-1 0 i+1 ... ; d=1, n=1, {(i,0)} +bool StackTransform::is_pop(int i) const { + if (!is_valid() || d != 1 || n > 1 || i < 0) { + return false; + } + if (!i) { + return !n; + } + return n == 1 && A[0].first == i && !A[0].second; +} + +bool StackTransform::is_pop(int *i) const { + if (!is_valid() || d != 1 || n > 1) { + return false; + } + if (!n) { + *i = 0; + return true; + } + if (n == 1 && !A[0].second) { + *i = A[0].first; + return true; + } + return false; +} + +// POP s(i) ; POP s(j) : 2 ... i-1 0 i+1 ... j 1 j+2 ... 
; d=2, n=2, {(i,0),(j+1,1)} if i <> j+1 +bool StackTransform::is_pop_pop(int i, int j) const { + if (is_valid() && d == 2 && n <= 2 && i >= 0 && j >= 0) { + StackTransform t; + return t.apply_pop(i) && t.apply_pop(j) && t <= *this; + } else { + return false; + } +} + +bool StackTransform::is_pop_pop(int *i, int *j) const { + if (!is_valid() || d != 2 || n > 2) { + return false; + } + if (!n) { + *i = *j = 0; // 2DROP + } else if (n == 2) { + *i = A[0].first - A[0].second; + *j = A[1].first - A[1].second; + if (A[0].second > A[1].second) { + std::swap(*i, *j); + } + } else if (!A[0].second) { + *i = A[0].first; + *j = 0; + } else { + *i = 0; + *j = A[0].first - 1; + } + return is_pop_pop(*i, *j); +} + +const StackTransform StackTransform::rot{2, 0, 1, 3}; +const StackTransform StackTransform::rot_rev{1, 2, 0, 3}; + +bool StackTransform::is_rot() const { + return equal(rot, true); +} + +bool StackTransform::is_rotrev() const { + return equal(rot_rev, true); +} + +// PUSH i ; ROT == 1 i 0 2 3 +bool StackTransform::is_push_rot(int i) const { + return is_valid() && d == -1 && i >= 0 && is_trivial_after(3) && get(0) == 1 && get(1) == i && get(2) == 0; +} + +bool StackTransform::is_push_rot(int *i) const { + return is_valid() && (*i = get(1)) >= 0 && is_push_rot(*i); +} + +// PUSH i ; -ROT == 0 1 i 2 3 +bool StackTransform::is_push_rotrev(int i) const { + return is_valid() && d == -1 && i >= 0 && is_trivial_after(3) && get(0) == 0 && get(1) == 1 && get(2) == i; +} + +bool StackTransform::is_push_rotrev(int *i) const { + return is_valid() && (*i = get(2)) >= 0 && is_push_rotrev(*i); +} + +// PUSH s(i) ; XCHG s(j),s(k) --> i 0 1 .. i .. +// PUSH s(i) ; XCHG s(0),s(k) --> k-1 0 1 .. k-2 i k .. 
+bool StackTransform::is_push_xchg(int i, int j, int k) const { + StackTransform t; + return is_valid() && d == -1 && n <= 3 && t.apply_push(i) && t.apply_xchg(j, k) && t <= *this; +} + +bool StackTransform::is_push_xchg(int *i, int *j, int *k) const { + if (!(is_valid() && d == -1 && n <= 3 && n > 0)) { + return false; + } + int s = get(0); + if (s < 0) { + return false; + } + *i = s; + *j = 0; + if (n == 1) { + *k = 0; + } else if (n == 2) { + *k = s + 1; + *i = get(s + 1); + } else { + *j = A[1].first + 1; + *k = A[2].first + 1; + } + return is_push_xchg(*i, *j, *k); +} + +// XCHG s1,s(i) ; XCHG s0,s(j) +bool StackTransform::is_xchg2(int i, int j) const { + StackTransform t; + return is_valid() && !d && t.apply_xchg(1, i) && t.apply_xchg(0, j) && t <= *this; +} + +bool StackTransform::is_xchg2(int *i, int *j) const { + if (!is_valid() || d || n > 4 || n == 1 || dp < 2) { + return false; + } + *i = get(1); + *j = get(0); + if (!n) { + return true; + } + if (*i < 0 || *j < 0) { + return false; + } + if (n == 2 && !*i) { + *j = *i; // XCHG s0,s1 = XCHG2 s0,s0 + } else if (n == 3 && *i) { + // XCHG2 s(i),s(i) = XCHG s1,s(i) ; XCHG s0,s(i) : 0->1, 1->i + *j = *i; + } // XCHG2 s0,s(i) = XCHG s0,s1 ; XCHG s0,s(i) : 0->i, 1->0 + return is_xchg2(*i, *j); +} + +// XCHG s0,s(i) ; PUSH s(j) = PUSH s(j') ; XCHG s1,s(i+1) +// j'=j if j!=0, j!=i +// j'=0 if j=i +// j'=i if j=0 +bool StackTransform::is_xcpu(int i, int j) const { + StackTransform t; + return is_valid() && d == -1 && t.apply_xchg(0, i) && t.apply_push(j) && t <= *this; +} + +bool StackTransform::is_xcpu(int *i, int *j) const { + if (!is_valid() || d != -1 || n > 3 || dp < 1) { + return false; + } + *i = get(1); + *j = get(0); + if (!*j) { + *j = *i; + } else if (*j == *i) { + *j = 0; + } + return is_xcpu(*i, *j); +} + +// PUSH s(i) ; XCHG s0, s1 ; XCHG s0, s(j+1) +bool StackTransform::is_puxc(int i, int j) const { + StackTransform t; + return is_valid() && d == -1 && t.apply_push(i) && t.apply_xchg(0, 1) && 
t.apply_xchg(0, j + 1) && t <= *this; +} + +// j > 0 : 0 -> j, 1 -> i +// j = 0 : 0 -> i, 1 -> 0 ( PUSH s(i) ) +// j = -1 : 0 -> 0, 1 -> i ( PUSH s(i) ; XCHG s0, s1 ) +bool StackTransform::is_puxc(int *i, int *j) const { + if (!is_valid() || d != -1 || n > 3) { + return false; + } + *i = get(1); + *j = get(0); + if (!*i && is_push(*j)) { + std::swap(*i, *j); + return is_puxc(*i, *j); + } + if (!*j) { + --*j; + } + return is_puxc(*i, *j); +} + +// PUSH s(i) ; PUSH s(j+1) +bool StackTransform::is_push2(int i, int j) const { + StackTransform t; + return is_valid() && d == -2 && t.apply_push(i) && t.apply_push(j + 1) && t <= *this; +} + +bool StackTransform::is_push2(int *i, int *j) const { + if (!is_valid() || d != -2 || n > 2) { + return false; + } + *i = get(1); + *j = get(0); + return is_push2(*i, *j); +} + +// XCHG s2,s(i) ; XCHG s1,s(j) ; XCHG s0,s(k) +bool StackTransform::is_xchg3(int *i, int *j, int *k) const { + if (!is_valid() || d || dp < 3 || !is_permutation()) { + return false; + } + for (int s = 2; s >= 0; s--) { + *i = get(s); + StackTransform t = Xchg(2, *i) * *this; + if (t.is_xchg2(j, k)) { + return true; + } + } + return false; +} + +// XCHG s1,s(i) ; XCHG s0,s(j) ; PUSH s(k) +bool StackTransform::is_xc2pu(int *i, int *j, int *k) const { + if (!is_valid() || d != -1 || dp < 2) { + return false; + } + for (int s = 2; s >= 1; s--) { + *i = get(s); + StackTransform t = Xchg(1, *i) * *this; + if (t.is_xcpu(j, k)) { + return true; + } + } + return false; +} + +// XCHG s1,s(i) ; PUSH s(j) ; XCHG s0,s1 ; XCHG s0,s(k+1) +bool StackTransform::is_xcpuxc(int *i, int *j, int *k) const { + if (!is_valid() || d != -1 || dp < 2) { + return false; + } + for (int s = 2; s >= 0; s--) { + *i = get(s); + StackTransform t = Xchg(1, *i) * *this; + if (t.is_puxc(j, k)) { + return true; + } + } + return false; +} + +// XCHG s0,s(i) ; PUSH s(j) ; PUSH s(k+1) +bool StackTransform::is_xcpu2(int *i, int *j, int *k) const { + if (!is_valid() || d != -2 || dp < 1) { + return 
false; + } + *i = get(2); + StackTransform t = Xchg(0, *i) * *this; + return t.is_push2(j, k); +} + +// PUSH s(i) ; XCHG s0,s2 ; XCHG s1,s(j+1) ; XCHG s0,s(k+1) +// 0 -> i or 1 -> i or 2 -> i ; i has two preimages +// 0 -> k if k >= 2, k != j +// 1 -> j=k if j = k >= 2 +// 1 -> j if j >= 2, k != 0 +// 0 -> j if j >= 2, k = 0 +// => i in {f(0), f(1), f(2)} ; j in {-1, 0, 1, f(0), f(1)} ; k in {-1, 0, 1, f(0), f(1)} +bool StackTransform::is_puxc2(int *i, int *j, int *k) const { + if (!is_valid() || d != -1 || dp < 2) { + return false; + } + for (int s = 2; s >= 0; s--) { + *i = get(s); + if (preimage_count(*i) != 2) { + continue; + } + for (int u = -1; u <= 3; u++) { + *j = (u >= 2 ? get(u - 2) : u); + for (int v = -1; v <= 3; v++) { + *k = (v >= 2 ? get(v - 2) : v); + if (is_puxc2(*i, *j, *k)) { + return true; + } + } + } + } + return false; +} + +// PUSH s(i) ; XCHG s0,s2 ; XCHG s1,s(j+1) ; XCHG s0,s(k+1) +bool StackTransform::is_puxc2(int i, int j, int k) const { + StackTransform t; + return is_valid() && d == -1 && dp >= 2 // basic checks + && t.apply_push(i) && t.apply_xchg(0, 2) // PUSH s(i) ; XCHG s0,s2 + && t.apply_xchg(1, j + 1) // XCHG s1,s(j+1) + && t.apply_xchg(0, k + 1) && t <= *this; // XCHG s0,s(k+2) +} + +// PUSH s(i) ; XCHG s0,s1 ; XCHG s0,s(j+1) ; PUSH s(k+1) +bool StackTransform::is_puxcpu(int *i, int *j, int *k) const { + if (!is_valid() || d != -2 || dp < 1) { + return false; + } + StackTransform t = *this; + if (t.apply_pop() && t.is_puxc(i, j)) { + int y = get(0); + auto v = t.preimage(y); + if (!v.empty()) { + *k = v[0] - 1; + t.apply_push(*k + 1); + return t <= *this; + } + } + return false; +} + +// PUSH s(i) ; XCHG s0,s1 ; PUSH s(j+1) ; XCHG s0,s1 ; XCHG s0,s(k+2) +// 2 -> i; 1 -> j (if j >= 1, k != -1), 1 -> i (if j = 0, k != -1), 1 -> 0 (if j = -1, k != -1) +// 0 -> k (if k >= 1), 0 -> i (if k = 0), 0 -> j (if k = -1, j >= 1) +bool StackTransform::is_pu2xc(int *i, int *j, int *k) const { + if (!is_valid() || d != -2 || dp < 1) { + return 
false; + } + *i = get(2); + for (int v = -2; v <= 1; v++) { + *k = (v <= 0 ? v : get(0)); // one of -2, -1, 0, get(0) + for (int u = -1; u <= 1; u++) { + *j = (u <= 0 ? u : get(v != -1)); // one of -1, 0, get(0), get(1) + if (is_pu2xc(*i, *j, *k)) { + return true; + } + } + } + return false; +} + +bool StackTransform::is_pu2xc(int i, int j, int k) const { + StackTransform t; + return is_valid() && d == -2 && dp >= 1 // basic checks + && t.apply_push(i) && t.apply_xchg(0, 1) // PUSH s(i) ; XCHG s0,s1 + && t.apply_push(j + 1) && t.apply_xchg(0, 1) // PUSH s(j+1) ; XCHG s0,s1 + && t.apply_xchg(0, k + 2) && t <= *this; // XCHG s0,s(k+2) +} + +// PUSH s(i) ; PUSH s(j+1) ; PUSH s(k+2) +bool StackTransform::is_push3(int i, int j, int k) const { + StackTransform t; + return is_valid() && d == -3 && t.apply_push(i) && t.apply_push(j + 1) && t.apply_push(k + 2) && t <= *this; +} + +bool StackTransform::is_push3(int *i, int *j, int *k) const { + if (!is_valid() || d != -3 || n > 3) { + return false; + } + *i = get(2); + *j = get(1); + *k = get(0); + return is_push3(*i, *j, *k); +} + +bool StackTransform::is_blkswap(int *i, int *j) const { + if (!is_valid() || d || !is_permutation()) { + return false; + } + *j = get(0); + if (*j <= 0) { + return false; + } + auto v = preimage(0); + if (v.size() != 1) { + return false; + } + *i = v[0]; + return *i > 0 && is_blkswap(*i, *j); +} + +bool StackTransform::is_blkswap(int i, int j) const { + if (!is_valid() || d || i <= 0 || j <= 0 || dp < i + j || !is_trivial_after(i + j)) { + return false; + } + for (int s = 0; s < i; s++) { + if (get(s) != s + j) { + return false; + } + } + for (int s = 0; s < j; s++) { + if (get(s + i) != s) { + return false; + } + } + return true; +} + +// equivalent to i times DROP +bool StackTransform::is_blkdrop(int *i) const { + if (is_valid() && d > 0 && !n) { + *i = d; + return true; + } + return false; +} + +// 0 1 .. j-1 j+i j+i+1 ... 
+bool StackTransform::is_blkdrop2(int i, int j) const { + if (!is_valid() || d != i || i <= 0 || j < 0 || dp < i + j || n != j || !is_trivial_after(j)) { + return false; + } + for (int s = 0; s < j; s++) { + if (get(s) != s) { + return false; + } + } + return true; +} + +bool StackTransform::is_blkdrop2(int *i, int *j) const { + if (is_valid() && is_blkdrop2(d, n)) { + *i = d; + *j = n; + return true; + } + return false; +} + +// equivalent to i times PUSH s(j) +bool StackTransform::is_blkpush(int *i, int *j) const { + if (!is_valid() || d >= 0) { + return false; + } + *i = -d; + *j = get(*i - 1); + return is_blkpush(*i, *j); +} + +bool StackTransform::is_blkpush(int i, int j) const { + if (!is_valid() || d >= 0 || d != -i || j < 0 || dp < i + j || !is_trivial_after(i)) { + return false; + } + StackTransform t; + for (int s = 0; s < i; s++) { + if (!t.apply_push(j)) { + return false; + } + } + return t <= *this; +} + +bool StackTransform::is_reverse(int *i, int *j) const { + if (!is_valid() || d || !is_permutation() || n < 2) { + return false; + } + *j = A[0].first; + *i = A[n - 1].first - A[0].first + 1; + return is_reverse(*i, *j); +} + +bool StackTransform::is_reverse(int i, int j) const { + if (!is_valid() || d || !is_trivial_after(i + j) || n < 2 || A[0].first != j || A[n - 1].first != j + i - 1) { + return false; + } + for (int s = 0; s < i; s++) { + if (get(j + s) != j + i - 1 - s) { + return false; + } + } + return true; +} + +// 0 i+1 i+2 ... == i*NIP +// j i+1 i+2 ... 
== XCHG s(i),s(j) ; BLKDROP i +bool StackTransform::is_nip_seq(int i, int j) const { + return is_valid() && d == i && i > j && j >= 0 && n == 1 && A[0].first == i && A[0].second == j; +} + +bool StackTransform::is_nip_seq(int *i) const { + *i = d; + return is_nip_seq(*i); +} + +bool StackTransform::is_nip_seq(int *i, int *j) const { + if (is_valid() && n > 0) { + *i = d; + *j = A[0].second; + return is_nip_seq(*i, *j); + } else { + return false; + } +} + +// POP s(i); BLKDROP k (usually for i >= k >= 0) +bool StackTransform::is_pop_blkdrop(int i, int k) const { + StackTransform t; + return is_valid() && d == k + 1 && t.apply_pop(i) && t.apply_blkpop(k) && t <= *this; +} + +// POP s(i); BLKDROP k == XCHG s0,s(i); BLKDROP k+1 for i >= k >= 0 +// k+1 k+2 .. i-1 0 i+1 .. +bool StackTransform::is_pop_blkdrop(int *i, int *k) const { + if (is_valid() && n == 1 && d > 0 && !A[0].second) { + *k = d - 1; + *i = A[0].first; + return is_pop_blkdrop(*i, *k); + } else { + return false; + } +} + +// POP s(i); POP s(j); BLKDROP k (usually for i<>j >= k >= 0) +bool StackTransform::is_2pop_blkdrop(int i, int j, int k) const { + StackTransform t; + return is_valid() && d == k + 2 && t.apply_pop(i) && t.apply_pop(j) && t.apply_blkpop(k) && t <= *this; +} + +// POP s(i); POP s(j); BLKDROP k == XCHG s0,s(i); XCHG s1,s(j+1); BLKDROP k+2 (usually for i<>j >= k >= 2) +// k+2 k+3 .. i-1 0 i+1 ... j 1 j+2 ... 
+bool StackTransform::is_2pop_blkdrop(int *i, int *j, int *k) const { + if (is_valid() && n == 2 && d >= 2 && A[0].second + A[1].second == 1) { + *k = d - 2; + int t = (A[0].second > 0); + *i = A[t].first; + *j = A[1 - t].first - 1; + return is_2pop_blkdrop(*i, *j, *k); + } else { + return false; + } +} + +// PUSHCONST c ; ROT == 1 -1000 0 2 3 +bool StackTransform::is_const_rot(int c) const { + return is_valid() && d == -1 && is_trivial_after(3) && get(0) == 1 && c <= c_start && get(1) == c && get(2) == 0; +} + +bool StackTransform::is_const_rot(int *c) const { + return is_valid() && (*c = get(1)) <= c_start && is_const_rot(*c); +} + +// PUSHCONST c ; POP s(i) == 0 1 .. i-1 -1000 i+1 ... +bool StackTransform::is_const_pop(int c, int i) const { + return is_valid() && !d && n == 1 && i > 0 && c <= c_start && get(i - 1) == c; +} + +bool StackTransform::is_const_pop(int *c, int *i) const { + if (is_valid() && !d && n == 1 && A[0].second <= c_start) { + *i = A[0].first + 1; + *c = A[0].second; + return is_const_pop(*c, *i); + } else { + return false; + } +} + +// PUSH i ; PUSHCONST c == c i 0 1 2 ... +bool StackTransform::is_push_const(int i, int c) const { + return is_valid() && d == -2 && c <= c_start && i >= 0 && is_trivial_after(2) && get(0) == c && get(1) == i; +} + +bool StackTransform::is_push_const(int *i, int *c) const { + return is_valid() && d == -2 && n == 2 && is_push_const(*i = get(1), *c = get(0)); +} + +void StackTransform::show(std::ostream &os, int mode) const { + if (!is_valid()) { + os << ""; + return; + } + int mi = 0, ma = 0; + if (n > 0 && A[0].first < d) { + mi = A[0].first - d; + } + if (n > 0) { + ma = std::max(ma, A[n - 1].first - d + 1); + } + ma = std::max(ma + 1, dp - d); + os << '{'; + if (dp == d) { + os << '|'; + } + for (int i = mi; i < ma; i++) { + os << get(i) << (i == -1 ? '?' : (i == dp - d - 1 ? 
'|' : ' ')); + } + os << get(ma) << "..}"; +} + +} // namespace tolk diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp new file mode 100644 index 000000000..abaeb0846 --- /dev/null +++ b/tolk/symtable.cpp @@ -0,0 +1,169 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "symtable.h" +#include "compiler-state.h" +#include +#include + +namespace tolk { + + +std::string Symbol::unknown_symbol_name(sym_idx_t i) { + if (!i) { + return "_"; + } else { + std::ostringstream os; + os << "SYM#" << i; + return os.str(); + } +} + +sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) { + unsigned long long h1 = 1, h2 = 1; + for (char c : str) { + h1 = ((h1 * 239) + (unsigned char)(c)) % SIZE_PRIME; + h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1); + } + ++h2; + ++h1; + while (true) { + if (sym[h1]) { + if (sym[h1]->str == str) { + return (mode & 2) ? not_found : sym_idx_t(h1); + } + h1 += h2; + if (h1 > SIZE_PRIME) { + h1 -= SIZE_PRIME; + } + } else { + if (!(mode & 1)) { + return not_found; + } + if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) { + throw SymTableOverflow{def_sym}; + } + sym[h1] = std::make_unique(static_cast(str), idx <= 0 ? 
sym_idx_t(h1) : -idx); + ++def_sym; + return sym_idx_t(h1); + } + } +} + +std::string SymDef::name() const { + return G.symbols.get_name(sym_idx); +} + +void open_scope(SrcLocation loc) { + ++G.scope_level; + G.scope_opened_at.push_back(loc); +} + +void close_scope() { + if (!G.scope_level) { + throw Fatal{"cannot close the outer scope"}; + } + while (!G.symbol_stack.empty() && G.symbol_stack.back().first == G.scope_level) { + SymDef old_def = G.symbol_stack.back().second; + auto idx = old_def.sym_idx; + G.symbol_stack.pop_back(); + SymDef* cur_def = G.sym_def[idx]; + assert(cur_def); + assert(cur_def->level == G.scope_level && cur_def->sym_idx == idx); + //std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl; + if (cur_def->value) { + //std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl; + delete cur_def->value; + } + if (!old_def.level && !old_def.value) { + delete cur_def; // ??? keep the definition always? 
+ G.sym_def[idx] = nullptr; + } else { + cur_def->value = old_def.value; + cur_def->level = old_def.level; + } + old_def.value = nullptr; + } + --G.scope_level; + G.scope_opened_at.pop_back(); +} + +SymDef* lookup_symbol(sym_idx_t idx) { + if (!idx) { + return nullptr; + } + if (G.sym_def[idx]) { + return G.sym_def[idx]; + } + if (G.global_sym_def[idx]) { + return G.global_sym_def[idx]; + } + return nullptr; +} + +SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc) { + if (SymDef* found = G.global_sym_def[name_idx]) { + return found; // found->value is filled; it means, that a symbol is redefined + } + + SymDef* registered = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc); +#ifdef TOLK_DEBUG + registered->sym_name = registered->name(); +#endif + return registered; // registered->value is nullptr; it means, it's just created +} + +SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc) { + // note, that parameters (defined at function declaration) are not inserted into symtable + // their SymDef is registered to be inserted into SymValFunc::parameters + // (and later ->value is filled with SymValVariable) + + SymDef* registered = new SymDef(0, name_idx, loc); +#ifdef TOLK_DEBUG + registered->sym_name = registered->name(); +#endif + return registered; +} + +SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { + if (!name_idx) { + return nullptr; + } + if (!G.scope_level) { + throw Fatal("unexpected scope_level = 0"); + } + auto found = G.sym_def[name_idx]; + if (found) { + if (found->level < G.scope_level) { + G.symbol_stack.emplace_back(G.scope_level, *found); + found->level = G.scope_level; + } else if (found->value && force_new) { + return nullptr; + } + found->value = nullptr; + found->loc = loc; + return found; + } + found = G.sym_def[name_idx] = new SymDef(G.scope_level, name_idx, loc); + G.symbol_stack.emplace_back(G.scope_level, SymDef{0, name_idx, loc}); +#ifdef TOLK_DEBUG + found->sym_name = 
found->name(); + G.symbol_stack.back().second.sym_name = found->name(); +#endif + return found; +} + +} // namespace tolk diff --git a/tolk/symtable.h b/tolk/symtable.h new file mode 100644 index 000000000..69e2eaa8e --- /dev/null +++ b/tolk/symtable.h @@ -0,0 +1,114 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "src-file.h" +#include "type-expr.h" +#include +#include + +namespace tolk { + +typedef int var_idx_t; +typedef int sym_idx_t; + +enum class SymValKind { _Var, _Func, _GlobVar, _Const }; + +struct SymValBase { + SymValKind kind; + int idx; + TypeExpr* sym_type; +#ifdef TOLK_DEBUG + std::string sym_name; // seeing symbol name in debugger makes it much easier to delve into Tolk sources +#endif + + SymValBase(SymValKind kind, int idx, TypeExpr* sym_type) : kind(kind), idx(idx), sym_type(sym_type) { + } + virtual ~SymValBase() = default; + + TypeExpr* get_type() const { + return sym_type; + } +}; + + +struct Symbol { + std::string str; + sym_idx_t idx; + + Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {} + + static std::string unknown_symbol_name(sym_idx_t i); +}; + +class SymTable { +public: + static constexpr int SIZE_PRIME = 100003; + +private: + sym_idx_t def_sym{0}; + std::unique_ptr sym[SIZE_PRIME + 1]; + sym_idx_t gen_lookup(std::string_view str, int 
mode = 0, sym_idx_t idx = 0); + +public: + + static constexpr sym_idx_t not_found = 0; + sym_idx_t lookup(std::string_view str) { + return gen_lookup(str, 0); + } + sym_idx_t lookup_add(std::string_view str) { + return gen_lookup(str, 1); + } + Symbol* operator[](sym_idx_t i) const { + return sym[i].get(); + } + std::string get_name(sym_idx_t i) const { + return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i); + } +}; + +struct SymTableOverflow { + int sym_def; + explicit SymTableOverflow(int x) : sym_def(x) { + } +}; + + +struct SymDef { + int level; + sym_idx_t sym_idx; + SymValBase* value; + SrcLocation loc; +#ifdef TOLK_DEBUG + std::string sym_name; +#endif + SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr) + : level(lvl), sym_idx(idx), value(val), loc(_loc) { + } + std::string name() const; +}; + + +void open_scope(SrcLocation loc); +void close_scope(); +SymDef* lookup_symbol(sym_idx_t idx); + +SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc = {}); +SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc); +SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc); + +} // namespace tolk diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp new file mode 100644 index 000000000..7f939670b --- /dev/null +++ b/tolk/tolk-main.cpp @@ -0,0 +1,285 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. 
If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "tolk.h" +#include "tolk-version.h" +#include "compiler-state.h" +#include "td/utils/port/path.h" +#include +#include +#include +#ifdef TD_DARWIN +#include +#elif TD_WINDOWS +#include +#else // linux +#include +#endif +#include "git.h" + +using namespace tolk; + +void usage(const char* progname) { + std::cerr + << "usage: " << progname << " [options] \n" + "\tGenerates Fift TVM assembler code from a .tolk file\n" + "-o\tWrites generated code into specified .fif file instead of stdout\n" + "-b\tGenerate Fift instructions to save TVM bytecode into .boc file\n" + "-O\tSets optimization level (2 by default)\n" + "-x\tEnables experimental options, comma-separated\n" + "-S\tDon't include stack layout comments into Fift output\n" + "-e\tIncreases verbosity level (extra output into stderr)\n" + "-v\tOutput version of Tolk and exit\n"; + std::exit(2); +} + +static bool stdlib_folder_exists(const char* stdlib_folder) { + struct stat f_stat; + int res = stat(stdlib_folder, &f_stat); + return res == 0 && (f_stat.st_mode & S_IFMT) == S_IFDIR; +} + +// getting current executable path is a complicated and not cross-platform task +// for instance, we can't just use argv[0] or even filesystem::canonical +// https://stackoverflow.com/questions/1023306/finding-current-executables-path-without-proc-self-exe/1024937 +static bool get_current_executable_filename(std::string& 
out) { +#ifdef TD_DARWIN + char name_buf[1024]; + unsigned int size = 1024; + if (0 == _NSGetExecutablePath(name_buf, &size)) { // may contain ../, so normalize it + char *exe_path = realpath(name_buf, nullptr); + if (exe_path != nullptr) { + out = exe_path; + return true; + } + } +#elif TD_WINDOWS + char exe_path[1024]; + if (GetModuleFileNameA(nullptr, exe_path, 1024)) { + out = exe_path; + std::replace(out.begin(), out.end(), '\\', '/'); // modern Windows correctly deals with / separator + return true; + } +#else // linux + char exe_path[1024]; + ssize_t res = readlink("/proc/self/exe", exe_path, 1024 - 1); + if (res >= 0) { + exe_path[res] = 0; + out = exe_path; + return true; + } +#endif + return false; +} + +// simple join "/some/folder/" (guaranteed to end with /) and "../relative/path" +static std::string join_path(std::string dir, const char* relative) { + while (relative[0] == '.' && relative[1] == '.' && relative[2] == '/') { + size_t slash_pos = dir.find_last_of('/', dir.size() - 2); // last symbol is slash, find before it + if (slash_pos != std::string::npos) { + dir = dir.substr(0, slash_pos + 1); + } + relative += 3; + } + + return dir + relative; +} + +static std::string auto_discover_stdlib_folder() { + // if the user launches tolk compiler from a package installed (e.g. 
/usr/bin/tolk), + // locate stdlib in /usr/share/ton/smartcont (this folder exists on package installation) + // (note, that paths are not absolute, they are relative to the launched binary) + // consider https://github.com/ton-blockchain/packages for actual paths + std::string executable_filename; + if (!get_current_executable_filename(executable_filename)) { + return {}; + } + + // extract dirname to concatenate with relative paths (separator / is ok even for windows) + size_t slash_pos = executable_filename.find_last_of('/'); + std::string executable_dir = executable_filename.substr(0, slash_pos + 1); + +#ifdef TD_DARWIN + std::string def_location = join_path(executable_dir, "../share/ton/ton/smartcont/tolk-stdlib"); +#elif TD_WINDOWS + std::string def_location = join_path(executable_dir, "smartcont/tolk-stdlib"); +#else // linux + std::string def_location = join_path(executable_dir, "../share/ton/smartcont/tolk-stdlib"); +#endif + + if (stdlib_folder_exists(def_location.c_str())) { + return def_location; + } + + // so, the binary is not from a system package + // maybe it's just built from sources? e.g. 
~/ton/cmake-build-debug/tolk/tolk + // then, check the ~/ton/crypto/smartcont folder + std::string near_when_built_from_sources = join_path(executable_dir, "../../crypto/smartcont/tolk-stdlib"); + if (stdlib_folder_exists(near_when_built_from_sources.c_str())) { + return near_when_built_from_sources; + } + + // no idea of where to find stdlib; let's show an error for the user, he should provide env var above + return {}; +} + +td::Result fs_read_callback(CompilerSettings::FsReadCallbackKind kind, const char* query) { + switch (kind) { + case CompilerSettings::FsReadCallbackKind::Realpath: { + td::Result res_realpath; + if (query[0] == '@' && strlen(query) > 8 && !strncmp(query, "@stdlib/", 8)) { + // import "@stdlib/filename" or import "@stdlib/filename.tolk" + std::string path = G.settings.stdlib_folder + static_cast(query + 7); + if (strncmp(path.c_str() + path.size() - 5, ".tolk", 5) != 0) { + path += ".tolk"; + } + res_realpath = td::realpath(td::CSlice(path.c_str())); + } else { + // import "relative/to/cwd/path.tolk" + res_realpath = td::realpath(td::CSlice(query)); + } + + if (res_realpath.is_error()) { + // note, that for non-existing files, `realpath()` on Linux/Mac returns an error, + // whereas on Windows, it returns okay, but fails after, on reading, with a message "cannot open file" + return td::Status::Error(std::string{"cannot find file "} + query); + } + return res_realpath; + } + case CompilerSettings::FsReadCallbackKind::ReadFile: { + struct stat f_stat; + int res = stat(query, &f_stat); // query here is already resolved realpath + if (res != 0 || (f_stat.st_mode & S_IFMT) != S_IFREG) { + return td::Status::Error(std::string{"cannot open file "} + query); + } + + size_t file_size = static_cast(f_stat.st_size); + std::string str; + str.resize(file_size); + FILE* f = fopen(query, "rb"); + fread(str.data(), file_size, 1, f); + fclose(f); + return std::move(str); + } + default: { + return td::Status::Error("unknown query kind"); + } + } +} + +class 
StdCoutRedirectToFile { + std::unique_ptr output_file; + std::streambuf* backup_sbuf = nullptr; + +public: + explicit StdCoutRedirectToFile(const std::string& output_filename) { + if (!output_filename.empty()) { + output_file = std::make_unique(output_filename, std::fstream::trunc | std::fstream::out); + if (output_file->is_open()) { + backup_sbuf = std::cout.rdbuf(output_file->rdbuf()); + } + } + } + + ~StdCoutRedirectToFile() { + if (backup_sbuf) { + std::cout.rdbuf(backup_sbuf); + } + } + + bool is_failed() const { return output_file && !output_file->is_open(); } +}; + +int main(int argc, char* const argv[]) { + int i; + while ((i = getopt(argc, argv, "o:b:O:x:Sevh")) != -1) { + switch (i) { + case 'o': + G.settings.output_filename = optarg; + break; + case 'b': + G.settings.boc_output_filename = optarg; + break; + case 'O': + G.settings.optimization_level = std::max(0, atoi(optarg)); + break; + case 'x': + G.settings.parse_experimental_options_cmd_arg(optarg); + break; + case 'S': + G.settings.stack_layout_comments = false; + break; + case 'e': + G.settings.verbosity++; + break; + case 'v': + std::cout << "Tolk compiler v" << TOLK_VERSION << std::endl; + std::cout << "Build commit: " << GitMetadata::CommitSHA1() << std::endl; + std::cout << "Build date: " << GitMetadata::CommitDate() << std::endl; + std::exit(0); + case 'h': + default: + usage(argv[0]); + } + } + + StdCoutRedirectToFile redirect_cout(G.settings.output_filename); + if (redirect_cout.is_failed()) { + std::cerr << "Failed to create output file " << G.settings.output_filename << std::endl; + return 2; + } + + // locate tolk-stdlib/ based on env or default system paths + if (const char* env_var = getenv("TOLK_STDLIB")) { + std::string stdlib_filename = static_cast(env_var) + "/common.tolk"; + td::Result res = td::realpath(td::CSlice(stdlib_filename.c_str())); + if (res.is_error()) { + std::cerr << "Environment variable TOLK_STDLIB is invalid: " << res.move_as_error().message().c_str() << std::endl; 
+ return 2; + } + G.settings.stdlib_folder = env_var; + } else { + G.settings.stdlib_folder = auto_discover_stdlib_folder(); + } + if (G.settings.stdlib_folder.empty()) { + std::cerr << "Failed to discover Tolk stdlib.\n" + "Probably, you have a non-standard Tolk installation.\n" + "Please, provide env variable TOLK_STDLIB referencing to tolk-stdlib/ folder.\n"; + return 2; + } + if (G.is_verbosity(2)) { + std::cerr << "stdlib folder: " << G.settings.stdlib_folder << std::endl; + } + + if (optind != argc - 1) { + std::cerr << "invalid usage: should specify exactly one input file.tolk" << std::endl; + return 2; + } + + G.settings.read_callback = fs_read_callback; + + int exit_code = tolk_proceed(argv[optind]); + return exit_code; +} diff --git a/tolk/tolk-version.h b/tolk/tolk-version.h new file mode 100644 index 000000000..6e5b764ca --- /dev/null +++ b/tolk/tolk-version.h @@ -0,0 +1,23 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +namespace tolk { + +constexpr const char* TOLK_VERSION = "0.6.0"; + +} // namespace tolk diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp new file mode 100644 index 000000000..e74589ce8 --- /dev/null +++ b/tolk/tolk-wasm.cpp @@ -0,0 +1,127 @@ +/* + This file is part of TON Blockchain source code. 
+ + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>. + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "tolk-version.h" +#include "compiler-state.h" +#include "git.h" +#include "td/utils/JsonBuilder.h" +#include "fift/utils.h" +#include "td/utils/Status.h" +#include + +using namespace tolk; + +static td::Result compile_internal(char *config_json) { + TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) + td::JsonObject& config = input_json.get_object(); + + TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2)); + TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false)); + TRY_RESULT(entrypoint_filename, td::get_json_object_string_field(config, "entrypointFileName", false)); + TRY_RESULT(experimental_options, td::get_json_object_string_field(config, "experimentalOptions", true)); + + G.settings.verbosity = 0; + G.settings.optimization_level = std::max(0, opt_level); + G.settings.stack_layout_comments = stack_comments; + if (!experimental_options.empty()) { + G.settings.parse_experimental_options_cmd_arg(experimental_options.c_str()); + } + + std::ostringstream outs, errs; + std::cout.rdbuf(outs.rdbuf()); + std::cerr.rdbuf(errs.rdbuf()); + int exit_code = tolk_proceed(entrypoint_filename); + if (exit_code != 0) { + return td::Status::Error("Tolk compilation error: " + errs.str()); + } + + TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); + + td::JsonBuilder result_json; + auto obj = result_json.enter_object(); + obj("status", "ok"); + obj("fiftCode", fift_res.fiftCode); + obj("codeBoc64", fift_res.codeBoc64); + obj("codeHashHex", fift_res.codeHashHex); + obj("stderr", errs.str().c_str()); + obj.leave(); + + return result_json.string_builder().as_cslice().str(); +} + +/// Callback used to retrieve file contents from a "not file system". See tolk-js for implementation. +/// The callback must fill either destContents or destError. 
+/// The implementor must use malloc() for them and use free() after tolk_compile returns. +typedef void (*WasmFsReadCallback)(int kind, char const* data, char** destContents, char** destError); + +static CompilerSettings::FsReadCallback wrap_wasm_read_callback(WasmFsReadCallback _readCallback) { + return [_readCallback](CompilerSettings::FsReadCallbackKind kind, char const* data) -> td::Result { + char* destContents = nullptr; + char* destError = nullptr; + if (_readCallback) { + _readCallback(static_cast(kind), data, &destContents, &destError); + } + if (destContents) { + return destContents; + } + if (destError) { + return td::Status::Error(std::string(destError)); + } + return td::Status::Error("Invalid callback from wasm"); + }; +} + +extern "C" { + +const char* version() { + td::JsonBuilder version_json = td::JsonBuilder(); + auto obj = version_json.enter_object(); + obj("tolkVersion", TOLK_VERSION); + obj("tolkFiftLibCommitHash", GitMetadata::CommitSHA1()); + obj("tolkFiftLibCommitDate", GitMetadata::CommitDate()); + obj.leave(); + return strdup(version_json.string_builder().as_cslice().c_str()); +} + +const char *tolk_compile(char *config_json, WasmFsReadCallback callback) { + G.settings.read_callback = wrap_wasm_read_callback(callback); + + td::Result res = compile_internal(config_json); + + if (res.is_error()) { + td::JsonBuilder error_res = td::JsonBuilder(); + auto obj = error_res.enter_object(); + obj("status", "error"); + obj("message", res.move_as_error().message().str()); + obj.leave(); + return strdup(error_res.string_builder().as_cslice().c_str()); + } + + std::string res_string = res.move_as_ok(); + return strdup(res_string.c_str()); +} + +} // extern "C" diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp new file mode 100644 index 000000000..9268cc62d --- /dev/null +++ b/tolk/tolk.cpp @@ -0,0 +1,81 @@ +/* + This file is part of TON Blockchain source code. 
+ + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>. + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "pipeline.h" +#include "compiler-state.h" +#include "lexer.h" +#include "ast.h" + +namespace tolk { + + +void on_assertion_failed(const char *description, const char *file_name, int line_number) { + std::string message = static_cast("Assertion failed at ") + file_name + ":" + std::to_string(line_number) + ": " + description; +#ifdef TOLK_DEBUG +#ifdef __arm64__ + // when developing, it's handy when the debugger stops on assertion failure (stacktraces and watches are available) + std::cerr << message << std::endl; + __builtin_debugtrap(); +#endif +#endif + throw Fatal(std::move(message)); +} + +int tolk_proceed(const std::string &entrypoint_filename) { + define_builtins(); + lexer_init(); + + // on any error, an exception is thrown, and the message is printed out below + // (currently, only a single error can be printed) + try { + AllSrcFiles all_files = pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); + + pipeline_register_global_symbols(all_files); + pipeline_convert_ast_to_legacy_Expr_Op(all_files); + + pipeline_find_unused_symbols(); + pipeline_generate_fif_output_to_std_cout(all_files); + + return 0; + } catch (Fatal& fatal) { + std::cerr << "fatal: " << fatal << std::endl; + return 2; + } catch (ParseError& error) { + std::cerr << error << std::endl; + return 2; + } catch (UnifyError& unif_err) { + std::cerr << "fatal: "; + unif_err.print_message(std::cerr); + std::cerr << std::endl; + return 2; + } catch (UnexpectedASTNodeType& error) { + std::cerr << "fatal: " << error.what() << std::endl; + std::cerr << "It's a compiler bug, please report to developers" << std::endl; + return 2; + } +} + +} // namespace tolk diff --git a/tolk/tolk.h b/tolk/tolk.h new file mode 100644 index 000000000..971ca35dd --- /dev/null +++ b/tolk/tolk.h @@ -0,0 +1,1445 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "platform-utils.h" +#include "src-file.h" +#include "type-expr.h" +#include "symtable.h" +#include "crypto/common/refint.h" +#include "td/utils/Status.h" +#include +#include +#include +#include + +#define tolk_assert(expr) if(UNLIKELY(!(expr))) on_assertion_failed(#expr, __FILE__, __LINE__); + +namespace tolk { + +GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN +void on_assertion_failed(const char *description, const char *file_name, int line_number); + +/* + * + * TYPE EXPRESSIONS + * + */ + +struct UnifyError : std::exception { + TypeExpr* te1; + TypeExpr* te2; + std::string msg; + + UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(std::move(_msg)) { + } + + void print_message(std::ostream& os) const; + const char* what() const noexcept override { + return msg.c_str(); + } +}; + +std::ostream& operator<<(std::ostream& os, const UnifyError& ue); + +void unify(TypeExpr*& te1, TypeExpr*& te2); + +/* + * + * ABSTRACT CODE + * + */ + +using const_idx_t = int; + +struct TmpVar { + TypeExpr* v_type; + var_idx_t idx; + sym_idx_t sym_idx; + int coord; + SrcLocation where; + std::vector> on_modification; + + TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc); + bool is_unnamed() const { return sym_idx == 0; } + + void show(std::ostream& 
os, int omit_idx = 0) const; + void dump(std::ostream& os) const; + void set_location(SrcLocation loc); +}; + +struct VarDescr { + var_idx_t idx; + enum { _Last = 1, _Unused = 2 }; + int flags; + enum { + _Const = 16, + _Int = 32, + _Zero = 64, + _NonZero = 128, + _Pos = 256, + _Neg = 512, + _Finite = 4096, + _Nan = 8192, + _Even = 16384, + _Odd = 32768, + }; + static constexpr int ConstZero = _Const | _Int | _Zero | _Pos | _Neg | _Finite | _Even; + static constexpr int ConstOne = _Const | _Int | _NonZero | _Pos | _Finite | _Odd; + static constexpr int ConstTrue = _Const | _Int | _NonZero | _Neg | _Finite | _Odd; + static constexpr int ValBit = _Int | _Pos | _Finite; + static constexpr int ValBool = _Int | _Neg | _Finite; + static constexpr int FiniteInt = _Int | _Finite; + static constexpr int FiniteUInt = _Int | _Finite | _Pos; + int val; + td::RefInt256 int_const; + std::string str_const; + + explicit VarDescr(var_idx_t _idx = -1, int _flags = 0, int _val = 0) : idx(_idx), flags(_flags), val(_val) { + } + bool operator<(var_idx_t other_idx) const { + return idx < other_idx; + } + bool is_unused() const { + return flags & _Unused; + } + bool is_last() const { + return flags & _Last; + } + bool always_true() const { + return val & _NonZero; + } + bool always_false() const { + return val & _Zero; + } + bool always_nonzero() const { + return val & _NonZero; + } + bool always_zero() const { + return val & _Zero; + } + bool always_even() const { + return val & _Even; + } + bool always_odd() const { + return val & _Odd; + } + bool is_int_const() const { + return (val & (_Int | _Const)) == (_Int | _Const) && int_const.not_null(); + } + bool always_nonpos() const { + return val & _Neg; + } + bool always_nonneg() const { + return val & _Pos; + } + bool always_pos() const { + return (val & (_Pos | _NonZero)) == (_Pos | _NonZero); + } + bool always_neg() const { + return (val & (_Neg | _NonZero)) == (_Neg | _NonZero); + } + bool always_finite() const { + return val & 
_Finite; + } + bool always_less(const VarDescr& other) const; + bool always_leq(const VarDescr& other) const; + bool always_greater(const VarDescr& other) const; + bool always_geq(const VarDescr& other) const; + bool always_equal(const VarDescr& other) const; + bool always_neq(const VarDescr& other) const; + void unused() { + flags |= _Unused; + } + void clear_unused() { + flags &= ~_Unused; + } + void set_const(long long value); + void set_const(td::RefInt256 value); + void set_const(std::string value); + void set_const_nan(); + void operator+=(const VarDescr& y) { + flags &= y.flags; + } + void operator|=(const VarDescr& y); + void operator&=(const VarDescr& y); + void set_value(const VarDescr& y); + void set_value(VarDescr&& y); + void set_value(const VarDescr* y) { + if (y) { + set_value(*y); + } + } + void clear_value(); + void show_value(std::ostream& os) const; + void show(std::ostream& os, const char* var_name = nullptr) const; +}; + +inline std::ostream& operator<<(std::ostream& os, const VarDescr& vd) { + vd.show(os); + return os; +} + +struct VarDescrList { + std::vector list; + bool unreachable{false}; + VarDescrList() : list() { + } + VarDescrList(const std::vector& _list) : list(_list) { + } + VarDescrList(std::vector&& _list) : list(std::move(_list)) { + } + std::size_t size() const { + return list.size(); + } + VarDescr* operator[](var_idx_t idx); + const VarDescr* operator[](var_idx_t idx) const; + VarDescrList operator+(const VarDescrList& y) const; + VarDescrList& operator+=(const VarDescrList& y); + VarDescrList& clear_last(); + VarDescrList& operator+=(var_idx_t idx) { + return add_var(idx); + } + VarDescrList& operator+=(const std::vector& idx_list) { + return add_vars(idx_list); + } + VarDescrList& add_var(var_idx_t idx, bool unused = false); + VarDescrList& add_vars(const std::vector& idx_list, bool unused = false); + VarDescrList& operator-=(const std::vector& idx_list); + VarDescrList& operator-=(var_idx_t idx); + std::size_t count(const 
std::vector idx_list) const; + std::size_t count_used(const std::vector idx_list) const; + VarDescr& add(var_idx_t idx); + VarDescr& add_newval(var_idx_t idx); + VarDescrList& operator&=(const VarDescrList& values); + VarDescrList& import_values(const VarDescrList& values); + VarDescrList operator|(const VarDescrList& y) const; + VarDescrList& operator|=(const VarDescrList& values); + void show(std::ostream& os) const; + void set_unreachable() { + list.clear(); + unreachable = true; + } +}; + +inline std::ostream& operator<<(std::ostream& os, const VarDescrList& values) { + values.show(os); + return os; +} + +struct CodeBlob; + +template +class ListIterator { + T* ptr; + + public: + ListIterator() : ptr(nullptr) { + } + explicit ListIterator(T* _ptr) : ptr(_ptr) { + } + ListIterator& operator++() { + ptr = ptr->next.get(); + return *this; + } + ListIterator operator++(int) { + T* z = ptr; + ptr = ptr->next.get(); + return ListIterator{z}; + } + T& operator*() const { + return *ptr; + } + T* operator->() const { + return ptr; + } + bool operator==(const ListIterator& y) const { + return ptr == y.ptr; + } + bool operator!=(const ListIterator& y) const { + return ptr != y.ptr; + } +}; + +struct Stack; + +struct Op { + enum OpKind { + _Undef, + _Nop, + _Call, + _CallInd, + _Let, + _IntConst, + _GlobVar, + _SetGlob, + _Import, + _Return, + _Tuple, + _UnTuple, + _If, + _While, + _Until, + _Repeat, + _Again, + _TryCatch, + _SliceConst, + }; + OpKind cl; + enum { _Disabled = 1, _NoReturn = 4, _Impure = 24 }; + int flags; + std::unique_ptr next; + SymDef* fun_ref; // despite its name, it may actually ref global var; applicable not only to Op::_Call, but for other kinds also + SrcLocation where; + VarDescrList var_info; + std::vector args; + std::vector left, right; + std::unique_ptr block0, block1; + td::RefInt256 int_const; + std::string str_const; + Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { + } + Op(SrcLocation 
_where, OpKind _cl, const std::vector& _left) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) { + } + Op(SrcLocation _where, OpKind _cl, std::vector&& _left) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { + } + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, td::RefInt256 _const) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { + } + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, std::string _const) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { + } + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, + SymDef* _fun = nullptr) + : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { + } + Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, + SymDef* _fun = nullptr) + : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + } + + bool disabled() const { return flags & _Disabled; } + void set_disabled() { flags |= _Disabled; } + void set_disabled(bool flag); + + bool noreturn() const { return flags & _NoReturn; } + bool set_noreturn() { flags |= _NoReturn; return true; } + bool set_noreturn(bool flag); + + bool impure() const { return flags & _Impure; } + void set_impure(const CodeBlob &code); + void set_impure(const CodeBlob &code, bool flag); + + void show(std::ostream& os, const std::vector& vars, std::string pfx = "", int mode = 0) const; + void show_var_list(std::ostream& os, const std::vector& idx_list, const std::vector& vars) const; + void show_var_list(std::ostream& os, const std::vector& list, const std::vector& vars) const; + static void show_block(std::ostream& os, const Op* block, const std::vector& vars, std::string pfx = "", + int mode = 0); + void split_vars(const std::vector& vars); + static void split_var_list(std::vector& var_list, const std::vector& 
vars); + bool compute_used_vars(const CodeBlob& code, bool edit); + bool std_compute_used_vars(bool disabled = false); + bool set_var_info(const VarDescrList& new_var_info); + bool set_var_info(VarDescrList&& new_var_info); + bool set_var_info_except(const VarDescrList& new_var_info, const std::vector& var_list); + bool set_var_info_except(VarDescrList&& new_var_info, const std::vector& var_list); + void prepare_args(VarDescrList values); + VarDescrList fwd_analyze(VarDescrList values); + bool mark_noreturn(); + bool is_empty() const { + return cl == _Nop && !next; + } + bool generate_code_step(Stack& stack); + void generate_code_all(Stack& stack); + Op& last() { + return next ? next->last() : *this; + } + const Op& last() const { + return next ? next->last() : *this; + } +}; + +inline ListIterator begin(const std::unique_ptr& op_list) { + return ListIterator{op_list.get()}; +} + +inline ListIterator end(const std::unique_ptr& op_list) { + return ListIterator{}; +} + +inline ListIterator begin(const Op* op_list) { + return ListIterator{op_list}; +} + +inline ListIterator end(const Op* op_list) { + return ListIterator{}; +} + +typedef std::tuple FormalArg; +typedef std::vector FormalArgList; + +struct AsmOpList; + +/* + * + * SYMBOL VALUES + * + */ + +struct SymValVariable : SymValBase { + enum SymValFlag { + flagMutateParameter = 1, // parameter was declared with `mutate` keyword + flagImmutable = 2, // variable was declared via `val` (not `var`) + }; + int flags{0}; + + ~SymValVariable() override = default; + SymValVariable(int val, TypeExpr* sym_type) + : SymValBase(SymValKind::_Var, val, sym_type) {} + + bool is_function_parameter() const { + return idx >= 0; + } + bool is_mutate_parameter() const { + return flags & flagMutateParameter; + } + bool is_local_var() const { + return idx == -1; + } + bool is_immutable() const { + return flags & flagImmutable; + } +}; + +struct SymValFunc : SymValBase { + enum SymValFlag { + flagInline = 1, // marked `@inline` + 
flagInlineRef = 2, // marked `@inline_ref` + flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) + flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out + flagBuiltinFunction = 32, // was created via `define_builtin_func()`, not from source code + flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned + flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. + flagHasMutateParams = 256, // has parameters declared as `mutate` + flagAcceptsSelf = 512, // is a member function (has `self` first parameter) + flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable + }; + + td::RefInt256 method_id; // todo why int256? it's small + int flags{0}; + std::vector parameters; // [i]-th may be nullptr for underscore; if not, its val is SymValVariable + std::vector arg_order, ret_order; + + ~SymValFunc() override = default; + SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags) + : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)) { + } + SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags, std::initializer_list arg_order, std::initializer_list ret_order) + : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)), arg_order(arg_order), ret_order(ret_order) { + } + + const std::vector* get_arg_order() const { + return arg_order.empty() ? nullptr : &arg_order; + } + const std::vector* get_ret_order() const { + return ret_order.empty() ? 
nullptr : &ret_order; + } + + bool is_inline() const { + return flags & flagInline; + } + bool is_inline_ref() const { + return flags & flagInlineRef; + } + bool is_marked_as_pure() const { + return flags & flagMarkedAsPure; + } + bool is_builtin() const { + return flags & flagBuiltinFunction; + } + bool is_get_method() const { + return flags & flagGetMethod; + } + bool is_entrypoint() const { + return flags & flagIsEntrypoint; + } + bool has_mutate_params() const { + return flags & flagHasMutateParams; + } + bool does_accept_self() const { + return flags & flagAcceptsSelf; + } + bool does_return_self() const { + return flags & flagReturnsSelf; + } +}; + +struct SymValCodeFunc : SymValFunc { + CodeBlob* code; + bool is_really_used{false}; // calculated via dfs; unused functions are not codegenerated + ~SymValCodeFunc() override = default; + SymValCodeFunc(std::vector parameters, int val, TypeExpr* _ft) + : SymValFunc(std::move(parameters), val, _ft, 0), code(nullptr) { + } + bool does_need_codegen() const; + void set_code(CodeBlob* code); +}; + +struct SymValGlobVar : SymValBase { + bool is_really_used{false}; // calculated via dfs from used functions; unused globals are not codegenerated + + SymValGlobVar(int val, TypeExpr* gvtype) + : SymValBase(SymValKind::_GlobVar, val, gvtype) { + } + ~SymValGlobVar() override = default; +}; + +struct SymValConst : SymValBase { + enum ConstKind { IntConst, SliceConst }; + + td::RefInt256 intval; + std::string strval; + ConstKind kind; + + SymValConst(int idx, td::RefInt256 value) + : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Int)), intval(std::move(value)), kind(IntConst) { + } + SymValConst(int idx, std::string value) + : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Slice)), strval(std::move(value)), kind(SliceConst) { + } + ~SymValConst() override = default; + td::RefInt256 get_int_value() const { + return intval; + } + std::string get_str_value() const { + return strval; + } 
+ ConstKind get_kind() const { + return kind; + } +}; + + +/* + * + * EXPRESSIONS + * + */ + +struct Expr { + enum ExprCls { + _Apply, + _VarApply, + _GrabMutatedVars, + _ReturnSelf, + _MkTuple, + _Tensor, + _Const, + _Var, + _GlobFunc, + _GlobVar, + _Letop, + _Hole, + _CondExpr, + _SliceConst, + }; + ExprCls cls; + int val{0}; + enum { _IsRvalue = 2, _IsLvalue = 4, _IsImmutable = 8, _IsImpure = 32 }; + int flags{0}; + SrcLocation here; + td::RefInt256 intval; + std::string strval; + SymDef* sym{nullptr}; + TypeExpr* e_type{nullptr}; + std::vector args; + Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) { + } + Expr(ExprCls c, std::vector _args) : cls(c), args(std::move(_args)) { + } + Expr(ExprCls c, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { + } + Expr(ExprCls c, SymDef* _sym, std::initializer_list _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { + } + Expr(ExprCls c, SymDef* _sym, std::vector _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { + } + Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist); + ~Expr() { + for (auto& arg_ptr : args) { + delete arg_ptr; + } + } + Expr* copy() const; + void pb_arg(Expr* expr) { + args.push_back(expr); + } + void set_val(int _val) { + val = _val; + } + bool is_rvalue() const { + return flags & _IsRvalue; + } + bool is_lvalue() const { + return flags & _IsLvalue; + } + bool is_immutable() const { + return flags & _IsImmutable; + } + bool is_mktuple() const { + return cls == _MkTuple; + } + void chk_rvalue() const { + if (!is_rvalue()) { + fire_error_rvalue_expected(); + } + } + void deduce_type(); + void set_location(SrcLocation loc) { + here = loc; + } + SrcLocation get_location() const { + return here; + } + void define_new_vars(CodeBlob& code); + void predefine_vars(); + std::vector pre_compile(CodeBlob& code, std::vector>* lval_globs = nullptr) const; + var_idx_t new_tmp(CodeBlob& code) const; + std::vector new_tmp_vect(CodeBlob& code) const { + return 
{new_tmp(code)}; + } + + GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN + void fire_error_rvalue_expected() const; + GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN + void fire_error_lvalue_expected(const std::string& details) const; + GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN + void fire_error_modifying_immutable(const std::string& details) const; +}; + +/* + * + * GENERATE CODE + * + */ + +typedef std::vector StackLayout; +typedef std::pair var_const_idx_t; +typedef std::vector StackLayoutExt; +constexpr const_idx_t not_const = -1; +using Const = td::RefInt256; + +struct AsmOp { + enum Type { a_none, a_xchg, a_push, a_pop, a_const, a_custom, a_magic }; + Type t{a_none}; + int indent{0}; + int a, b; + bool gconst{false}; + std::string op; + td::RefInt256 origin; + struct SReg { + int idx; + SReg(int _idx) : idx(_idx) { + } + }; + AsmOp() = default; + AsmOp(Type _t) : t(_t) { + } + AsmOp(Type _t, std::string _op) : t(_t), op(std::move(_op)) { + } + AsmOp(Type _t, int _a) : t(_t), a(_a) { + } + AsmOp(Type _t, int _a, std::string _op) : t(_t), a(_a), op(std::move(_op)) { + } + AsmOp(Type _t, int _a, int _b) : t(_t), a(_a), b(_b) { + } + AsmOp(Type _t, int _a, int _b, std::string _op) : t(_t), a(_a), b(_b), op(std::move(_op)) { + compute_gconst(); + } + AsmOp(Type _t, int _a, int _b, std::string _op, td::RefInt256 x) : t(_t), a(_a), b(_b), op(std::move(_op)), origin(x) { + compute_gconst(); + } + void out(std::ostream& os) const; + void out_indent_nl(std::ostream& os, bool no_nl = false) const; + std::string to_string() const; + void compute_gconst() { + gconst = (is_custom() && (op == "PUSHNULL" || op == "NEWC" || op == "NEWB" || op == "TRUE" || op == "FALSE" || op == "NOW")); + } + bool is_nop() const { + return t == a_none && op.empty(); + } + bool is_comment() const { + return t == a_none && !op.empty(); + } + bool is_custom() const { + return t == a_custom; + } + bool is_very_custom() const { + return is_custom() && a >= 255; + } + bool is_push() const { + return t == 
a_push; + } + bool is_push(int x) const { + return is_push() && a == x; + } + bool is_push(int* x) const { + *x = a; + return is_push(); + } + bool is_pop() const { + return t == a_pop; + } + bool is_pop(int x) const { + return is_pop() && a == x; + } + bool is_xchg() const { + return t == a_xchg; + } + bool is_xchg(int x, int y) const { + return is_xchg() && b == y && a == x; + } + bool is_xchg(int* x, int* y) const { + *x = a; + *y = b; + return is_xchg(); + } + bool is_xchg_short() const { + return is_xchg() && (a <= 1 || b <= 1); + } + bool is_swap() const { + return is_xchg(0, 1); + } + bool is_const() const { + return t == a_const && !a && b == 1; + } + bool is_gconst() const { + return !a && b == 1 && (t == a_const || gconst); + } + static AsmOp Nop() { + return AsmOp(a_none); + } + static AsmOp Xchg(int a, int b = 0) { + return a == b ? AsmOp(a_none) : (a < b ? AsmOp(a_xchg, a, b) : AsmOp(a_xchg, b, a)); + } + static AsmOp Push(int a) { + return AsmOp(a_push, a); + } + static AsmOp Pop(int a = 0) { + return AsmOp(a_pop, a); + } + static AsmOp Xchg2(int a, int b) { + return make_stk2(a, b, "XCHG2", 0); + } + static AsmOp XcPu(int a, int b) { + return make_stk2(a, b, "XCPU", 1); + } + static AsmOp PuXc(int a, int b) { + return make_stk2(a, b, "PUXC", 1); + } + static AsmOp Push2(int a, int b) { + return make_stk2(a, b, "PUSH2", 2); + } + static AsmOp Xchg3(int a, int b, int c) { + return make_stk3(a, b, c, "XCHG3", 0); + } + static AsmOp Xc2Pu(int a, int b, int c) { + return make_stk3(a, b, c, "XC2PU", 1); + } + static AsmOp XcPuXc(int a, int b, int c) { + return make_stk3(a, b, c, "XCPUXC", 1); + } + static AsmOp XcPu2(int a, int b, int c) { + return make_stk3(a, b, c, "XCPU2", 3); + } + static AsmOp PuXc2(int a, int b, int c) { + return make_stk3(a, b, c, "PUXC2", 3); + } + static AsmOp PuXcPu(int a, int b, int c) { + return make_stk3(a, b, c, "PUXCPU", 3); + } + static AsmOp Pu2Xc(int a, int b, int c) { + return make_stk3(a, b, c, "PU2XC", 3); + } + static 
AsmOp Push3(int a, int b, int c) { + return make_stk3(a, b, c, "PUSH3", 3); + } + static AsmOp BlkSwap(int a, int b); + static AsmOp BlkPush(int a, int b); + static AsmOp BlkDrop(int a); + static AsmOp BlkDrop2(int a, int b); + static AsmOp BlkReverse(int a, int b); + static AsmOp make_stk2(int a, int b, const char* str, int delta); + static AsmOp make_stk3(int a, int b, int c, const char* str, int delta); + static AsmOp IntConst(td::RefInt256 value); + static AsmOp BoolConst(bool f); + static AsmOp Const(std::string push_op, td::RefInt256 origin = {}) { + return AsmOp(a_const, 0, 1, std::move(push_op), origin); + } + static AsmOp Const(int arg, std::string push_op, td::RefInt256 origin = {}); + static AsmOp Comment(std::string comment) { + return AsmOp(a_none, std::string{"// "} + comment); + } + static AsmOp Custom(std::string custom_op) { + return AsmOp(a_custom, 255, 255, custom_op); + } + static AsmOp Parse(std::string custom_op); + static AsmOp Custom(std::string custom_op, int args, int retv = 1) { + return AsmOp(a_custom, args, retv, custom_op); + } + static AsmOp Parse(std::string custom_op, int args, int retv = 1); + static AsmOp Tuple(int a); + static AsmOp UnTuple(int a); +}; + +inline std::ostream& operator<<(std::ostream& os, const AsmOp& op) { + op.out(os); + return os; +} + +std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg); + +struct AsmOpList { + std::vector list_; + int indent_{0}; + const std::vector* var_names_{nullptr}; + std::vector constants_; + bool retalt_{false}; + void out(std::ostream& os, int mode = 0) const; + AsmOpList(int indent = 0, const std::vector* var_names = nullptr) : indent_(indent), var_names_(var_names) { + } + template + AsmOpList& add(Args&&... 
args) { + append(AsmOp(std::forward(args)...)); + adjust_last(); + return *this; + } + bool append(const AsmOp& op) { + list_.push_back(op); + adjust_last(); + return true; + } + bool append(const std::vector& ops); + bool append(std::initializer_list ops) { + return append(std::vector(std::move(ops))); + } + AsmOpList& operator<<(const AsmOp& op) { + return add(op); + } + AsmOpList& operator<<(AsmOp&& op) { + return add(std::move(op)); + } + AsmOpList& operator<<(std::string str) { + return add(AsmOp::Type::a_custom, 255, 255, str); + } + const_idx_t register_const(Const new_const); + Const get_const(const_idx_t idx); + void show_var(std::ostream& os, var_idx_t idx) const; + void show_var_ext(std::ostream& os, std::pair idx_pair) const; + void adjust_last() { + if (list_.back().is_nop()) { + list_.pop_back(); + } else { + list_.back().indent = indent_; + } + } + void indent() { + ++indent_; + } + void undent() { + --indent_; + } + void set_indent(int new_indent) { + indent_ = new_indent; + } + void insert(size_t pos, std::string str) { + insert(pos, AsmOp(AsmOp::a_custom, 255, 255, str)); + } + void insert(size_t pos, const AsmOp& op) { + auto ip = list_.begin() + pos; + ip = list_.insert(ip, op); + ip->indent = (ip == list_.begin()) ? 
indent_ : (ip - 1)->indent; + } + void indent_all() { + for (auto &op : list_) { + ++op.indent; + } + } +}; + +inline std::ostream& operator<<(std::ostream& os, const AsmOpList& op_list) { + op_list.out(os); + return os; +} + +class IndentGuard { + AsmOpList& aol_; + + public: + IndentGuard(AsmOpList& aol) : aol_(aol) { + aol.indent(); + } + ~IndentGuard() { + aol_.undent(); + } +}; + +struct AsmOpCons { + std::unique_ptr car; + std::unique_ptr cdr; + AsmOpCons(std::unique_ptr head, std::unique_ptr tail) : car(std::move(head)), cdr(std::move(tail)) { + } + static std::unique_ptr cons(std::unique_ptr head, std::unique_ptr tail) { + return std::make_unique(std::move(head), std::move(tail)); + } +}; + +using AsmOpConsList = std::unique_ptr; + +int is_pos_pow2(td::RefInt256 x); +int is_neg_pow2(td::RefInt256 x); + +/* + * + * STACK TRANSFORMS + * + */ + +/* +A stack transform is a map f:N={0,1,...} -> N, such that f(x) = x + d_f for almost all x:N and for a fixed d_f:N. +They form a monoid under composition: (fg)(x)=f(g(x)). +They act on stacks S on the right: Sf=S', such that S'[n]=S[f(n)]. + +A stack transform f is determined by d_f and the finite set A of all pairs (x,y), such that x>=d_f, f(x-d_f) = y and y<>x. They are listed in increasing order by x. +*/ +struct StackTransform { + enum { max_n = 16, inf_x = 0x7fffffff, c_start = -1000 }; + int d{0}, n{0}, dp{0}, c{0}; + bool invalid{false}; + std::array, max_n> A; + StackTransform() = default; + // list of f(0),f(1),...,f(s); assumes next values are f(s)+1,f(s)+2,... 
+ StackTransform(std::initializer_list list); + StackTransform& operator=(std::initializer_list list); + bool assign(const StackTransform& other); + static StackTransform id() { + return {}; + } + bool invalidate() { + invalid = true; + return false; + } + bool is_valid() const { + return !invalid; + } + bool set_id() { + d = n = dp = c = 0; + invalid = false; + return true; + } + bool shift(int offs) { // post-composes with x -> x + offs + d += offs; + return offs <= 0 || remove_negative(); + } + bool remove_negative(); + bool touch(int i) { + dp = std::max(dp, i + d + 1); + return true; + } + bool is_permutation() const; // is f:N->N bijective ? + bool is_trivial_after(int x) const; // f(x') = x' + d for all x' >= x + int preimage_count(int y) const; // card f^{-1}(y) + std::vector preimage(int y) const; + bool apply_xchg(int i, int j, bool relaxed = false); + bool apply_push(int i); + bool apply_pop(int i = 0); + bool apply_push_newconst(); + bool apply_blkpop(int k); + bool apply(const StackTransform& other); // this = this * other + bool preapply(const StackTransform& other); // this = other * this + // c := a * b + static bool compose(const StackTransform& a, const StackTransform& b, StackTransform& c); + StackTransform& operator*=(const StackTransform& other); + StackTransform operator*(const StackTransform& b) const &; + bool equal(const StackTransform& other, bool relaxed = false) const; + bool almost_equal(const StackTransform& other) const { + return equal(other, true); + } + bool operator==(const StackTransform& other) const { + return dp == other.dp && almost_equal(other); + } + bool operator<=(const StackTransform& other) const { + return dp <= other.dp && almost_equal(other); + } + bool operator>=(const StackTransform& other) const { + return dp >= other.dp && almost_equal(other); + } + int get(int i) const; + int touch_get(int i, bool relaxed = false) { + if (!relaxed) { + touch(i); + } + return get(i); + } + bool set(int i, int v, bool relaxed = 
false); + int operator()(int i) const { + return get(i); + } + class Pos { + StackTransform& t_; + int p_; + + public: + Pos(StackTransform& t, int p) : t_(t), p_(p) { + } + Pos& operator=(const Pos& other) = delete; + operator int() const { + return t_.get(p_); + } + const Pos& operator=(int v) const { + t_.set(p_, v); + return *this; + } + }; + Pos operator[](int i) { + return Pos(*this, i); + } + static const StackTransform rot; + static const StackTransform rot_rev; + bool is_id() const { + return is_valid() && !d && !n; + } + bool is_xchg(int i, int j) const; + bool is_xchg(int* i, int* j) const; + bool is_xchg_xchg(int i, int j, int k, int l) const; + bool is_xchg_xchg(int* i, int* j, int* k, int* l) const; + bool is_push(int i) const; + bool is_push(int* i) const; + bool is_pop(int i) const; + bool is_pop(int* i) const; + bool is_pop_pop(int i, int j) const; + bool is_pop_pop(int* i, int* j) const; + bool is_rot() const; + bool is_rotrev() const; + bool is_push_rot(int i) const; + bool is_push_rot(int* i) const; + bool is_push_rotrev(int i) const; + bool is_push_rotrev(int* i) const; + bool is_push_xchg(int i, int j, int k) const; + bool is_push_xchg(int* i, int* j, int* k) const; + bool is_xchg2(int i, int j) const; + bool is_xchg2(int* i, int* j) const; + bool is_xcpu(int i, int j) const; + bool is_xcpu(int* i, int* j) const; + bool is_puxc(int i, int j) const; + bool is_puxc(int* i, int* j) const; + bool is_push2(int i, int j) const; + bool is_push2(int* i, int* j) const; + bool is_xchg3(int* i, int* j, int* k) const; + bool is_xc2pu(int* i, int* j, int* k) const; + bool is_xcpuxc(int* i, int* j, int* k) const; + bool is_xcpu2(int* i, int* j, int* k) const; + bool is_puxc2(int i, int j, int k) const; + bool is_puxc2(int* i, int* j, int* k) const; + bool is_puxcpu(int* i, int* j, int* k) const; + bool is_pu2xc(int i, int j, int k) const; + bool is_pu2xc(int* i, int* j, int* k) const; + bool is_push3(int i, int j, int k) const; + bool is_push3(int* i, int* 
j, int* k) const; + bool is_blkswap(int i, int j) const; + bool is_blkswap(int* i, int* j) const; + bool is_blkpush(int i, int j) const; + bool is_blkpush(int* i, int* j) const; + bool is_blkdrop(int* i) const; + bool is_blkdrop2(int i, int j) const; + bool is_blkdrop2(int* i, int* j) const; + bool is_reverse(int i, int j) const; + bool is_reverse(int* i, int* j) const; + bool is_nip_seq(int i, int j = 0) const; + bool is_nip_seq(int* i) const; + bool is_nip_seq(int* i, int* j) const; + bool is_pop_blkdrop(int i, int k) const; + bool is_pop_blkdrop(int* i, int* k) const; + bool is_2pop_blkdrop(int i, int j, int k) const; + bool is_2pop_blkdrop(int* i, int* j, int* k) const; + bool is_const_rot(int c) const; + bool is_const_rot(int* c) const; + bool is_const_pop(int c, int i) const; + bool is_const_pop(int* c, int* i) const; + bool is_push_const(int i, int c) const; + bool is_push_const(int* i, int* c) const; + + void show(std::ostream& os, int mode = 0) const; + + static StackTransform Xchg(int i, int j, bool relaxed = false); + static StackTransform Push(int i); + static StackTransform Pop(int i); + + private: + int try_load(int& i, int offs = 0) const; // returns A[i++].first + offs or inf_x + bool try_store(int x, int y); // appends (x,y) to A +}; + +inline std::ostream& operator<<(std::ostream& os, const StackTransform& trans) { + trans.show(os); + return os; +} + +bool apply_op(StackTransform& trans, const AsmOp& op); + +/* + * + * STACK OPERATION OPTIMIZER + * + */ + +struct Optimizer { + static constexpr int optimize_depth = 20; + AsmOpConsList code_; + int l_{0}, l2_{0}, p_, pb_, q_, indent_; + bool debug_{false}; + std::unique_ptr op_[optimize_depth], oq_[optimize_depth]; + AsmOpCons* op_cons_[optimize_depth]; + int offs_[optimize_depth]; + StackTransform tr_[optimize_depth]; + int mode_{0}; + Optimizer() { + } + Optimizer(bool debug, int mode = 0) : debug_(debug), mode_(mode) { + } + Optimizer(AsmOpConsList code, bool debug = false, int mode = 0) : 
Optimizer(debug, mode) { + set_code(std::move(code)); + } + void set_code(AsmOpConsList code_); + void unpack(); + void pack(); + void apply(); + bool find_at_least(int pb); + bool find(); + bool optimize(); + bool compute_stack_transforms(); + bool say(std::string str) const; + bool show_stack_transforms() const; + void show_head() const; + void show_left() const; + void show_right() const; + bool find_const_op(int* op_idx, int cst); + bool is_push_const(int* i, int* c) const; + bool rewrite_push_const(int i, int c); + bool is_const_push_xchgs(); + bool rewrite_const_push_xchgs(); + bool is_const_rot(int* c) const; + bool rewrite_const_rot(int c); + bool is_const_pop(int* c, int* i) const; + bool rewrite_const_pop(int c, int i); + bool rewrite(int p, AsmOp&& new_op); + bool rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2); + bool rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3); + bool rewrite(AsmOp&& new_op) { + return rewrite(p_, std::move(new_op)); + } + bool rewrite(AsmOp&& new_op1, AsmOp&& new_op2) { + return rewrite(p_, std::move(new_op1), std::move(new_op2)); + } + bool rewrite(AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3) { + return rewrite(p_, std::move(new_op1), std::move(new_op2), std::move(new_op3)); + } + bool rewrite_nop(); + bool is_pred(const std::function& pred, int min_p = 2); + bool is_same_as(const StackTransform& trans, int min_p = 2); + bool is_rot(); + bool is_rotrev(); + bool is_tuck(); + bool is_2dup(); + bool is_2drop(); + bool is_2swap(); + bool is_2over(); + bool is_xchg(int* i, int* j); + bool is_xchg_xchg(int* i, int* j, int* k, int* l); + bool is_push(int* i); + bool is_pop(int* i); + bool is_pop_pop(int* i, int* j); + bool is_nop(); + bool is_push_rot(int* i); + bool is_push_rotrev(int* i); + bool is_push_xchg(int* i, int* j, int* k); + bool is_xchg2(int* i, int* j); + bool is_xcpu(int* i, int* j); + bool is_puxc(int* i, int* j); + bool is_push2(int* i, int* j); + bool is_xchg3(int* i, int* j, int* k); + bool 
is_xc2pu(int* i, int* j, int* k); + bool is_xcpuxc(int* i, int* j, int* k); + bool is_xcpu2(int* i, int* j, int* k); + bool is_puxc2(int* i, int* j, int* k); + bool is_puxcpu(int* i, int* j, int* k); + bool is_pu2xc(int* i, int* j, int* k); + bool is_push3(int* i, int* j, int* k); + bool is_blkswap(int* i, int* j); + bool is_blkpush(int* i, int* j); + bool is_blkdrop(int* i); + bool is_blkdrop2(int* i, int* j); + bool is_reverse(int* i, int* j); + bool is_nip_seq(int* i, int* j); + bool is_pop_blkdrop(int* i, int* k); + bool is_2pop_blkdrop(int* i, int* j, int* k); + AsmOpConsList extract_code(); +}; + +AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode = 0); +AsmOpConsList optimize_code(AsmOpConsList op_list, int mode); +void optimize_code(AsmOpList& ops); + +struct Stack { + StackLayoutExt s; + AsmOpList& o; + enum { + _StkCmt = 1, _CptStkCmt = 2, _DisableOut = 128, _Shown = 256, + _InlineFunc = 512, _NeedRetAlt = 1024, _InlineAny = 2048, + _ModeSave = _InlineFunc | _NeedRetAlt | _InlineAny, + _Garbage = -0x10000 + }; + int mode; + Stack(AsmOpList& _o, int _mode = 0) : o(_o), mode(_mode) { + } + Stack(AsmOpList& _o, const StackLayoutExt& _s, int _mode = 0) : s(_s), o(_o), mode(_mode) { + } + Stack(AsmOpList& _o, StackLayoutExt&& _s, int _mode = 0) : s(std::move(_s)), o(_o), mode(_mode) { + } + int depth() const { + return (int)s.size(); + } + var_idx_t operator[](int i) const { + validate(i); + return s[depth() - i - 1].first; + } + var_const_idx_t& at(int i) { + validate(i); + return s[depth() - i - 1]; + } + var_const_idx_t at(int i) const { + validate(i); + return s[depth() - i - 1]; + } + var_const_idx_t get(int i) const { + return at(i); + } + bool output_disabled() const { + return mode & _DisableOut; + } + bool output_enabled() const { + return !output_disabled(); + } + void disable_output() { + mode |= _DisableOut; + } + StackLayout vars() const; + int find(var_idx_t var, int from = 0) const; + int find(var_idx_t var, int from, int to) 
const; + int find_const(const_idx_t cst, int from = 0) const; + int find_outside(var_idx_t var, int from, int to) const; + void forget_const(); + void validate(int i) const { + if (i > 255) { + throw Fatal{"Too deep stack"}; + } + tolk_assert(i >= 0 && i < depth() && "invalid stack reference"); + } + void modified() { + mode &= ~_Shown; + } + void issue_pop(int i); + void issue_push(int i); + void issue_xchg(int i, int j); + int drop_vars_except(const VarDescrList& var_info, int excl_var = 0x80000000); + void forget_var(var_idx_t idx); + void push_new_var(var_idx_t idx); + void push_new_const(var_idx_t idx, const_idx_t cidx); + void assign_var(var_idx_t new_idx, var_idx_t old_idx); + void do_copy_var(var_idx_t new_idx, var_idx_t old_idx); + void enforce_state(const StackLayout& req_stack); + void rearrange_top(const StackLayout& top, std::vector last); + void rearrange_top(var_idx_t top, bool last); + void merge_const(const Stack& req_stack); + void merge_state(const Stack& req_stack); + void show(int _mode); + void show() { + show(mode); + } + void opt_show() { + if ((mode & (_StkCmt | _Shown)) == _StkCmt) { + show(mode); + } + } + bool operator==(const Stack& y) const & { + return s == y.s; + } + void apply_wrappers(int callxargs_count) { + bool is_inline = mode & _InlineFunc; + if (o.retalt_) { + o.insert(0, "SAMEALTSAVE"); + o.insert(0, "c2 SAVE"); + } + if (callxargs_count != -1 || (is_inline && o.retalt_)) { + o.indent_all(); + o.insert(0, "CONT:<{"); + o << "}>"; + if (callxargs_count != -1) { + if (callxargs_count <= 15) { + o << AsmOp::Custom(PSTRING() << callxargs_count << " -1 CALLXARGS"); + } else { + tolk_assert(callxargs_count <= 254); + o << AsmOp::Custom(PSTRING() << callxargs_count << " PUSHINT -1 PUSHINT CALLXVARARGS"); + } + } else { + o << "EXECUTE"; + } + } + } +}; + +/* + * + * SPECIFIC SYMBOL VALUES, + * BUILT-IN FUNCTIONS AND OPERATIONS + * + */ + +typedef std::function&, std::vector&, SrcLocation)> simple_compile_func_t; +typedef 
std::function&, std::vector&)> compile_func_t; + +inline simple_compile_func_t make_simple_compile(AsmOp op) { + return [op](std::vector& out, std::vector& in, SrcLocation) -> AsmOp { return op; }; +} + +inline compile_func_t make_ext_compile(std::vector&& ops) { + return [ops = std::move(ops)](AsmOpList& dest, std::vector& out, std::vector& in)->bool { + return dest.append(ops); + }; +} + +inline compile_func_t make_ext_compile(AsmOp op) { + return + [op](AsmOpList& dest, std::vector& out, std::vector& in) -> bool { return dest.append(op); }; +} + +struct SymValAsmFunc : SymValFunc { + simple_compile_func_t simple_compile; + compile_func_t ext_compile; + ~SymValAsmFunc() override = default; + SymValAsmFunc(std::vector parameters, TypeExpr* ft, std::vector&& arg_order, std::vector&& ret_order, int flags) + : SymValFunc(std::move(parameters), -1, ft, flags) { + this->arg_order = std::move(arg_order); + this->ret_order = std::move(ret_order); + } + SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags) + : SymValFunc(std::move(parameters), -1, ft, flags), simple_compile(std::move(_compile)) { + } + SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags, + std::initializer_list arg_order, std::initializer_list ret_order) + : SymValFunc(std::move(parameters), -1, ft, flags, arg_order, ret_order), simple_compile(std::move(_compile)) { + } + void set_code(std::vector code); + bool compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; +}; + +struct CodeBlob { + enum { _ForbidImpure = 4 }; + int var_cnt, in_var_cnt; + TypeExpr* ret_type; + const SymValCodeFunc* func_val; + std::string name; + SrcLocation loc; + std::vector vars; + std::unique_ptr ops; + std::unique_ptr* cur_ops; + std::vector debug_ttt; + std::stack*> cur_ops_stack; + int flags = 0; + bool require_callxargs = false; + CodeBlob(std::string name, SrcLocation loc, const SymValCodeFunc* func_val, 
TypeExpr* ret_type) + : var_cnt(0), in_var_cnt(0), ret_type(ret_type), func_val(func_val), name(std::move(name)), loc(loc), cur_ops(&ops) { + } + template + Op& emplace_back(Args&&... args) { + Op& res = *(*cur_ops = std::make_unique(args...)); + cur_ops = &(res.next); + debug_ttt.push_back(&res); + return res; + } + bool import_params(FormalArgList arg_list); + var_idx_t create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation loc); + var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { + return create_var(var_type, 0, loc); + } + int split_vars(bool strict = false); + bool compute_used_code_vars(); + bool compute_used_code_vars(std::unique_ptr& ops, const VarDescrList& var_info, bool edit) const; + void print(std::ostream& os, int flags = 0) const; + void push_set_cur(std::unique_ptr& new_cur_ops) { + cur_ops_stack.push(cur_ops); + cur_ops = &new_cur_ops; + } + void close_blk(SrcLocation location) { + *cur_ops = std::make_unique(location, Op::_Nop); + } + void pop_cur() { + cur_ops = cur_ops_stack.top(); + cur_ops_stack.pop(); + } + void close_pop_cur(SrcLocation location) { + close_blk(location); + pop_cur(); + } + void simplify_var_types(); + void prune_unreachable_code(); + void fwd_analyze(); + void mark_noreturn(); + void generate_code(AsmOpList& out_list, int mode = 0); + void generate_code(std::ostream& os, int mode = 0, int indent = 0); + + void on_var_modification(var_idx_t idx, SrcLocation here) const { + for (auto& f : vars.at(idx).on_modification) { + f(here); + } + } +}; + +// defined in builtins.cpp +AsmOp exec_arg_op(std::string op, long long arg); +AsmOp exec_arg_op(std::string op, long long arg, int args, int retv = 1); +AsmOp exec_arg_op(std::string op, td::RefInt256 arg); +AsmOp exec_arg_op(std::string op, td::RefInt256 arg, int args, int retv = 1); +AsmOp exec_arg2_op(std::string op, long long imm1, long long imm2, int args, int retv = 1); +AsmOp push_const(td::RefInt256 x); + +void define_builtins(); + + + +/* + * + * OUTPUT 
CODE GENERATOR + * + */ + +int tolk_proceed(const std::string &entrypoint_filename); + +} // namespace tolk + + diff --git a/tolk/type-expr.h b/tolk/type-expr.h new file mode 100644 index 000000000..21a35a8e3 --- /dev/null +++ b/tolk/type-expr.h @@ -0,0 +1,131 @@ +#pragma once + +#include +#include + +namespace tolk { + +struct TypeExpr { + enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll }; + enum AtomicType { _Int, _Cell, _Slice, _Builder, _Continutaion, _Tuple }; + Kind constr; + int value; + int minw, maxw; + static constexpr int w_inf = 1023; + std::vector args; + bool was_forall_var = false; + + explicit TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { + } + TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { + } + TypeExpr(Kind _constr, std::vector list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(Kind _constr, std::initializer_list list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0, std::vector list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0, std::initializer_list list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + + bool is_atomic() const { + return constr == te_Atomic; + } + bool is_atomic(int v) const { + return constr == te_Atomic && value == v; + } + bool is_int() const { + return is_atomic(_Int); + } + bool is_var() const { + return constr == te_Var; + } + bool is_map() const { + return constr == te_Map; + } + bool is_tuple() const 
{ + return constr == te_Tuple; + } + bool has_fixed_width() const { + return minw == maxw; + } + int get_width() const { + return has_fixed_width() ? minw : -1; + } + void compute_width(); + bool recompute_width(); + void show_width(std::ostream& os); + std::ostream& print(std::ostream& os, int prio = 0) const; + void replace_with(TypeExpr* te2); + int extract_components(std::vector& comp_list); + bool equals_to(const TypeExpr* rhs) const; + bool has_unknown_inside() const; + static int holes, type_vars; + static TypeExpr* new_hole() { + return new TypeExpr{te_Unknown, ++holes}; + } + static TypeExpr* new_hole(int width) { + return new TypeExpr{te_Unknown, ++holes, width}; + } + static TypeExpr* new_unit() { + return new TypeExpr{te_Tensor, 0, 0}; + } + static TypeExpr* new_atomic(int value) { + return new TypeExpr{te_Atomic, value, 1}; + } + static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); + static TypeExpr* new_func() { + return new_map(new_hole(), new_hole()); + } + static TypeExpr* new_tensor(std::vector list, bool red = true) { + return red && list.size() == 1 ? 
list[0] : new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(std::initializer_list list) { + return new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { + return new_tensor({te1, te2}); + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { + return new_tensor({te1, te2, te3}); + } + static TypeExpr* new_tuple(TypeExpr* arg0) { + return new TypeExpr{te_Tuple, arg0}; + } + static TypeExpr* new_tuple(std::vector list, bool red = false) { + return new_tuple(new_tensor(std::move(list), red)); + } + static TypeExpr* new_tuple(std::initializer_list list) { + return new_tuple(new_tensor(list)); + } + static TypeExpr* new_var() { + return new TypeExpr{te_Var, --type_vars, 1}; + } + static TypeExpr* new_var(int idx) { + return new TypeExpr{te_Var, idx, 1}; + } + static TypeExpr* new_forall(std::vector list, TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + + static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); + static std::vector remove_forall(TypeExpr*& te); + static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); +}; + +std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); + +} // namespace tolk diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp new file mode 100644 index 000000000..cee71942b --- /dev/null +++ b/tolk/unify-types.cpp @@ -0,0 +1,454 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * TYPE EXPRESSIONS + * + */ + +int TypeExpr::holes = 0, TypeExpr::type_vars = 0; // not thread safe, but it is ok for now + +void TypeExpr::compute_width() { + switch (constr) { + case te_Atomic: + case te_Map: + minw = maxw = 1; + break; + case te_Tensor: + minw = maxw = 0; + for (TypeExpr* arg : args) { + minw += arg->minw; + maxw += arg->maxw; + } + if (minw > w_inf) { + minw = w_inf; + } + if (maxw > w_inf) { + maxw = w_inf; + } + break; + case te_Tuple: + minw = maxw = 1; + for (TypeExpr* arg : args) { + arg->compute_width(); + } + break; + case te_Indirect: + minw = args[0]->minw; + maxw = args[0]->maxw; + break; + default: + minw = 0; + maxw = w_inf; + break; + } +} + +bool TypeExpr::recompute_width() { + switch (constr) { + case te_Tensor: + case te_Indirect: { + int min = 0, max = 0; + for (TypeExpr* arg : args) { + min += arg->minw; + max += arg->maxw; + } + if (min > maxw || max < minw) { + return false; + } + if (min > w_inf) { + min = w_inf; + } + if (max > w_inf) { + max = w_inf; + } + if (minw < min) { + minw = min; + } + if (maxw > max) { + maxw = max; + } + return true; + } + case te_Tuple: { + for (TypeExpr* arg : args) { + if (arg->minw > 1 || arg->maxw < 1 || arg->minw > arg->maxw) { + return false; + } + } + return true; + } + default: + return false; + } +} + +int TypeExpr::extract_components(std::vector& comp_list) { + if (constr != te_Indirect && constr != te_Tensor) { + comp_list.push_back(this); + return 1; + } + int res = 0; + for (TypeExpr* arg : args) { + res += arg->extract_components(comp_list); + } + return res; +} + +bool TypeExpr::equals_to(const TypeExpr *rhs) const { + const TypeExpr *l = this; + const TypeExpr *r = rhs; + while (l->constr == te_Indirect) + l = l->args[0]; + while 
(r->constr == te_Indirect) + r = r->args[0]; + + bool eq = l->constr == r->constr && l->value == r->value && + l->minw == r->minw && l->maxw == r->maxw && + l->was_forall_var == r->was_forall_var && + l->args.size() == r->args.size(); + if (!eq) + return false; + + for (int i = 0; i < static_cast(l->args.size()); ++i) { + if (!l->args[i]->equals_to(r->args[i])) + return false; + } + return true; +} + +bool TypeExpr::has_unknown_inside() const { + if (constr == te_Unknown) + return true; + + for (const TypeExpr* inner : args) { + if (inner->has_unknown_inside()) + return true; + } + return false; +} + +TypeExpr* TypeExpr::new_map(TypeExpr* from, TypeExpr* to) { + return new TypeExpr{te_Map, std::vector{from, to}}; +} + +void TypeExpr::replace_with(TypeExpr* te2) { + if (te2 == this) { + return; + } + constr = te_Indirect; + value = 0; + minw = te2->minw; + maxw = te2->maxw; + args.clear(); + args.push_back(te2); +} + +bool TypeExpr::remove_indirect(TypeExpr*& te, TypeExpr* forbidden) { + tolk_assert(te); + while (te->constr == te_Indirect) { + te = te->args[0]; + } + if (te->constr == te_Unknown) { + return te != forbidden; + } + bool res = true; + for (auto& x : te->args) { + res &= remove_indirect(x, forbidden); + } + return res; +} + +std::vector TypeExpr::remove_forall(TypeExpr*& te) { + tolk_assert(te && te->constr == te_ForAll); + tolk_assert(te->args.size() >= 1); + std::vector new_vars; + for (std::size_t i = 1; i < te->args.size(); i++) { + new_vars.push_back(new_hole(1)); + } + TypeExpr* te2 = te; + // std::cerr << "removing universal quantifier in " << te << std::endl; + te = te->args[0]; + remove_forall_in(te, te2, new_vars); + // std::cerr << "-> " << te << std::endl; + return new_vars; +} + +bool TypeExpr::remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars) { + tolk_assert(te); + tolk_assert(te2 && te2->constr == te_ForAll); + if (te->constr == te_Var) { + for (std::size_t i = 0; i < new_vars.size(); i++) { + if (te == 
te2->args[i + 1]) { + te = new_vars[i]; + return true; + } + } + return false; + } + if (te->constr == te_ForAll) { + return false; + } + if (te->args.empty()) { + return false; + } + auto te1 = new TypeExpr(*te); + bool res = false; + for (auto& arg : te1->args) { + res |= remove_forall_in(arg, te2, new_vars); + } + if (res) { + te = te1; + } else { + delete te1; + } + return res; +} + +void TypeExpr::show_width(std::ostream& os) { + os << minw; + if (maxw != minw) { + os << ".."; + if (maxw < w_inf) { + os << maxw; + } + } +} + +std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr) { + if (!type_expr) { + return os << "(null-type-ptr)"; + } + return type_expr->print(os); +} + +std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const { + switch (constr) { + case te_Unknown: + return os << "??" << value; + case te_Var: + if (value >= -26 && value < 0) { + return os << "_" << (char)(91 + value); + } else if (value >= 0 && value < 26) { + return os << (char)(65 + value); + } else { + return os << "TVAR" << value; + } + case te_Indirect: + return os << args[0]; + case te_Atomic: { + switch (value) { + case _Int: + return os << "int"; + case _Cell: + return os << "cell"; + case _Slice: + return os << "slice"; + case _Builder: + return os << "builder"; + case _Continutaion: + return os << "cont"; + case _Tuple: + return os << "tuple"; + default: + return os << "atomic-type-" << value; + } + } + case te_Tensor: { + if (lex_level > -127) { + os << "("; + } + auto c = args.size(); + if (c) { + for (const auto& x : args) { + x->print(os); + if (--c) { + os << ", "; + } + } + } + if (lex_level > -127) { + os << ")"; + } + return os; + } + case te_Tuple: { + os << "["; + auto c = args.size(); + if (c == 1 && args[0]->constr == te_Tensor) { + args[0]->print(os, -127); + } else if (c) { + for (const auto& x : args) { + x->print(os); + if (--c) { + os << ", "; + } + } + } + return os << "]"; + } + case te_Map: { + tolk_assert(args.size() == 2); + if 
(lex_level > 0) { + os << "("; + } + args[0]->print(os, 1); + os << " -> "; + args[1]->print(os); + if (lex_level > 0) { + os << ")"; + } + return os; + } + case te_ForAll: { + tolk_assert(args.size() >= 1); + if (lex_level > 0) { + os << '('; + } + os << "Forall "; + for (std::size_t i = 1; i < args.size(); i++) { + os << (i > 1 ? ' ' : '('); + args[i]->print(os); + } + os << ") "; + args[0]->print(os); + if (lex_level > 0) { + os << ')'; + } + return os; + } + default: + return os << "unknown-type-expr-" << constr; + } +} + +void UnifyError::print_message(std::ostream& os) const { + os << "cannot unify type " << te1 << " with " << te2; + if (!msg.empty()) { + os << ": " << msg; + } +} + +std::ostream& operator<<(std::ostream& os, const UnifyError& ue) { + ue.print_message(os); + return os; +} + +void check_width_compat(TypeExpr* te1, TypeExpr* te2) { + if (te1->minw > te2->maxw || te2->minw > te1->maxw) { + std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate}; + te1->show_width(os); + os << " and "; + te2->show_width(os); + throw UnifyError{te1, te2, os.str()}; + } +} + +void check_update_widths(TypeExpr* te1, TypeExpr* te2) { + check_width_compat(te1, te2); + te1->minw = te2->minw = std::max(te1->minw, te2->minw); + te1->maxw = te2->maxw = std::min(te1->maxw, te2->maxw); + tolk_assert(te1->minw <= te1->maxw); +} + +void unify(TypeExpr*& te1, TypeExpr*& te2) { + tolk_assert(te1 && te2); + // std::cerr << "unify( " << te1 << " , " << te2 << " )\n"; + while (te1->constr == TypeExpr::te_Indirect) { + te1 = te1->args[0]; + } + while (te2->constr == TypeExpr::te_Indirect) { + te2 = te2->args[0]; + } + if (te1 == te2) { + return; + } + if (te1->constr == TypeExpr::te_ForAll) { + TypeExpr* te = te1; + std::vector new_vars = TypeExpr::remove_forall(te); + for (TypeExpr* t : new_vars) { + t->was_forall_var = true; + } + unify(te, te2); + for (TypeExpr* t : new_vars) { + t->was_forall_var = false; + } + return; + } + if (te2->constr == 
TypeExpr::te_ForAll) { + TypeExpr* te = te2; + std::vector new_vars = TypeExpr::remove_forall(te); + for (TypeExpr* t : new_vars) { + t->was_forall_var = true; + } + unify(te1, te); + for (TypeExpr* t : new_vars) { + t->was_forall_var = false; + } + return; + } + if (te1->was_forall_var && te2->constr == TypeExpr::te_Tensor) { + throw UnifyError{te1, te2, "cannot unify generic type and tensor"}; + } + if (te2->was_forall_var && te1->constr == TypeExpr::te_Tensor) { + throw UnifyError{te2, te1, "cannot unify generic type and tensor"}; + } + if (te1->constr == TypeExpr::te_Unknown) { + if (te2->constr == TypeExpr::te_Unknown) { + tolk_assert(te1->value != te2->value); + } + if (!TypeExpr::remove_indirect(te2, te1)) { + throw UnifyError{te1, te2, "type unification results in an infinite cyclic type"}; + } + check_update_widths(te1, te2); + te1->replace_with(te2); + te1 = te2; + return; + } + if (te2->constr == TypeExpr::te_Unknown) { + if (!TypeExpr::remove_indirect(te1, te2)) { + throw UnifyError{te2, te1, "type unification results in an infinite cyclic type"}; + } + check_update_widths(te2, te1); + te2->replace_with(te1); + te2 = te1; + return; + } + if (te1->constr != te2->constr || te1->value != te2->value || te1->args.size() != te2->args.size()) { + throw UnifyError{te1, te2}; + } + for (std::size_t i = 0; i < te1->args.size(); i++) { + unify(te1->args[i], te2->args[i]); + } + if (te1->constr == TypeExpr::te_Tensor) { + if (!te1->recompute_width()) { + throw UnifyError{te1, te2, "type unification incompatible with known width of first type"}; + } + if (!te2->recompute_width()) { + throw UnifyError{te2, te1, "type unification incompatible with known width of first type"}; + } + check_update_widths(te1, te2); + } + te1->replace_with(te2); + te1 = te2; +} + +} // namespace tolk diff --git a/tonlib/tonlib/tonlib-cli.cpp b/tonlib/tonlib/tonlib-cli.cpp index 4567478eb..8fddedd40 100644 --- a/tonlib/tonlib/tonlib-cli.cpp +++ b/tonlib/tonlib/tonlib-cli.cpp @@ -386,7 
+386,7 @@ class TonlibCli : public td::actor::Actor { td::TerminalIO::out() << "sendfile \tLoad a serialized message from and send it to server\n"; td::TerminalIO::out() << "setconfig|validateconfig [] [] [] - set or validate " "lite server config\n"; - td::TerminalIO::out() << "runmethod ...\tRuns GET method of account " + td::TerminalIO::out() << "runmethod ...\tRuns GET method of account " " with specified parameters\n"; td::TerminalIO::out() << "getstate \tget state of wallet with requested key\n"; td::TerminalIO::out() << "getstatebytransaction \tget state of wallet with requested key after transaction with local time and hash (base64url)\n";