diff --git a/.clang-format b/.clang-format index a5c6b03..d18ac33 100644 --- a/.clang-format +++ b/.clang-format @@ -2,27 +2,23 @@ Language: Cpp # Base style BasedOnStyle: LLVM - # Indentation IndentWidth: 4 TabWidth: 4 UseTab: Never NamespaceIndentation: None - # Line breaking -ColumnLimit: 100 +ColumnLimit: 120 AlwaysBreakTemplateDeclarations: Yes BreakBeforeBraces: Allman AllowShortFunctionsOnASingleLine: InlineOnly AllowShortIfStatementsOnASingleLine: Never AllowShortLoopsOnASingleLine: false - # Alignment -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: true AlignOperands: true AlignTrailingComments: true - # Spacing SpaceAfterCStyleCast: false SpaceAfterTemplateKeyword: true @@ -34,8 +30,14 @@ SpacesInCStyleCastParentheses: false SpacesInContainerLiterals: false SpacesInParentheses: false SpacesInSquareBrackets: false - # Other PointerAlignment: Left SortIncludes: true -FixNamespaceComments: true \ No newline at end of file +FixNamespaceComments: true +# Added options +BinPackArguments: false +BinPackParameters: false +AllowAllParametersOfDeclarationOnNextLine: true +BreakConstructorInitializersBeforeComma: true +ConstructorInitializerAllOnOneLineOrOnePerLine: true +AllowAllConstructorInitializersOnNextLine: true \ No newline at end of file diff --git a/.github/workflows/msvc.yml b/.github/workflows/msvc.yml deleted file mode 100644 index 474f570..0000000 --- a/.github/workflows/msvc.yml +++ /dev/null @@ -1,52 +0,0 @@ -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. 
-# -# Find more information at: -# https://github.com/microsoft/msvc-code-analysis-action - -name: C++ Code Analysis - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - schedule: - - cron: '22 23 * * 3' - -env: - BUILD_TYPE: Debug - BUILD_DIR: '${{ github.workspace }}/build' - -jobs: - analyze: - name: Analyze - runs-on: windows-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - - name: Configure CMake - run: cmake -B ${{ env.build }} -DCMAKE_BUILD_TYPE=${{ env.config }} - - - name: Run MSVC Code Analysis - uses: microsoft/msvc-code-analysis-action@v0.1.1 - id: run-analysis - with: - cmakeBuildDirectory: ${{ env.BUILD_DIR }} - buildConfiguration: ${{ env.BUILD_TYPE }} - ruleset: NativeRecommendedRules.ruleset - - - name: Upload SARIF to GitHub - uses: github/codeql-action/upload-sarif@v2 - with: - sarif_file: ${{ steps.run-analysis.outputs.sarif }} - - - name: Upload SARIF as an Artifact - uses: actions/upload-artifact@v2 - with: - name: sarif-file - path: ${{ steps.run-analysis.outputs.sarif }} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 40e06a1..68ca144 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,75 +6,104 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -# -------------------- Set Compiler Warnings -------------------------------- -if(MSVC) - add_compile_options(/W4 /WX) -else() - add_compile_options(-Wall -Wextra -Wpedantic -Werror) +# -------------------- Compiler Warnings -------------------------------- +if(NOT EMSCRIPTEN) + if(MSVC) + add_compile_options(/W4 /WX /MP) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + else() + add_compile_options( + -Wall -Wextra -Wpedantic -Werror + -Wno-unused-parameter + -Wno-missing-field-initializers + ) + endif() endif() -# -------------------- Fetch Json component -------------------------------- +# -------------------- FetchContent Setup 
-------------------------------- include(FetchContent) FetchContent_Declare( json GIT_REPOSITORY https://github.com/nlohmann/json.git - GIT_TAG master + GIT_TAG master ) -FetchContent_MakeAvailable(json) -# -------------------- Specify the source files for each component -------------------------------- -set(SOURCES_LEXER - CuriousX/Lexer/LexerToken.cpp -) - -set(SOURCES_PARSER - CuriousX/Parser/Parser.cpp -) - -set(SOURCES_SEMANTIC - CuriousX/Semantic/Semantic.cpp -) - -set(SOURCES_GEN - CuriousX/Gen/Codegen.cpp -) +FetchContent_GetProperties(json) +if(NOT json_POPULATED) + FetchContent_MakeAvailable(json) +endif() +# -------------------- Build Tests Option -------------------------------- +option(BUILD_TESTS "Build tests" OFF) +message(STATUS "[STX] Build tests: ${BUILD_TESTS}") -#------------ Include Utility Folder-------------------------------- +# -------------------- Include Utility Folder -------------------------------- add_library(CompilerUtils INTERFACE) target_include_directories(CompilerUtils INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/CompilerUtils) -# -------------------- Include Lexer Folder-------------------------------- +# -------------------- Include Lexer -------------------------------- +set(SOURCES_LEXER CuriousX/Lexer/LexerToken.cpp) add_library(Lexer STATIC ${SOURCES_LEXER}) target_include_directories(Lexer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/CuriousX/Lexer) target_link_libraries(Lexer PUBLIC CompilerUtils nlohmann_json::nlohmann_json) -# -------------------- Include CodeGen Folder-------------------------------- -add_library(Gen STATIC ${SOURCES_GEN}) -target_include_directories(Gen PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/CuriousX/Gen) -target_link_libraries(Gen PUBLIC Lexer) +# -------------------- Include Parser -------------------------------- +set(SOURCES_PARSER CuriousX/Parser/Parser.cpp) +add_library(Parser STATIC ${SOURCES_PARSER}) +target_include_directories(Parser PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/CuriousX/Parser) 
+target_link_libraries(Parser PUBLIC Lexer) -# -------------------- Include Semantic Folder-------------------------------- +# -------------------- Include Semantic Analyzer -------------------------------- +set(SOURCES_SEMANTIC CuriousX/Semantic/Semantic.cpp) add_library(Semantic STATIC ${SOURCES_SEMANTIC}) target_include_directories(Semantic PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/CuriousX/Semantic) -target_link_libraries(Semantic PUBLIC Gen) +target_link_libraries(Semantic PUBLIC Parser) -# -------------------- Include Parser Folder-------------------------------- -add_library(Parser STATIC ${SOURCES_PARSER}) -target_include_directories(Parser PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/CuriousX/Parser) -target_link_libraries(Parser PUBLIC Semantic) +# -------------------- Include Code Generator -------------------------------- +set(SOURCES_GEN CuriousX/Generation/Codegen.cpp) +add_library(Gen STATIC ${SOURCES_GEN}) +target_include_directories(Gen PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/CuriousX/Generation) +target_link_libraries(Gen PUBLIC Semantic) +# -------------------- Include Compiler Core -------------------------------- +add_library(Compiler STATIC CuriousX/Compiler.cpp) +target_include_directories(Compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/CuriousX) +target_link_libraries(Compiler PUBLIC Parser Semantic Gen) -# -------------------- Create the main executable and link it to the libraries -------------------------------- -add_executable(CuriousX "CuriousX/main.cpp") -target_link_libraries(CuriousX PRIVATE Parser) +# -------------------- Main Executable -------------------------------- +add_executable(CuriousX CuriousX/main.cpp) target_include_directories(CuriousX PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CuriousX) +target_link_libraries(CuriousX PRIVATE Compiler) + +# -------------------- Tests -------------------------------- +if(BUILD_TESTS) + FetchContent_Declare( + gtest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG main + ) + 
FetchContent_GetProperties(gtest) + if(NOT gtest_POPULATED) + FetchContent_MakeAvailable(gtest) + endif() + include(CTest) + enable_testing() + + file(GLOB SOURCES_tests tests/*.cpp) + add_executable(curiousx_tests ${SOURCES_tests}) + target_include_directories(curiousx_tests PRIVATE ${CMAKE_SOURCE_DIR}/CuriousX ${CMAKE_SOURCE_DIR}/CompilerUtils) + target_link_libraries(curiousx_tests PRIVATE GTest::gtest GTest::gtest_main Compiler) + add_test(NAME CuriousxTests COMMAND curiousx_tests) +endif() + +# -------------------- Emscripten Support -------------------------------- if(EMSCRIPTEN) set_target_properties(CuriousX PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/CompilerEditor" - LINK_FLAGS "--bind -s DISABLE_EXCEPTION_CATCHING=0" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/CompilerEditor" ) -endif() \ No newline at end of file + target_link_options(CuriousX PRIVATE --bind -s DISABLE_EXCEPTION_CATCHING=0) + message(STATUS "Building with Emscripten support") +endif() diff --git a/CompilerEditor/index.html b/CompilerEditor/index.html index 3e549a3..706bdae 100644 --- a/CompilerEditor/index.html +++ b/CompilerEditor/index.html @@ -1,89 +1,135 @@ - + + - CuriousX + Compiler Playground + + - -
-
-

CuriousX

- + + + +
+
+

CuriousX

+
+ + -
-
-
- - - +
+ + + +
+ +
+ + +
+ +
+ +
+ + +
+ +
+ + + + +
+ + +
+
+ Lexer output will appear here... +
+ + + +
+
+
+ + + + +
+
+ \ No newline at end of file diff --git a/CompilerEditor/script.js b/CompilerEditor/script.js index 80ffb74..c14760b 100644 --- a/CompilerEditor/script.js +++ b/CompilerEditor/script.js @@ -1,148 +1,180 @@ -// Get DOM elements -const codeTextarea = document.getElementById("code-textarea"); -const compileButton = document.getElementById("compile-button"); -const outputDisplay = document.getElementById("output-display"); -const errorContainer = document.getElementById("error-container"); -const tabButtons = document.querySelectorAll(".tab-button"); -const fileInput = document.getElementById("file-input"); -const themeToggle = document.getElementById("theme-toggle"); -const moonIcon = themeToggle.querySelector(".moon"); -const sunIcon = themeToggle.querySelector(".sun"); - -// Initialize state -let currentTab = "lexer"; -let output = { - Lexer: "", - trees: "", - semantic: "", - codegen: "", -}; - -// Event listeners -compileButton.addEventListener("click", handleCompile); -tabButtons.forEach((button) => { - button.addEventListener("click", () => switchTab(button.dataset.tab)); +// Monaco Editor Initialization +let editor; + +require.config({ + paths: { + vs: "https://cdnjs.cloudflare.com/ajax/libs/monaco-editor/0.36.1/min/vs", + }, }); -fileInput.addEventListener("change", handleFileUpload); -themeToggle.addEventListener("click", toggleTheme); +require(["vs/editor/editor.main"], function () { + // custom dark theme + monaco.editor.defineTheme("custom-dark", { + base: "vs-dark", + inherit: true, + rules: [], + colors: { + "editor.background": "#292c35", + }, + }); -let Modules = {}; -Modules["onRuntimeInitialized"] = function () { - document.getElementById("run-btn").disabled = false; -}; + editor = monaco.editor.create(document.getElementById("editor-container"), { + value: "// Write your code here...\n", + language: "python", + theme: document.body.classList.contains("dark") + ? 
"custom-dark" + : "vs-light", + automaticLayout: true, + }); +}); -function handleCompile() { - const code = codeTextarea.value; - errorContainer.innerHTML = ""; - if (code.trim() !== "") { - try { - const result = Module.processFileContent(code); - const parsedResult = JSON.parse(result); - if (parsedResult.success === false) { - // If the operation wasn't successful, throw the error - throw new Error(parsedResult.error); - } - displayResults(parsedResult); - } catch (error) { - console.error("Error:", error); - showError(error.message || error.toString()); - } - } else { - showError("Please type some code or upload a file."); - } -} +const themeSelect = document.getElementById("theme-select"); -function displayResults(result) { - output.lexer = formatLexerOutput(result.Lexer); - output.trees = generateAsciiTree(result.AST); - output.semantic = generateTable(result.SymbolTable); - output.codegen = formatCodegenOutput(result.Gen); +// Set initial select value based on body class +themeSelect.value = document.body.classList.contains("dark") ? "Dark" : "Light"; +themeSelect.addEventListener("change", () => { + document.body.classList.toggle("dark", themeSelect.value === "Dark"); + monaco.editor.setTheme( + themeSelect.value === "Dark" ? 
"custom-dark" : "vs-light", + ); +}); - updateOutput(); - showAllTabs(); -} +// dummy code for the editor +document.addEventListener("DOMContentLoaded", () => { + const insertCodeButton = document.getElementById("insert-code-btn"); + insertCodeButton.addEventListener("click", () => { + const dummyCode = `x = 5\ny = 10\n\nif (x == y) {\n print(x + y)\n} else {\n print("not equal")\n}`; + editor.setValue(dummyCode); + }); +}); -function formatLexerOutput(lexerData) { - const header = - "Token Position Value\n" + - "----------------------------------------------------------------------\n"; +// Clear button functionality +document.querySelector(".clear-btn").addEventListener("click", () => { + if (editor) editor.setValue(""); // Clear editor + document + .querySelectorAll(".output-pane") + .forEach((pane) => (pane.innerHTML = "")); +}); - const formattedTokens = lexerData.map((item) => { - const tokenPadded = item.type.padEnd(20); - const locationPadded = item.location.padEnd(20); - const value = formatValue(item.value); +// compile button click +document.getElementById("run-btn").addEventListener("click", handleCompile); - return `${tokenPadded}${locationPadded}${value}\n`; - }); +function handleCompile() { + const code = editor.getValue(); + hideError(); - return header + formattedTokens.join(""); -} + if (code.trim() === "") { + showError("Please type some code or upload a file."); + return; + } -function formatValue(value) { - if (value === "") return ""; - if (value === "\n") return "\\n"; - if (value === "\t") return "\\t"; - return `[${value}]`; -} + try { + const result = Module.processFileContent(code); + const parsedResult = JSON.parse(result); + if (!parsedResult.success) { + throw new Error(parsedResult.error); + } -function switchTab(tab) { - currentTab = tab; - tabButtons.forEach((button) => { - button.classList.toggle("active", button.dataset.tab === tab); - }); - updateOutput(); + displayResults(parsedResult); + } catch (error) { + 
console.error("Error:", error); + showError(error.message || error.toString()); + } } -function updateOutput() { - outputDisplay.textContent = output[currentTab] || ""; +// Display compilation results in the output panes +function clearResults() { + document.getElementById("lexer-output").innerHTML = + "probably an error occurred .."; + document.getElementById("parse-tree-output").innerHTML = + "probably an error occurred .."; + document.getElementById("symbol-table-output").innerHTML = + "probably an error occurred .."; + document.getElementById("code-gen-output").innerHTML = + "probably an error occurred .."; } -function showAllTabs() { - tabButtons.forEach((tab) => (tab.style.display = "block")); +// Display compilation results in the output panes +function displayResults(parsedResult) { + document.getElementById("lexer-output").innerHTML = formatLexerOutput( + parsedResult.Lexer, + ); + document.getElementById("parse-tree-output").innerHTML = generateAsciiTree( + parsedResult.AST, + ); + document.getElementById("symbol-table-output").innerHTML = generateTable( + parsedResult.SymbolTable, + ); + document.getElementById("code-gen-output").innerHTML = generateGenOutput( + parsedResult.Gen, + parsedResult.Local, + ); } -function showError(message) { - errorContainer.innerHTML = `
Error: ${escapeHtml(message)}
`; +function showError(error) { + clearResults(); + const errorContainer = document.getElementById("error-container"); + const errorMessage = document.getElementById("error-message"); + errorMessage.textContent = error; + errorContainer.classList.remove("hidden"); } -// Helper function to escape HTML special characters -function escapeHtml(unsafe) { - return unsafe - .replace(/&/g, "&") - .replace(//g, ">") - .replace(/"/g, """) - .replace(/'/g, "'"); +function hideError() { + const errorContainer = document.getElementById("error-container"); + errorContainer.classList.add("hidden"); } -function handleFileUpload(event) { - const file = event.target.files[0]; - if (file) { - const reader = new FileReader(); - reader.onload = function (e) { - codeTextarea.value = e.target.result; - }; - reader.onerror = function () { - showError("Error reading file"); - }; - reader.readAsText(file); - } -} +// Tab Switching with Bold and Color +const tabButtons = document.querySelectorAll(".tab-btn"); +const outputPanes = document.querySelectorAll(".output-pane"); + +// Color classes for active tabs +const colorClasses = [ + "border-blue-600", + "border-green-600", + "border-red-600", + "border-yellow-600", +]; + +tabButtons.forEach((btn, index) => { + btn.addEventListener("click", () => { + // Reset all tabs and panes + tabButtons.forEach((b) => { + b.classList.remove(...colorClasses, "font-bold", "text-red-900"); + b.classList.add("text-gray-600"); + }); + outputPanes.forEach((pane) => pane.classList.add("hidden")); -function toggleTheme() { - document.documentElement.classList.toggle("dark"); - updateThemeIcon(); -} + btn.classList.add(colorClasses[index], "font-bold", "text-red-900"); + btn.classList.remove("text-gray-600"); + document.getElementById(btn.dataset.target).classList.remove("hidden"); + }); +}); + +function formatLexerOutput(lexerData) { + const formattedTokens = lexerData.map((token) => { + const position = token.location.replace(/[<>]/g, "").padEnd(25); + const 
tokenType = token.type.padEnd(20); + const value = formatValue(token.value); + + return `${tokenType}${position}${[value]}`; + }); -function updateThemeIcon() { - const isDark = document.documentElement.classList.contains("dark"); - moonIcon.style.display = isDark ? "none" : "block"; - sunIcon.style.display = isDark ? "block" : "none"; + return formattedTokens.join("\n"); } -// Initial theme icon update -updateThemeIcon(); +function formatValue(value) { + switch (value) { + case "\\n": + return "[\\n]"; + case "\\t": + return "[\\t]"; + case "": + return "[]"; + default: + return `[${value}]`; + } +} function generateAsciiTree(node, prefix = "", isLast = true, depth = 0) { if (!node) return ""; @@ -150,7 +182,6 @@ function generateAsciiTree(node, prefix = "", isLast = true, depth = 0) { let result = ""; const indent = prefix + (isLast ? "└─ " : "├─ "); - // Add token value or indicate it's unknown if (node.token && node.token.value) { result += indent + node.token.value + "\n"; } else { @@ -161,55 +192,93 @@ function generateAsciiTree(node, prefix = "", isLast = true, depth = 0) { if (Array.isArray(node.children) && node.children.length > 0) { node.children.forEach((child, index) => { const isLastChild = index === node.children.length - 1; - result += generateAsciiTree(child, prefix + (isLast ? " " : "│ "), isLastChild, depth + 1); + result += generateAsciiTree( + child, + prefix + (isLast ? " " : "│ "), + isLastChild, + depth + 1, + ); }); } else if (node.left && node.right) { // Handle left-right child nodes - result += generateAsciiTree(node.left, prefix + (isLast ? " " : "│ "), false, depth + 1); - result += generateAsciiTree(node.right, prefix + (isLast ? " " : "│ "), true, depth + 1); + result += generateAsciiTree( + node.left, + prefix + (isLast ? " " : "│ "), + false, + depth + 1, + ); + result += generateAsciiTree( + node.right, + prefix + (isLast ? 
" " : "│ "), + true, + depth + 1, + ); } else if (node.condition && node.ifNode && node.elseNode) { // Handle conditionals - result += generateAsciiTree(node.condition, prefix + (isLast ? " " : "│ "), false, depth + 1); - result += generateAsciiTree(node.ifNode, prefix + (isLast ? " " : "│ "), false, depth + 1); - result += generateAsciiTree(node.elseNode, prefix + (isLast ? " " : "│ "), true, depth + 1); + result += generateAsciiTree( + node.condition, + prefix + (isLast ? " " : "│ "), + false, + depth + 1, + ); + result += generateAsciiTree( + node.ifNode, + prefix + (isLast ? " " : "│ "), + false, + depth + 1, + ); + result += generateAsciiTree( + node.elseNode, + prefix + (isLast ? " " : "│ "), + true, + depth + 1, + ); } else if (node.condition && node.ifNode) { // Handle conditionals without an elseNode (optional else) - result += generateAsciiTree(node.condition, prefix + (isLast ? " " : "│ "), false, depth + 1); - result += generateAsciiTree(node.ifNode, prefix + (isLast ? " " : "│ "), true, depth + 1); + result += generateAsciiTree( + node.condition, + prefix + (isLast ? " " : "│ "), + false, + depth + 1, + ); + result += generateAsciiTree( + node.ifNode, + prefix + (isLast ? 
" " : "│ "), + true, + depth + 1, + ); } return result; } - - -function getNodeValue(node) { - if (typeof node === "string") return node; - if (node.token && node.token.value) return `${node.token.value}`; - return "Unknown"; -} - -function getNodeChildren(node) { - if (Array.isArray(node.children)) return node.children; - if (node.left || node.right) return [node.left, node.right].filter(Boolean); - return []; +function generateTable(symbolTable) { + const formattedEntries = symbolTable.flatMap((entry) => + Object.values(entry).map((symbol) => { + const typePadded = symbol.type.padEnd(15); + const valuePadded = symbol.value.padEnd(10); + return `${typePadded}${valuePadded}\n`; + }), + ); + + return formattedEntries.join(""); } -function generateTable(symbolTable) { - let tableHeader = - "Type Value\n" + - "--------------------------------\n"; - - // Format each entry in the symbol table - const formattedEntries = symbolTable.map((entry) => { - const typePadded = entry.type.padEnd(15); - const valuePadded = entry.value.padEnd(10); - return `${typePadded}${valuePadded}\n`; +function generateGenOutput(genData, Local) { + let output = "\n"; + genData[0].forEach((instruction) => { + output += `${instruction}\n`; + }); + output += "\n\n\nLocal Variables:\n"; + output += "-----------------\n"; + output += "Index Variable\n"; + + // Display the Local variables in a table-like format + Local.forEach((local) => { + const index = "local " + local.index.toString().padEnd(8); + const name = local.name; + output += `${index}${name}\n`; }); - return tableHeader + formattedEntries.join(""); + return output; } - -function formatCodegenOutput(codegenData) { - return codegenData.join('\n'); -} \ No newline at end of file diff --git a/CompilerEditor/style.css b/CompilerEditor/style.css index b336b37..9010471 100644 --- a/CompilerEditor/style.css +++ b/CompilerEditor/style.css @@ -1,174 +1,66 @@ -:root { - --bg-color: #fff; - --text-color: #1a202c; - --primary-color: #4a5568; - 
--secondary-color: #e2e8f0; - --accent-color: #3182ce; - --code-bg-color: #edf2f7; - --code-text-color: #2d3748; -} - -.dark { - --bg-color: #1a202c; - --text-color: #fff; - --primary-color: #2d3748; - --secondary-color: #4a5568; - --accent-color: #805ad5; - --code-bg-color: #4a5568; - --code-text-color: #48bb78; -} - -body { - font-family: Arial, sans-serif; - margin: 0; - padding: 0; - min-height: 100vh; - background-color: var(--bg-color); - color: var(--text-color); - transition: background-color 0.3s, color 0.3s; -} - -.container { - max-width: 1200px; - margin: 0 auto; - padding: 2rem 1rem; - display: flex; - flex-direction: column; - min-height: 95vh; -} - -.header { - display: flex; - justify-content: space-between; - align-items: center; - margin-bottom: 2rem; -} - -h1 { - font-size: 2.5rem; - color: var(--accent-color); - margin: 0; -} - -.editor-container { - display: flex; - flex-direction: column; - gap: 1rem; - flex-grow: 1; -} - -@media (min-width: 1024px) { - .editor-container { - flex-direction: row; - } -} - -.code-input, .output-display { - flex: 1; - background-color: var(--primary-color); - border-radius: 0.5rem; - padding: 1rem; - box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); - display: flex; - flex-direction: column; -} - -textarea { - width: 100%; - flex-grow: 1; - min-height: 100px; - background-color: var(--code-bg-color); - color: var(--code-text-color); - border: none; - border-radius: 0.25rem; - padding: 0.5rem; - font-family: monospace; - resize: vertical; - margin-top: 1rem; -} - -button, .file-label { - background-color: var(--accent-color); - color: var(--bg-color); - border: none; - padding: 0.5rem 1rem; - border-radius: 0.25rem; - cursor: pointer; - transition: background-color 0.3s; -} - -button:hover, .file-label:hover { - filter: brightness(110%); -} - -.tab-buttons { - display: flex; - gap: 0.5rem; - overflow-x: auto; - padding-bottom: 0.5rem; -} - -.tab-button { - background-color: var(--secondary-color); - color: 
var(--text-color); - border: none; - padding: 0.5rem 1rem; - border-radius: 0.25rem; +body.dark { + background-color: #292c35; +} + +body.dark #sidebar { + background-color: #292c35; +} + +.classic-beveled-btn { + padding: 2px 8px; + font-size: 12px; + color: #333; + border: 1px solid #999; + border-radius: 2px; + background: #f0f0f0; + box-shadow: + inset 1px 1px 0 #fff, + inset -1px -1px 0 #ccc, + 0 1px 1px rgba(0, 0, 0, 0.1); + text-shadow: 0 1px 0 #fff; cursor: pointer; - transition: background-color 0.3s; - display: flex; - align-items: center; - gap: 0.5rem; } -.tab-button.active { - background-color: var(--accent-color); - color: var(--bg-color); +.classic-beveled-btn:hover { + background: #f5f5f5; } -.tab-button svg { - width: 1.2em; - height: 1.2em; +.classic-beveled-btn:active { + background: #e8e8e8; + box-shadow: + inset 1px 1px 2px rgba(0, 0, 0, 0.1), + inset -1px -1px 0 #fff; } -.output-content { - background-color: var(--code-bg-color); - color: var(--code-text-color); - border-radius: 0.25rem; - padding: 1rem; - flex-grow: 1; - overflow-y: auto; +.output-pane { font-family: monospace; - white-space: pre-wrap; + white-space: pre; + height: 100%; } -.error-message { - background-color: #feb2b2; - color: #c53030; - padding: 1rem; - border-radius: 0.25rem; - margin-top: 1rem; +body.dark .output-pane { + color: #fff; } -.file-input { - display: none; +#error-container:not(.hidden) { + display: block; + max-height: 150px; + overflow-y: auto; + background-color: #FEE2E2; + border-top: 2px solid #EF4444; } -.button-container { - display: flex; - gap: 1rem; - margin-bottom: 1rem; +#error-message { + white-space: pre-wrap; + margin: 0; + color: #991B1B; } -#theme-toggle { - background: none; - border: none; - cursor: pointer; - padding: 0; +.dark #error-container:not(.hidden) { + background-color: #7F1D1D; + border-top: 2px solid #EF4444; } -#theme-toggle svg { - width: 1.5em; - height: 1.5em; - fill: var(--text-color); -} +.dark #error-message { + color: 
#991B1B; +} \ No newline at end of file diff --git a/CompilerUtils/CompilerOutput.hpp b/CompilerUtils/CompilerOutput.hpp new file mode 100644 index 0000000..18b970c --- /dev/null +++ b/CompilerUtils/CompilerOutput.hpp @@ -0,0 +1,48 @@ +// CompilerOutput.hpp +#pragma once + +#include +#include +#include +#include + +class CompilerOutput +{ + public: + CompilerOutput() : m_json({{"success", true}}) {} + + explicit CompilerOutput(const std::string& filename) : m_json({{"success", true}}), m_filename(filename) {} + + nlohmann::json& getJson() { return m_json; } + const nlohmann::json& getJson() const { return m_json; } + + void setError(const std::string& error) + { + m_json["success"] = false; + m_json["error"] = error; + } + + nlohmann::json& getError() { return m_json["error"]; } + + void writeToFile(const std::string& filename) const + { + std::ofstream file(filename); + file << m_json.dump(4); + } + + std::string readFromFile() const + { + std::ifstream inputFile(m_filename); + if (!inputFile.is_open()) + { + throw std::runtime_error("Unable to open input file: " + m_filename); + } + std::ostringstream sstr; + sstr << inputFile.rdbuf(); + return sstr.str(); + } + + private: + nlohmann::json m_json; + std::string m_filename; +}; \ No newline at end of file diff --git a/CompilerUtils/CompilerOutputParser.hpp b/CompilerUtils/CompilerOutputParser.hpp deleted file mode 100644 index 90f0ee2..0000000 --- a/CompilerUtils/CompilerOutputParser.hpp +++ /dev/null @@ -1,312 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include "Node.hpp" -#include "SymbolTable.hpp" -#include "WasmInstructions.hpp" - -using json = nlohmann::json; - -class CompilerOutputParser -{ - private: - CompilerOutputParser() - { - jsonOutput["Lexer"] = nlohmann::json::array(); - jsonOutput["success"] = true; - } - - static std::string formatValue(const std::string& value) - { - if (value.empty()) - return ""; - if (value == "\n") - return "\\n"; - if (value == "\t") - 
return "\\t"; - return "[" + value + "]"; - } - - nlohmann::json jsonOutput; - - public: - static CompilerOutputParser& getInstance() - { - static CompilerOutputParser instance; - return instance; - } - - CompilerOutputParser(const CompilerOutputParser&) = delete; - CompilerOutputParser& operator=(const CompilerOutputParser) = delete; - - static std::string readInputFile(const std::string& filePath) - { - std::ifstream inputFile(filePath); - if (!inputFile.is_open()) - { - throw std::runtime_error("Unable to open input file: " + filePath); - } - std::ostringstream sstr; - sstr << inputFile.rdbuf(); - return sstr.str(); - } - - std::string getJson() const { return jsonOutput.dump(); } - - static void formatTokens(const std::string& jsonString, const std::string& outputFile) - { - nlohmann::json j = nlohmann::json::parse(jsonString); - std::ofstream outFile(outputFile); - if (!outFile.is_open()) - { - throw std::runtime_error("Unable to open output file: " + outputFile); - } - - outFile << "==== Lexer Output ====\n\n"; - outFile << std::left << std::setw(20) << "Token" << std::setw(20) << "Position" - << "Value\n"; - outFile << std::string(70, '-') << "\n"; - - for (const auto& token : j["Lexer"]) - { - outFile << std::left << std::setw(20) << token["type"].get() - << std::setw(20) << std::setw(20) << token["location"].get() - << formatValue(token["value"].get()) << "\n"; - } - - outFile << "==== AST Output ====\n\n"; - drawASTNode(j["AST"], outFile, 0); - - outFile << "==== Symbol Output ====\n\n"; - drawTable(j["SymbolTable"], outFile); - - outFile << "==== Code Output ====\n\n"; - outFile << j["Gen"].dump(4); - - outFile.close(); - std::cout << "output written to: " << outputFile << std::endl; - } - - static void drawASTNode(const nlohmann::json& node, std::ofstream& outFile, int depth) - { - if (node.is_null()) - return; - - std::string indent(depth * 2, ' '); - outFile << indent << "└─ "; - - // Print token information - outFile << node["token"]["value"].get() 
<< "\n"; - - // Handle different node types - if (node.contains("children")) - { - for (const auto& child : node["children"]) - { - drawASTNode(child, outFile, depth + 1); - } - } - else if (node.contains("left") && node.contains("right")) - { - drawASTNode(node["left"], outFile, depth + 1); - drawASTNode(node["right"], outFile, depth + 1); - } - else if (node.contains("condition") && node.contains("ifNode") && node.contains("elseNode")) - { - drawASTNode(node["condition"], outFile, depth + 2); - drawASTNode(node["ifNode"], outFile, depth + 2); - drawASTNode(node["elseNode"], outFile, depth + 2); - } - } - - nlohmann::json serializeLexerToken(const LexerToken& token) - { - nlohmann::json j; - j["type"] = toString(token.type); - j["location"] = token.location.toString(); - j["value"] = token.value; - - return j; - } - - void setErrorOutput(const Error& ex) - { - jsonOutput["success"] = false; - jsonOutput["error"] = ex.what(); - } - - void SetLexerOutput(const LexerToken& token) - { - jsonOutput["Lexer"].push_back(serializeLexerToken(token)); - } - - void setASTOutput(const std::unique_ptr& root, - const std::vector>& table) - { - jsonOutput["AST"] = nodeToJson(root); - jsonOutput["SymbolTable"] = tableToJson(table); - } - - void codeOutput(const std::vector& data) - { - - json instructionArray = json::array(); - for (const auto& instr : data) - { - instructionArray.push_back(instructionToString(instr)); - } - jsonOutput["Gen"] = instructionArray; - } - - nlohmann::json - tableToJson(const std::vector>& table) - { - nlohmann::json jArray = nlohmann::json::array(); - nlohmann::json j; - - for (const auto& node : table) - { - for (auto it : node) - { - j["value"] = it.first; - j["type"] = getInferredTypeDescription(it.second.type); - jArray.push_back(j); - } - } - return jArray; - } - - static std::string getNodeTypeName(NodeType type) - { - switch (type) - { - case NodeType::BinaryOperation: - return "BinaryOperation"; - case NodeType::ConditionalOperation: - return 
"ConditionalOperation"; - case NodeType::BlockOperation: - return "BlockOperation"; - default: - return "Unknown"; - } - } - - nlohmann::json nodeToJson(const std::unique_ptr& node) - { - if (!node) - return nullptr; - - nlohmann::json j; - j["type"] = getNodeTypeName(node->getType()); - j["token"] = {{"type", toString(node->token.type)}, - {"value", node->token.value}, - {"location", node->token.location.toString()}}; - - j["children"] = nlohmann::json::array(); - for (const auto& child : node->children) - { - j["children"].push_back(nodeToJson(child)); - } - - return j; - } - - nlohmann::json nodeToJson(const std::unique_ptr& node) - { - if (!node) - return nullptr; - - nlohmann::json j; - j["type"] = getNodeTypeName(node->getType()); - j["token"] = {{"type", toString(node->token.type)}, - {"value", node->token.value}, - {"location", node->token.location.toString()}}; - - switch (node->getType()) - { - case NodeType::BinaryOperation: - { - const auto& binaryNode = static_cast(*node); - j["left"] = nodeToJson(binaryNode.left); - j["right"] = nodeToJson(binaryNode.right); - break; - } - case NodeType::ConditionalOperation: - { - const auto& condNode = static_cast(*node); - j["condition"] = nodeToJson(condNode.condition); - j["ifNode"] = nodeToJson(condNode.ifNode); - j["elseNode"] = nodeToJson(condNode.elseNode); - break; - } - case NodeType::BlockOperation: - const auto& printNode = static_cast(*node); - j["children"] = nlohmann::json::array(); - for (const auto& child : printNode.children) - { - j["children"].push_back(nodeToJson(child)); - } - break; - } - - return j; - } - - std::string_view getInferredTypeDescription(const InferredType& t) - { - switch (t) - { - case InferredType::BOOL: - return "Boolean"; - case InferredType::FLOAT: - return "Float"; - case InferredType::INTEGER: - return "Integer"; - case InferredType::STRING: - return "String"; - default: - return "An unknown error occurred"; - } - } - - static void drawTable(const nlohmann::json& node, 
std::ofstream& outFile) - { - if (!node.is_array() || node.empty()) - { - outFile << "No data to display\n"; - return; - } - - // Determine the maximum width for each column - size_t typeWidth = 4; // Minimum width for "Type" header - size_t valueWidth = 5; // Minimum width for "Value" header - for (const auto& item : node) - { - typeWidth = std::max(typeWidth, item["type"].get().length()); - valueWidth = std::max(valueWidth, item["value"].get().length()); - } - - // Draw the header - outFile << std::setfill('-') << std::setw(typeWidth + valueWidth + 7) << "-" << "\n"; - outFile << std::setfill(' '); - outFile << "| " << std::left << std::setw(typeWidth) << "Type" - << " | " << std::setw(valueWidth) << "Value" << " |\n"; - outFile << std::setfill('-') << std::setw(typeWidth + valueWidth + 7) << "-" << "\n"; - outFile << std::setfill(' '); - - // Draw the rows - for (const auto& item : node) - { - outFile << "| " << std::left << std::setw(typeWidth) << item["type"].get() - << " | " << std::setw(valueWidth) << item["value"].get() << " |\n"; - } - - // Draw the bottom border - outFile << std::setfill('-') << std::setw(typeWidth + valueWidth + 7) << "-" << "\n"; - } -}; diff --git a/CompilerUtils/input.txt b/CompilerUtils/input.txt index 218c26b..0a00a85 100644 --- a/CompilerUtils/input.txt +++ b/CompilerUtils/input.txt @@ -1,7 +1,9 @@ -d = true -a = 5.9 -if (d <= false) { - print(a) -}else { - print(a) +a = 7 +b = 9 +x = "hello" #this is a comment +if (a == b) { + 10 + a +} else { + print(b + 10) + print(x) } diff --git a/CuriousX/Compiler.cpp b/CuriousX/Compiler.cpp new file mode 100644 index 0000000..8f895c3 --- /dev/null +++ b/CuriousX/Compiler.cpp @@ -0,0 +1,54 @@ +// Compiler.cpp +#include "Compiler.hpp" + +Compiler::Compiler(std::string_view source, CompilerOutput& output) + : m_parser(source, output) + , m_semantic(output) + , m_codegen(output) + , m_root(ASTNodeFactory::createTreeNode({}, {"Program", {0, 0}, LexerTokenType::ProgramToken})) + , m_output(output) 
+{ +} + +bool Compiler::compile() +{ + try + { + LexerToken token; + m_parser.advanceToken(token); + + while (token.type != LexerTokenType::Eof) + { + if (auto node = m_parser.parseStatement(token)) + { + processNode(node); + } + if (!m_parser.expectNewlineOrEOF(token)) + { + throw Error("Expected new line before " + std::string(token.value), token.location, ErrorType::SYNTAX); + } + + m_parser.advancePastNewlines(token); + } + collectOutputs(); + return !m_root->children.empty(); + } + catch (const Error& e) + { + m_output.setError(e.what()); + return false; + } +} + +void Compiler::processNode(std::unique_ptr& node) +{ + m_semantic.analyzeTree(*node); + m_codegen.generate(*node); + m_root->children.emplace_back(std::move(node)); +} +void Compiler::collectOutputs() +{ + m_parser.addASTToOutput(m_root); + m_semantic.addSymbolTableToOutput(); + m_codegen.addGeneratedCodeToOutput(); +} diff --git a/CuriousX/Compiler.hpp b/CuriousX/Compiler.hpp new file mode 100644 index 0000000..b496787 --- /dev/null +++ b/CuriousX/Compiler.hpp @@ -0,0 +1,23 @@ +// Compiler.hpp +#pragma once +#include "Generation/Codegen.hpp" +#include "Parser/Parser.hpp" +#include "Semantic/Semantic.hpp" +#include + +class Compiler +{ + public: + Compiler(std::string_view source, CompilerOutput& output); + + bool compile(); + void collectOutputs(); + void processNode(std::unique_ptr& node); + + private: + Parser m_parser; + Semantic m_semantic; + WasmGen m_codegen; + std::unique_ptr m_root; + CompilerOutput m_output; +}; diff --git a/CuriousX/Gen/Codegen.hpp b/CuriousX/Gen/Codegen.hpp deleted file mode 100644 index dd8fa62..0000000 --- a/CuriousX/Gen/Codegen.hpp +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include -#include - -#include "CompilerOutputParser.hpp" - -class WasmGen -{ - public: - WasmGen() : localVarIndex(0) {} - void traverse(const ASTNode& node); - bool isFloatType(const BinaryNode& node); - - void generateBinaryOp(const BinaryNode& node); - void generateConditional(const 
ConditionalNode& node); - void generateBlock(const TreeNode& node); - void generateExpression(const BinaryNode& node); - - int getLocalIndex(std::string_view varName); - - void addInstruction(const WasmInstructionWithData& instruction); - const std::vector& getInstructions() const; - const std::unordered_map& getLocalMap() const; - - private: - std::unordered_map locals; - int localVarIndex; - std::vector instructions; -}; diff --git a/CuriousX/Gen/Codegen.cpp b/CuriousX/Generation/Codegen.cpp similarity index 53% rename from CuriousX/Gen/Codegen.cpp rename to CuriousX/Generation/Codegen.cpp index cf0f7a1..fdc583a 100644 --- a/CuriousX/Gen/Codegen.cpp +++ b/CuriousX/Generation/Codegen.cpp @@ -1,10 +1,6 @@ #include "Codegen.hpp" -// TODO -// restructure codegen, maybe call it in semantic cuz they have same structure -// boolean true and false parses < and > - -void WasmGen::traverse(const ASTNode& node) +void WasmGen::generate(const ASTNode& node) { switch (node.getType()) { @@ -18,7 +14,7 @@ void WasmGen::traverse(const ASTNode& node) generateBlock(static_cast(node)); break; default: - // Handle unexpected node types + throw Error("Unexpected type", node.token.location, ErrorType::SEMANTIC); break; } } @@ -28,9 +24,8 @@ void WasmGen::generateBinaryOp(const BinaryNode& node) if (node.token.type == LexerTokenType::AssignToken) { generateExpression(static_cast(*node.right)); - int localIndex = getLocalIndex(node.left->token.value); - addInstruction( - WasmInstructionWithData(WasmInstruction::LocalSet, std::to_string(localIndex))); + int localIndex = getOrCreateLocalIndex(node.left->token.value); + addInstruction(WasmInstructionWithData(WasmInstruction::LocalSet, std::to_string(localIndex))); } else { @@ -50,67 +45,56 @@ void WasmGen::generateExpression(const BinaryNode& node) switch (node.token.type) { case LexerTokenType::PlusToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? 
WasmInstruction::F32Add - : WasmInstruction::I32Add)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Add : WasmInstruction::I32Add)); break; case LexerTokenType::MinusToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Sub - : WasmInstruction::I32Sub)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Sub : WasmInstruction::I32Sub)); break; case LexerTokenType::MultiplyToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Mul - : WasmInstruction::I32Mul)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Mul : WasmInstruction::I32Mul)); break; case LexerTokenType::DivideToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Div - : WasmInstruction::I32DivS)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Div : WasmInstruction::I32DivS)); break; case LexerTokenType::IntToken: - addInstruction( - WasmInstructionWithData(WasmInstruction::I32Const, std::string(node.token.value))); + addInstruction(WasmInstructionWithData(WasmInstruction::I32Const, std::string(node.token.value))); break; case LexerTokenType::FloatToken: - addInstruction( - WasmInstructionWithData(WasmInstruction::F32Const, std::string(node.token.value))); + addInstruction(WasmInstructionWithData(WasmInstruction::F32Const, std::string(node.token.value))); break; case LexerTokenType::StringToken: - // Handle string constants (this might need more complex handling) - addInstruction(WasmInstructionWithData(WasmInstruction::I32Const, "0")); // Placeholder + addInstruction(WasmInstructionWithData(WasmInstruction::I32Const, "offset " + std::to_string(m_stringOffset))); + m_stringOffset += int(node.token.value.size() - 2); + addInstruction(WasmInstructionWithData(WasmInstruction::I32Const, + std::to_string(m_stringOffset))); // subtract 2 for quotes break; case 
LexerTokenType::BoolToken: - addInstruction(WasmInstructionWithData(WasmInstruction::I32Const, - node.token.value == "true" ? "true" : "false")); + addInstruction( + WasmInstructionWithData(WasmInstruction::I32Const, node.token.value == "true" ? "true" : "false")); break; case LexerTokenType::VarToken: { - int localIndex = getLocalIndex(node.token.value); - addInstruction( - WasmInstructionWithData(WasmInstruction::LocalGet, std::to_string(localIndex))); + int localIndex = getOrCreateLocalIndex(node.token.value); + addInstruction(WasmInstructionWithData(WasmInstruction::LocalGet, std::to_string(localIndex))); } break; case LexerTokenType::EqualToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Eq - : WasmInstruction::I32Eq)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Eq : WasmInstruction::I32Eq)); break; case LexerTokenType::NotEqualToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Ne - : WasmInstruction::I32Ne)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Ne : WasmInstruction::I32Ne)); break; case LexerTokenType::GreaterEqualToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Ge - : WasmInstruction::I32GeS)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Ge : WasmInstruction::I32GeS)); break; case LexerTokenType::GreaterToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Gt - : WasmInstruction::I32GtS)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Gt : WasmInstruction::I32GtS)); break; case LexerTokenType::LessEqualToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Le - : WasmInstruction::I32LeS)); + addInstruction(WasmInstructionWithData(isFloatOperation ? 
WasmInstruction::F32Le : WasmInstruction::I32LeS)); break; case LexerTokenType::LessToken: - addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Lt - : WasmInstruction::I32LtS)); + addInstruction(WasmInstructionWithData(isFloatOperation ? WasmInstruction::F32Lt : WasmInstruction::I32LtS)); break; default: // Handle unsupported operations @@ -118,48 +102,54 @@ void WasmGen::generateExpression(const BinaryNode& node) } } -int WasmGen::getLocalIndex(std::string_view varName) +int WasmGen::getOrCreateLocalIndex(std::string_view varName) { - auto it = locals.find(std::string(varName)); - if (it != locals.end()) + auto it = m_locals.find(std::string(varName)); + if (it != m_locals.end()) { return it->second; } - int newIndex = localVarIndex++; - locals[std::string(varName)] = newIndex; + int newIndex = m_nextLocalIndex++; + m_locals[std::string(varName)] = newIndex; return newIndex; } -void WasmGen::addInstruction(const WasmInstructionWithData& instruction) +void WasmGen::addInstruction(WasmInstructionWithData instruction) { - instructions.push_back(instruction); + m_instructions.push_back(instruction); } -const std::vector& WasmGen::getInstructions() const +// Used lambda sike!!!! 
:) +bool WasmGen::isFloatType(const BinaryNode& node) { - return instructions; -} + auto isFloatOperand = [](const std::unique_ptr& operand) -> bool + { + if (!operand) + return false; -const std::unordered_map& WasmGen::getLocalMap() const -{ - return locals; -} + const auto& token = operand->token; -bool WasmGen::isFloatType(const BinaryNode& node) -{ - if (node.left) - return ScopedSymbolTable::getInstance().isFloatType(node.left->token.value); - if (node.right) - return ScopedSymbolTable::getInstance().isFloatType(node.right->token.value); - return false; + if (token.type == LexerTokenType::FloatToken) + return true; + + if (token.type == LexerTokenType::VarToken) + { + if (auto type = ScopedSymbolTable::getInstance().lookup(std::string(token.value))) + { + return *type == InferredType::FLOAT; + } + } + + return false; + }; + + return isFloatOperand(node.left) || isFloatOperand(node.right); } void WasmGen::generateConditional(const ConditionalNode& node) { // Generate code for the condition generateExpression(static_cast(*node.condition)); - - // Start of if block addInstruction(WasmInstructionWithData(WasmInstruction::If)); // Generate code for the if block @@ -171,20 +161,37 @@ void WasmGen::generateConditional(const ConditionalNode& node) addInstruction(WasmInstructionWithData(WasmInstruction::Else)); generateBlock(*node.elseNode); } - - // End of if-else block addInstruction(WasmInstructionWithData(WasmInstruction::End)); } void WasmGen::generateBlock(const TreeNode& node) { - auto blah = node.token; + auto operand = node.token; for (const auto& block : node.children) { - traverse(*block); + generate(*block); } - if (blah.type == LexerTokenType::PrintToken) + if (operand.type == LexerTokenType::PrintToken) { addInstruction(WasmInstructionWithData(WasmInstruction::CallPrint)); } +} + +void WasmGen::addGeneratedCodeToOutput() +{ + nlohmann::json instructionArray = nlohmann::json::array(); + for (const auto& instr : m_instructions) + { + 
instructionArray.push_back(instructionToString(instr)); + } + m_output.getJson()["Gen"].push_back(instructionArray); + for (const auto& local : m_locals) + { + m_output.getJson()["Local"].push_back({{"name", local.first}, {"index", local.second}}); + } +} + +const std::vector WasmGen::getInstructions() const +{ + return m_instructions; } \ No newline at end of file diff --git a/CuriousX/Generation/Codegen.hpp b/CuriousX/Generation/Codegen.hpp new file mode 100644 index 0000000..9b3dca8 --- /dev/null +++ b/CuriousX/Generation/Codegen.hpp @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include + +#include "Semantic.hpp" +#include "WasmInstructions.hpp" + + +class WasmGen +{ + public: + explicit WasmGen(CompilerOutput& output): m_output(output){} + + void generate(const ASTNode& rootNode); + void addGeneratedCodeToOutput(); + const std::vector getInstructions() const; + + private: + // Node traversal methods + void generateBinaryOp(const BinaryNode& node); + void generateConditional(const ConditionalNode& node); + void generateBlock(const TreeNode& node); + // Expression generation methods + void generateExpression(const BinaryNode& node); + + // Helper methods + bool isFloatType(const BinaryNode& node); + int getOrCreateLocalIndex(std::string_view varName); + void addInstruction(WasmInstructionWithData instruction); + + // Data members + std::unordered_map m_locals; + std::vector m_instructions; + int m_nextLocalIndex = 0; + int m_stringOffset = 0; + CompilerOutput& m_output; +}; \ No newline at end of file diff --git a/CuriousX/Semantic/README.md b/CuriousX/Generation/README.md similarity index 100% rename from CuriousX/Semantic/README.md rename to CuriousX/Generation/README.md diff --git a/CuriousX/Gen/WasmInstructions.hpp b/CuriousX/Generation/WasmInstructions.hpp similarity index 90% rename from CuriousX/Gen/WasmInstructions.hpp rename to CuriousX/Generation/WasmInstructions.hpp index f64f65f..37835d6 100644 --- a/CuriousX/Gen/WasmInstructions.hpp +++ 
b/CuriousX/Generation/WasmInstructions.hpp @@ -1,9 +1,5 @@ #pragma once -#include #include -#include -#include -#include enum class WasmInstruction { @@ -59,7 +55,7 @@ struct WasmInstructionWithData std::string data; bool has_data; - WasmInstructionWithData(WasmInstruction instr) : instruction(instr), has_data(false) {} + explicit WasmInstructionWithData(WasmInstruction instr) : instruction(instr), has_data(false) {} WasmInstructionWithData(WasmInstruction instr, std::string value) : instruction(instr), data(value), has_data(true) diff --git a/CuriousX/Lexer/Lexer.hpp b/CuriousX/Lexer/Lexer.hpp index f2b9132..b2525af 100644 --- a/CuriousX/Lexer/Lexer.hpp +++ b/CuriousX/Lexer/Lexer.hpp @@ -6,6 +6,7 @@ #include #include "LexerToken.hpp" +#include "Error.hpp" class Lexer { @@ -26,13 +27,13 @@ class Lexer private: std::string_view data; - size_t pos = 0; - unsigned short x_pos = 1; - unsigned short y_pos = 1; + size_t pos = 0; + unsigned short x_pos = 1; + unsigned short y_pos = 1; struct Checkpoint { - size_t pos; + size_t pos; unsigned short x_pos; unsigned short y_pos; }; @@ -45,7 +46,7 @@ class Lexer { if (checkpoint) { - pos = checkpoint->pos; + pos = checkpoint->pos; x_pos = checkpoint->x_pos; y_pos = checkpoint->y_pos; checkpoint.reset(); @@ -75,8 +76,8 @@ class Lexer static bool isInt(std::string_view data) { - return std::all_of(data.begin(), data.end(), - [](char c) { return std::isdigit(static_cast(c)); }); + return std::all_of( + data.begin(), data.end(), [](char c) { return std::isdigit(static_cast(c)); }); } LexerToken handleComment(size_t startPos, const SourceLocation& location) @@ -94,22 +95,22 @@ class Lexer LexerToken doGetNextToken() { const SourceLocation location = currentLocation(); - const auto startPos = pos; - const char nchar = next_char(); + const auto startPos = pos; + const char nchar = next_char(); // Map of single-character tokens - static const std::unordered_map> - singleCharTokens = {{'\0', {"\0", LexerTokenType::Eof}}, - {'\n', 
{"\\n", LexerTokenType::Newline}}, - {'\t', {"\t", LexerTokenType::Tab}}, - {'(', {"(", LexerTokenType::ParenOpen}}, - {')', {")", LexerTokenType::ParenClose}}, - {'{', {"{", LexerTokenType::BracesOpen}}, - {'}', {"}", LexerTokenType::BracesClose}}, - {'+', {"+", LexerTokenType::PlusToken}}, - {'/', {"/", LexerTokenType::DivideToken}}, - {'*', {"*", LexerTokenType::MultiplyToken}}, - {'-', {"-", LexerTokenType::MinusToken}}}; + static const std::unordered_map> singleCharTokens = { + {'\0', {"\0", LexerTokenType::Eof}}, + {'\n', {"\\n", LexerTokenType::Newline}}, + {'\t', {"\t", LexerTokenType::Tab}}, + {'(', {"(", LexerTokenType::ParenOpen}}, + {')', {")", LexerTokenType::ParenClose}}, + {'{', {"{", LexerTokenType::BracesOpen}}, + {'}', {"}", LexerTokenType::BracesClose}}, + {'+', {"+", LexerTokenType::PlusToken}}, + {'/', {"/", LexerTokenType::DivideToken}}, + {'*', {"*", LexerTokenType::MultiplyToken}}, + {'-', {"-", LexerTokenType::MinusToken}}}; // Check for single-character tokens if (auto it = singleCharTokens.find(nchar); it != singleCharTokens.end()) @@ -126,7 +127,7 @@ class Lexer { if (next_char() == '=') return {data.substr(startPos, 2), location, LexerTokenType::NotEqualToken}; - throw Error("Lexical Error- Unexpected character after '!' at line ", location, ErrorType::LEXICAL); + throw Error("Lexical Error- Unexpected character after '!' 
", location, ErrorType::LEXICAL); } if (nchar == '>') @@ -159,36 +160,39 @@ class Lexer if (nchar == '"') { size_t count = 1; - auto t = next_char(); - while (t != '"') { + auto t = next_char(); + while (t != '"') + { if (t == '\0' || t == '\n') throw Error("Unclosed string literal", location, ErrorType::LEXICAL); count++; t = next_char(); - } + } return {data.substr(startPos, count + 1), location, LexerTokenType::StringToken}; } if (!(std::isalpha(nchar) || std::isdigit(nchar))) { - throw Error(" Lexical Error- Unknown character at line ", location, ErrorType::LEXICAL); + throw Error(" Lexical Error- Unknown character", location, ErrorType::LEXICAL); } // Handle numeric and keyword tokens auto substr = next_valid_sequences(startPos); - if (std::all_of(substr.begin(), substr.end(), - [](char c) { return std::isdigit(c) || c == '.'; })) + if (std::all_of(substr.begin(), substr.end(), [](char c) { return std::isdigit(c) || c == '.'; })) { - return {substr, location, - substr.find('.') == std::string::npos ? LexerTokenType::IntToken - : LexerTokenType::FloatToken}; + return {substr, + location, + substr.find('.') == std::string::npos ? 
LexerTokenType::IntToken : LexerTokenType::FloatToken}; } // Map of keyword tokens static const std::unordered_map keywords = { - {"print", LexerTokenType::PrintToken}, {"Print", LexerTokenType::PrintToken}, - {"if", LexerTokenType::IfToken}, {"else", LexerTokenType::ElseToken}, - {"true", LexerTokenType::BoolToken}, {"false", LexerTokenType::BoolToken}}; + {"print", LexerTokenType::PrintToken}, + {"Print", LexerTokenType::PrintToken}, + {"if", LexerTokenType::IfToken}, + {"else", LexerTokenType::ElseToken}, + {"true", LexerTokenType::BoolToken}, + {"false", LexerTokenType::BoolToken}}; if (auto it = keywords.find(substr); it != keywords.end()) { diff --git a/CuriousX/Lexer/LexerToken.hpp b/CuriousX/Lexer/LexerToken.hpp index 96c643c..c947a55 100644 --- a/CuriousX/Lexer/LexerToken.hpp +++ b/CuriousX/Lexer/LexerToken.hpp @@ -1,7 +1,6 @@ #pragma once #include "SourceLocation.hpp" -#include "Error.hpp" #include enum class LexerTokenType diff --git a/CompilerUtils/Node.hpp b/CuriousX/Parser/Node.hpp similarity index 62% rename from CompilerUtils/Node.hpp rename to CuriousX/Parser/Node.hpp index 4c08a90..66824e5 100644 --- a/CompilerUtils/Node.hpp +++ b/CuriousX/Parser/Node.hpp @@ -15,20 +15,19 @@ class ASTNode { public: ASTNode(const LexerToken& token) : token(token) {} - virtual ~ASTNode() = default; + virtual ~ASTNode() = default; virtual NodeType getType() const = 0; - LexerToken token; + LexerToken token; }; class BinaryNode : public ASTNode { public: - BinaryNode(std::unique_ptr left, std::unique_ptr right, - const LexerToken& token) + BinaryNode(std::unique_ptr left, std::unique_ptr right, const LexerToken& token) : ASTNode(token), left(std::move(left)), right(std::move(right)) { } - NodeType getType() const override { return NodeType::BinaryOperation; } + NodeType getType() const override { return NodeType::BinaryOperation; } std::unique_ptr left; std::unique_ptr right; }; @@ -40,46 +39,43 @@ class TreeNode : public ASTNode : ASTNode(token), 
children(std::move(children)) { } - NodeType getType() const override { return NodeType::BlockOperation; } + NodeType getType() const override { return NodeType::BlockOperation; } std::vector> children; }; class ConditionalNode : public ASTNode { public: - ConditionalNode(std::unique_ptr condition, std::unique_ptr ifNode, - std::unique_ptr elseNode, const LexerToken& token) - : ASTNode(token), condition(std::move(condition)), ifNode(std::move(ifNode)), - elseNode(std::move(elseNode)) + ConditionalNode(std::unique_ptr condition, + std::unique_ptr ifNode, + std::unique_ptr elseNode, + const LexerToken& token) + : ASTNode(token), condition(std::move(condition)), ifNode(std::move(ifNode)), elseNode(std::move(elseNode)) { } - NodeType getType() const override { return NodeType::ConditionalOperation; } - std::unique_ptr condition; + NodeType getType() const override { return NodeType::ConditionalOperation; } + std::unique_ptr condition; std::unique_ptr ifNode; std::unique_ptr elseNode; }; - - class ASTNodeFactory { public: - static std::unique_ptr createBinaryNode(std::unique_ptr left, - std::unique_ptr right, - const LexerToken& token) + static std::unique_ptr + createBinaryNode(std::unique_ptr left, std::unique_ptr right, const LexerToken& token) { return std::make_unique(std::move(left), std::move(right), token); } - static std::unique_ptr createConditionalNode(std::unique_ptr condition, + static std::unique_ptr createConditionalNode(std::unique_ptr condition, std::unique_ptr ifNode, std::unique_ptr elseNode, - const LexerToken& token) + const LexerToken& token) { - return std::make_unique(std::move(condition), std::move(ifNode), - std::move(elseNode), token); + return std::make_unique(std::move(condition), std::move(ifNode), std::move(elseNode), token); } static std::unique_ptr createTreeNode(std::vector> children, - const LexerToken& token) + const LexerToken& token) { return std::make_unique(std::move(children), token); } diff --git a/CuriousX/Parser/Parser.cpp 
b/CuriousX/Parser/Parser.cpp index 7cbe3f0..1e263ce 100644 --- a/CuriousX/Parser/Parser.cpp +++ b/CuriousX/Parser/Parser.cpp @@ -1,35 +1,26 @@ #include "Parser.hpp" +#include +#include + +Parser::Parser(std::string_view data, CompilerOutput& output) + : m_lexer(std::make_unique(data)) + , m_prevToken({"Program", {0, 0}, LexerTokenType::ProgramToken}) + , m_output(output) -bool Parser::parseTokens() { - LexerToken token; - advanceToken(token); +} - while (token.type != LexerTokenType::Eof) - { - if (auto node = parseStatement(token)) - { - m_semantic.analyze(*node); - m_wasmgen.traverse(*node); - m_root->children.emplace_back(std::move(node)); - } - if (!expectNewlineOrEOF(token)) - { - throw Error("Expected new line before " + std::string(token.value) + " at ", - token.location, ErrorType::SYNTAX); - } - advancePastNewlines(token); - } - CompilerOutputParser::getInstance().setASTOutput(m_root, m_semantic.getSymbolTable()); - CompilerOutputParser::getInstance().codeOutput(m_wasmgen.getInstructions()); - return !m_root->children.empty(); +void Parser::addTokenToOutput(const LexerToken& token) +{ + m_output.getJson()["Lexer"].push_back( + {{"type", toString(token.type)}, {"value", token.value}, {"location", token.location.toString()}}); } +// Helper methods bool Parser::expectNewlineOrEOF(const LexerToken& token) const { return token.type == LexerTokenType::Newline || token.type == LexerTokenType::Eof || - token.type == LexerTokenType::CommentToken || - m_prevToken.type == LexerTokenType::Newline; + token.type == LexerTokenType::CommentToken || m_prevToken.type == LexerTokenType::Newline; } void Parser::advancePastNewlines(LexerToken& token) @@ -41,14 +32,19 @@ void Parser::advancePastNewlines(LexerToken& token) void Parser::advanceToken(LexerToken& token) { m_prevToken = token; - token = m_lexer->nextNWToken(); - CompilerOutputParser::getInstance().SetLexerOutput(token); + token = m_lexer->nextNWToken(); + addTokenToOutput(token); } -std::unique_ptr 
Parser::parseStatement(LexerToken& token) +bool Parser::isValidFactorStart(LexerTokenType type) { - std::shared_ptr Node; + return type == LexerTokenType::VarToken || type == LexerTokenType::FloatToken || type == LexerTokenType::IntToken || + type == LexerTokenType::StringToken || type == LexerTokenType::BoolToken; +} +// Parsing methods +std::unique_ptr Parser::parseStatement(LexerToken& token) +{ switch (token.type) { case LexerTokenType::CommentToken: @@ -64,11 +60,6 @@ std::unique_ptr Parser::parseStatement(LexerToken& token) } } -/** - * @ parseExpression is made of sum of Terms - * - * E -> T+E || T-E || T - */ std::unique_ptr Parser::parseExpression(LexerToken& token) { auto left = parseTerm(token); @@ -77,16 +68,11 @@ std::unique_ptr Parser::parseExpression(LexerToken& token) auto op = token; advanceToken(token); auto right = parseTerm(token); - left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); + left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); } return left; } -/** - * @ parseTerm is a product of factors - * - * T -> F*T || F/T || F - */ std::unique_ptr Parser::parseTerm(LexerToken& token) { auto left = parseFactor(token); @@ -103,7 +89,7 @@ std::unique_ptr Parser::parseTerm(LexerToken& token) auto op = token; advanceToken(token); auto right = parseFactor(token); - left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); + left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); advanceToken(token); } return left; @@ -119,11 +105,6 @@ std::unique_ptr Parser::parseAssignment(std::unique_ptr& left, return ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), type); } -/** - * @ parseFactor is a number, string or parenthesized sub expression - * - * F -> ID || Integer || E - */ std::unique_ptr Parser::parseFactor(LexerToken& token) { if (isValidFactorStart(token.type)) @@ -141,46 +122,33 @@ std::unique_ptr Parser::parseFactor(LexerToken& 
token) return expr; } + handleUnexpectedToken(token); + return nullptr; +} + +void Parser::handleUnexpectedToken(const LexerToken& token) +{ switch (token.type) { case LexerTokenType::ElseToken: - throw Error("Unexpected 'else' keyword. 'else' must be preceded by 'if'", token.location, - ErrorType::SYNTAX); - + throw Error("Unexpected 'else' keyword. 'else' must be preceded by 'if'", token.location, ErrorType::SYNTAX); case LexerTokenType::ParenClose: - throw Error("Unexpected closing parenthesis ')'. Did you forget an opening parenthesis or " - "is this an empty parenthesis? At ", - token.location, ErrorType::SYNTAX); - + throw Error("Unexpected closing parenthesis ')'", token.location, ErrorType::SYNTAX); case LexerTokenType::Eof: - throw Error("Unexpected end of file. Expression is incomplete", token.location, - ErrorType::SYNTAX); - + throw Error("Unexpected end of file. Expression is incomplete", token.location, ErrorType::SYNTAX); case LexerTokenType::AssignToken: - throw Error("Assignment is not allowed within print statement", token.location, - ErrorType::SYNTAX); - + throw Error("Assignment is not allowed within print statement", token.location, ErrorType::SYNTAX); default: - throw Error("Unexpected token '" + std::string(token.value) + - "' in factor. 
Expected a value, variable, or '('", - token.location, ErrorType::SYNTAX); + throw Error("Unexpected token '" + std::string(token.value) + "' in factor", token.location, ErrorType::SYNTAX); } } -bool Parser::isValidFactorStart(LexerTokenType type) -{ - return type == LexerTokenType::VarToken || type == LexerTokenType::FloatToken || - type == LexerTokenType::IntToken || type == LexerTokenType::StringToken || - type == LexerTokenType::BoolToken; -} - std::unique_ptr Parser::parseConditional(LexerToken& token) { - if (m_prevToken.type != LexerTokenType::Newline && - m_prevToken.type != LexerTokenType::ProgramToken) + if (m_prevToken.type != LexerTokenType::Newline && m_prevToken.type != LexerTokenType::ProgramToken) { - throw Error("'if' statement cannot start a program and must start on a new line", - token.location, ErrorType::SYNTAX); + throw Error( + "'if' statement cannot start a program and must start on a new line", token.location, ErrorType::SYNTAX); } // condition @@ -202,8 +170,7 @@ std::unique_ptr Parser::parseConditional(LexerToken& token) elseBlock = parseBlock(token, {"Else", {0, 0}, LexerTokenType::ElseToken}); } - return ASTNodeFactory::createConditionalNode(std::move(cond), std::move(then), - std::move(elseBlock), op); + return ASTNodeFactory::createConditionalNode(std::move(cond), std::move(then), std::move(elseBlock), op); } std::unique_ptr Parser::parseComparisonExpression(LexerToken& token) @@ -215,14 +182,13 @@ std::unique_ptr Parser::parseComparisonExpression(LexerToken& token) auto left = parseExpression(token); if (token.type == LexerTokenType::GreaterToken || token.type == LexerTokenType::LessToken || - token.type == LexerTokenType::GreaterEqualToken || - token.type == LexerTokenType::LessEqualToken || token.type == LexerTokenType::EqualToken || - token.type == LexerTokenType::NotEqualToken) + token.type == LexerTokenType::GreaterEqualToken || token.type == LexerTokenType::LessEqualToken || + token.type == LexerTokenType::EqualToken || 
token.type == LexerTokenType::NotEqualToken) { auto op = token; advanceToken(token); auto right = parseExpression(token); - left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); + left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); } if (token.type != LexerTokenType::ParenClose) @@ -236,7 +202,7 @@ std::unique_ptr Parser::parseBlock(LexerToken& token, LexerToken what) advanceToken(token); std::vector> statements; - LexerToken blockToken = what; + LexerToken blockToken = what; if (token.type != LexerTokenType::BracesOpen) throw Error("Expected opening braces for block", token.location, ErrorType::SYNTAX); @@ -265,28 +231,20 @@ std::unique_ptr Parser::parseBlock(LexerToken& token, LexerToken what) return ASTNodeFactory::createTreeNode(std::move(statements), blockToken); } -/** - * @ Print out a number, string, variable or even expression - * - * Print -> ID || Integer || E || String - */ - std::unique_ptr Parser::parsePrintStatement(LexerToken& token) { auto printToken = token; advanceToken(token); // Consume 'print' token if (token.type != LexerTokenType::ParenOpen) - throw Error("Expected opening parenthesis after 'print'", token.location, - ErrorType::SYNTAX); + throw Error("Expected opening parenthesis after 'print'", token.location, ErrorType::SYNTAX); advanceToken(token); // Consume '(' auto expression = parsePrintExpression(token); if (token.type != LexerTokenType::ParenClose) - throw Error("Expected closing parenthesis after print expression", token.location, - ErrorType::SYNTAX); + throw Error("Expected closing parenthesis after print expression", token.location, ErrorType::SYNTAX); advanceToken(token); // Consume ')' @@ -302,14 +260,13 @@ std::unique_ptr Parser::parsePrintExpression(LexerToken& token) while (token.type == LexerTokenType::PlusToken || token.type == LexerTokenType::MinusToken || token.type == LexerTokenType::GreaterToken || token.type == LexerTokenType::LessToken || - token.type == 
LexerTokenType::GreaterEqualToken || - token.type == LexerTokenType::LessEqualToken || + token.type == LexerTokenType::GreaterEqualToken || token.type == LexerTokenType::LessEqualToken || token.type == LexerTokenType::EqualToken || token.type == LexerTokenType::NotEqualToken) { auto op = token; advanceToken(token); auto right = parseTerm(token); - left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); + left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); } return left; @@ -321,16 +278,92 @@ std::unique_ptr Parser::parsePrintTerm(LexerToken& token) advanceToken(token); if (token.type == LexerTokenType::AssignToken) - throw Error("Assignment is not allowed within print statement", token.location, - ErrorType::SYNTAX); + throw Error("Assignment is not allowed within print statement", token.location, ErrorType::SYNTAX); while (token.type == LexerTokenType::MultiplyToken || token.type == LexerTokenType::DivideToken) { auto op = token; advanceToken(token); auto right = parseFactor(token); - left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); + left = ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), op); } return left; } + +// Json output methods + +void Parser::addASTToOutput(const std::unique_ptr& root) +{ + m_output.getJson()["AST"] = nodeToJson(root.get()); +} + +nlohmann::json Parser::nodeToJson(const ASTNode* node) +{ + if (!node) + return nullptr; + + nlohmann::json j = {{"type", getNodeTypeName(node->getType())}, + {"token", + {{"type", toString(node->token.type)}, + {"value", node->token.value}, + {"location", node->token.location.toString()}}}}; + + switch (node->getType()) + { + case NodeType::BinaryOperation: + jsonifyBinaryNode(j, static_cast(node)); + break; + case NodeType::ConditionalOperation: + jsonifyConditionalNode(j, static_cast(node)); + break; + case NodeType::BlockOperation: + jsonifyBlockNode(j, static_cast(node)); + break; + default: + // No 
additional processing needed for unknown types + break; + } + + return j; +} + +void Parser::jsonifyBinaryNode(nlohmann::json& j, const BinaryNode* node) +{ + j["left"] = nodeToJson(node->left.get()); + j["right"] = nodeToJson(node->right.get()); +} + +void Parser::jsonifyConditionalNode(nlohmann::json& j, const ConditionalNode* node) +{ + j["condition"] = nodeToJson(node->condition.get()); + j["ifNode"] = nodeToJson(node->ifNode.get()); + if (node->elseNode) + { + j["elseNode"] = nodeToJson(node->elseNode.get()); + } +} + +void Parser::jsonifyBlockNode(nlohmann::json& j, const TreeNode* node) +{ + j["children"] = nlohmann::json::array(); + for (const auto& child : node->children) + { + j["children"].push_back(nodeToJson(child.get())); + } +} + +std::string Parser::getNodeTypeName(NodeType type) +{ + switch (type) + { + case NodeType::BinaryOperation: + return "BinaryOperation"; + case NodeType::ConditionalOperation: + return "ConditionalOperation"; + case NodeType::BlockOperation: + return "BlockOperation"; + default: + return "Unknown"; + } +} \ No newline at end of file diff --git a/CuriousX/Parser/Parser.hpp b/CuriousX/Parser/Parser.hpp index 45bde88..b4a792d 100644 --- a/CuriousX/Parser/Parser.hpp +++ b/CuriousX/Parser/Parser.hpp @@ -1,43 +1,49 @@ #pragma once -#include "Semantic.hpp" -#include "Codegen.hpp" +#include "CompilerOutput.hpp" +#include "Node.hpp" +#include +#include #include class Parser { public: - explicit Parser(std::string_view data) - : m_lexer(std::make_unique(data)), - m_root(ASTNodeFactory::createTreeNode({}, - {"Program", {0, 0}, LexerTokenType::ProgramToken})), - m_prevToken({"Program", {0, 0}, LexerTokenType::ProgramToken}), m_semantic(), m_wasmgen() - { - } - - bool parseTokens(); + explicit Parser(std::string_view data, CompilerOutput& output); std::unique_ptr parseStatement(LexerToken& token); - std::unique_ptr parseFactor(LexerToken& token); - std::unique_ptr parseExpression(LexerToken& token); - std::unique_ptr 
parseTerm(LexerToken& token); - std::unique_ptr parseConditional(LexerToken& token); - std::unique_ptr parseAssignment(std::unique_ptr& left, LexerToken& token); - std::unique_ptr parseComparisonExpression(LexerToken& token); - std::unique_ptr parseBlock(LexerToken& token, LexerToken what); - std::unique_ptr parsePrintStatement(LexerToken& token); - std::unique_ptr parsePrintExpression(LexerToken& token); - std::unique_ptr parsePrintTerm(LexerToken& token); + void advanceToken(LexerToken& token); + bool expectNewlineOrEOF(const LexerToken& token) const; + void advancePastNewlines(LexerToken& token); + void addASTToOutput(const std::unique_ptr& root); private: + // Parsing methods + std::unique_ptr parseExpression(LexerToken& token); + std::unique_ptr parseTerm(LexerToken& token); + std::unique_ptr parseFactor(LexerToken& token); + std::unique_ptr parseConditional(LexerToken& token); + std::unique_ptr parseAssignment(std::unique_ptr& left, LexerToken& token); + std::unique_ptr parseComparisonExpression(LexerToken& token); + std::unique_ptr parseBlock(LexerToken& token, LexerToken what); + std::unique_ptr parsePrintStatement(LexerToken& token); + std::unique_ptr parsePrintExpression(LexerToken& token); + std::unique_ptr parsePrintTerm(LexerToken& token); + + // Helper methods bool isValidFactorStart(LexerTokenType type); - void advanceToken(LexerToken& token); - bool expectNewlineOrEOF(const LexerToken& token) const; - void advancePastNewlines(LexerToken& token); + void handleUnexpectedToken(const LexerToken& token); + + // Json functions + nlohmann::json nodeToJson(const ASTNode* node); + void jsonifyBlockNode(nlohmann::json& j, const TreeNode* node); + void jsonifyConditionalNode(nlohmann::json& j, const ConditionalNode* node); + void jsonifyBinaryNode(nlohmann::json& j, const BinaryNode* node); + std::string getNodeTypeName(NodeType type); + void addTokenToOutput(const LexerToken& token); + // Member variables std::unique_ptr m_lexer; - std::unique_ptr m_root; - 
LexerToken m_prevToken; - Semantic m_semantic; - WasmGen m_wasmgen; + LexerToken m_prevToken; + CompilerOutput& m_output; }; \ No newline at end of file diff --git a/CuriousX/Gen/Readme.md b/CuriousX/Semantic/Readme.md similarity index 100% rename from CuriousX/Gen/Readme.md rename to CuriousX/Semantic/Readme.md diff --git a/CuriousX/Semantic/Semantic.cpp b/CuriousX/Semantic/Semantic.cpp index b35a94e..ce91836 100644 --- a/CuriousX/Semantic/Semantic.cpp +++ b/CuriousX/Semantic/Semantic.cpp @@ -1,11 +1,9 @@ #include "Semantic.hpp" #include #include +#include -// if can start a program ? -// division by zero - -bool Semantic::analyze(const ASTNode& node) +void Semantic::analyzeTree(const ASTNode& node) { switch (node.getType()) { @@ -19,14 +17,13 @@ bool Semantic::analyze(const ASTNode& node) analyzeBlockOperation(static_cast(node)); break; default: + throw Error("Unexpected node type", node.token.location, ErrorType::SEMANTIC); break; } - return true; } void Semantic::analyzeBinaryOperation(const BinaryNode& node) { - flag = false; if (node.token.type == LexerTokenType::AssignToken) { analyzeAssignment(node); @@ -47,20 +44,20 @@ void Semantic::analyzeAssignment(const BinaryNode& node) { throw Error("Invalid assignment: left side must be a variable", node.left->token.location); } - const std::string& varName = std::string(node.left->token.value); - InferredType rightType = inferType(*node.right); - if (ScopedSymbolTable::getInstance().contains(varName)) + const std::string& varName = getVariableName(*node.left); + InferredType rightType = inferType(*node.right); + + auto& symbolTable = ScopedSymbolTable::getInstance(); + + if (symbolTable.contains(varName)) { - auto existingType = ScopedSymbolTable::getInstance().lookup(varName); - if (existingType != rightType) - { - throw Error("Type mismatch in assignment", node.left->token.location); - } + auto existingType = symbolTable.lookup(varName); + ensureTypeMatch(existingType.value(), rightType, node.left->token); } else 
{ - ScopedSymbolTable::getInstance().insert(varName, rightType, node.left->token); + symbolTable.insert(varName, rightType, node.left->token); } } @@ -82,7 +79,6 @@ InferredType Semantic::inferType(const ASTNode& node) case LexerTokenType::MinusToken: case LexerTokenType::DivideToken: case LexerTokenType::MultiplyToken: - return inferTypeFromOperation(static_cast(node)); case LexerTokenType::EqualToken: case LexerTokenType::NotEqualToken: case LexerTokenType::GreaterEqualToken: @@ -95,6 +91,44 @@ InferredType Semantic::inferType(const ASTNode& node) } } +void Semantic::analyzeExpression(const BinaryNode& node) +{ + if (node.left && node.right) + { + ensureTypeMatch(inferType(*node.left), inferType(*node.right), node.token); + } + + if (!containsNonLiteral(node)) + { + throw Error("Literal expressions without effect are not allowed", node.token.location, ErrorType::SEMANTIC); + } +} + +bool Semantic::containsNonLiteral(const ASTNode& node) const +{ + if (node.token.type == LexerTokenType::VarToken) { + return true; + } + + if (node.getType() == NodeType::BinaryOperation) { + const auto* binaryNode = dynamic_cast(&node); + + if (binaryNode) { + bool leftHasNonLiteral = binaryNode->left && containsNonLiteral(*binaryNode->left); + bool rightHasNonLiteral = binaryNode->right && containsNonLiteral(*binaryNode->right); + return leftHasNonLiteral || rightHasNonLiteral; + } + } + return false; +} + +void Semantic::ensureTypeMatch(InferredType left, InferredType right, const LexerToken& token) const +{ + if (left != right) + { + throw Error("Type mismatch in operation", token.location, ErrorType::SEMANTIC); + } +} InferredType Semantic::inferTypeFromVariable(const ASTNode& node) { auto type = ScopedSymbolTable::getInstance().lookup(std::string(node.token.value)); @@ -102,41 +136,37 @@ InferredType Semantic::inferTypeFromVariable(const ASTNode& node) { throw Error("Variable not defined", node.token.location, ErrorType::SEMANTIC); } - flag = true; - return *type; } +bool 
Semantic::isComparisonOp(const BinaryNode& node) +{ + return (node.token.type == LexerTokenType::LessEqualToken || node.token.type == LexerTokenType::LessToken || + node.token.type == LexerTokenType::GreaterEqualToken || node.token.type == LexerTokenType::GreaterToken); +} + InferredType Semantic::inferTypeFromOperation(const BinaryNode& node) { if (!node.left || !node.right) { - throw Error("Unbalanced expression, missing operand", node.token.location, - ErrorType::SEMANTIC); + throw Error("Unbalanced expression, missing operand", node.token.location, ErrorType::SEMANTIC); } - // // check division by zero - // if (node.token.type == LexerTokenType::DivideToken) checkDivisionByZero(*node.right); - - InferredType leftType = inferType(*node.left); - InferredType rightType = inferType(*node.right); - if (leftType != rightType) + if (node.token.type == LexerTokenType::DivideToken) { - throw Error("Type mismatch in operation", node.token.location, ErrorType::SEMANTIC); + checkDivisionByZero(*node.right); } - return leftType; -} -void Semantic::analyzeExpression(const BinaryNode& node) -{ + InferredType leftType = inferType(*node.left); InferredType rightType = inferType(*node.right); - InferredType leftType = inferType(*node.left); - if (rightType != leftType) + ensureTypeMatch(leftType, rightType, node.token); + if ((leftType == InferredType::BOOL && rightType == InferredType::BOOL) && isComparisonOp(node)) { - throw Error("Type mismatch in operation", node.token.location, ErrorType::SEMANTIC); + throw Error("Invalid operation: cannot compare boolean values using <, >, <=, or >=", + node.token.location, + ErrorType::SEMANTIC); } - if (!flag) - throw Error("literal Expressions not allowed", node.token.location, ErrorType::SEMANTIC); + return leftType; } void Semantic::analyzeConditionalOperation(const ConditionalNode& node) @@ -148,37 +178,141 @@ void Semantic::analyzeConditionalOperation(const ConditionalNode& node) inferType(*node.condition); 
analyzeBlockOperation(*node.ifNode); if (node.elseNode) + { analyzeBlockOperation(*node.elseNode); + } +} + +void Semantic::analyzeBlockOperation(const TreeNode& node) +{ + auto& symbolTable = ScopedSymbolTable::getInstance(); + symbolTable.enterScope(); + + if (node.token.type == LexerTokenType::PrintToken) + { + analyzePrintOperation(node); + } + else + { + + for (const auto& statement : node.children) + { + analyzeTree(*statement); + } + } + + symbolTable.exitScope(); } -bool Semantic::isValidConditionType(const LexerToken& token) +void Semantic::analyzePrintOperation(const TreeNode& node) { - return (token.type == LexerTokenType::EqualToken || - token.type == LexerTokenType::GreaterEqualToken || - token.type == LexerTokenType::GreaterToken || - token.type == LexerTokenType::LessEqualToken || + if (node.children.empty()) + { + throw Error("Print statement requires at least one argument", node.token.location, ErrorType::SEMANTIC); + } + + // Ensure each child of the print statement is a valid expression. + for (const auto& child : node.children) + { + analyzePrintExpression(*child); // Analyze the child expression. 
+ } +} + +void Semantic::analyzePrintExpression(const ASTNode& node) +{ + + if (isSimpleLiteralOrVariable(node)) + { + return; + } + else if (node.getType() == NodeType::BinaryOperation) + { + const auto& binaryNode = static_cast(node); + if (binaryNode.left && binaryNode.right) + { + ensureTypeMatch(inferType(*binaryNode.left), inferType(*binaryNode.right), binaryNode.token); + } + } + else + { + throw Error("Invalid expression in print statement", node.token.location, ErrorType::SEMANTIC); + } +} + +bool Semantic::isSimpleLiteralOrVariable(const ASTNode& node) const +{ + return (node.token.type == LexerTokenType::IntToken || node.token.type == LexerTokenType::FloatToken || + node.token.type == LexerTokenType::StringToken || node.token.type == LexerTokenType::BoolToken || + node.token.type == LexerTokenType::VarToken); +} + +void Semantic::checkDivisionByZero(const ASTNode& node) +{ + if (node.token.type == LexerTokenType::IntToken && std::stoi(std::string(node.token.value)) == 0) + { + throw Error("Division by zero", node.token.location, ErrorType::SEMANTIC); + } + else if (node.token.type == LexerTokenType::FloatToken && + std::abs(std::stof(std::string(node.token.value))) < std::numeric_limits::epsilon()) + { + throw Error("Division by zero", node.token.location, ErrorType::SEMANTIC); + } +} + +bool Semantic::isValidConditionType(const LexerToken& token) const +{ + return (token.type == LexerTokenType::EqualToken || token.type == LexerTokenType::GreaterEqualToken || + token.type == LexerTokenType::GreaterToken || token.type == LexerTokenType::LessEqualToken || token.type == LexerTokenType::LessToken || token.type == LexerTokenType::NotEqualToken); } -bool Semantic::isValidBinaryType(const LexerToken& token) +bool Semantic::isValidBinaryType(const LexerToken& token) const { return (token.type == LexerTokenType::PlusToken || token.type == LexerTokenType::MinusToken || - token.type == LexerTokenType::MultiplyToken || - token.type == LexerTokenType::DivideToken); + 
token.type == LexerTokenType::MultiplyToken || token.type == LexerTokenType::DivideToken); } -void Semantic::analyzeBlockOperation(const TreeNode& node) +std::string Semantic::getVariableName(const ASTNode& node) const +{ + return std::string(node.token.value); +} + +nlohmann::json Semantic::tableToJson(const symbolTable& table) { - ScopedSymbolTable::getInstance().enterScope(); + nlohmann::json jArray = nlohmann::json::array(); + for (const auto& scope : table) + { + nlohmann::json scopeJson = nlohmann::json::object(); + for (const auto& [name, info] : scope) + { + scopeJson[name] = { + {"type", getInferredTypeDescription(info.type)}, + {"value", name}, + }; + } + jArray.push_back(scopeJson); + } + return jArray; +} - for (const auto& statement : node.children) +constexpr std::string_view Semantic::getInferredTypeDescription(const InferredType& t) +{ + switch (t) { - analyze(*statement); + case InferredType::BOOL: + return "Boolean"; + case InferredType::FLOAT: + return "Float"; + case InferredType::INTEGER: + return "Integer"; + case InferredType::STRING: + return "String"; + default: + return "An unknown error occurred"; } - ScopedSymbolTable::getInstance().exitScope(); } -const std::vector> Semantic::getSymbolTable() +void Semantic::addSymbolTableToOutput() { - return ScopedSymbolTable::getInstance().getSymbolTable(); + m_output.getJson()["SymbolTable"] = tableToJson(ScopedSymbolTable::getInstance().getSymbolTable()); } \ No newline at end of file diff --git a/CuriousX/Semantic/Semantic.hpp b/CuriousX/Semantic/Semantic.hpp index a7896d1..9b2a8fc 100644 --- a/CuriousX/Semantic/Semantic.hpp +++ b/CuriousX/Semantic/Semantic.hpp @@ -1,33 +1,46 @@ #pragma once -#include "CompilerOutputParser.hpp" -#include +#include "CompilerOutput.hpp" +#include "SymbolTable.hpp" class Semantic { - public: - explicit Semantic() : flag(false) {} + explicit Semantic(CompilerOutput& output) : m_output(output) {} - bool analyze(const ASTNode& node); + void analyzeTree(const ASTNode& 
node); + void addSymbolTableToOutput(); - void analyzeAssignment(const BinaryNode& node); - void analyzeExpression(const BinaryNode& node); + private: + // Analysis methods void analyzeBinaryOperation(const BinaryNode& node); void analyzeConditionalOperation(const ConditionalNode& node); + void analyzeBlockOperation(const TreeNode& node); + void analyzeAssignment(const BinaryNode& node); + void analyzeExpression(const BinaryNode& node); + void analyzePrintOperation(const TreeNode& node); + void analyzePrintExpression(const ASTNode& node); + - void checkDivisionByZero(const ASTNode& node); - + // Type inference methods InferredType inferType(const ASTNode& node); InferredType inferTypeFromVariable(const ASTNode& node); InferredType inferTypeFromOperation(const BinaryNode& node); - bool isValidConditionType(const LexerToken& type); - void analyzeBlockOperation(const TreeNode& node); - bool isValidBinaryType(const LexerToken& token); - - const std::vector> getSymbolTable(); - - private: - bool flag; -}; + // Validation methods + void checkDivisionByZero(const ASTNode& node); + bool isValidConditionType(const LexerToken& token) const; + bool isValidBinaryType(const LexerToken& token) const; + bool containsNonLiteral(const ASTNode& node) const; + bool isSimpleLiteralOrVariable(const ASTNode& node) const; + void ensureTypeMatch(InferredType left, InferredType right, const LexerToken& token) const; + + // Helper methods + std::string getVariableName(const ASTNode& node) const; + nlohmann::json tableToJson(const symbolTable& table); + constexpr std::string_view getInferredTypeDescription(const InferredType& t); + bool isComparisonOp(const BinaryNode& node); + + // Member variables + CompilerOutput& m_output; +}; \ No newline at end of file diff --git a/CompilerUtils/SymbolTable.hpp b/CuriousX/Semantic/SymbolTable.hpp similarity index 80% rename from CompilerUtils/SymbolTable.hpp rename to CuriousX/Semantic/SymbolTable.hpp index c47eec3..a41a3c8 100644 --- 
a/CompilerUtils/SymbolTable.hpp +++ b/CuriousX/Semantic/SymbolTable.hpp @@ -6,6 +6,8 @@ #include "Node.hpp" + + enum class InferredType { INTEGER, @@ -17,19 +19,21 @@ enum class InferredType struct SymbolInfo { InferredType type; - LexerToken token; + LexerToken token; }; +using symbolTable = std::vector>; + class ScopedSymbolTable { private: - std::vector> scopes; - int currentScopeLevel; + symbolTable scopes; + int currentScopeLevel; ScopedSymbolTable() : currentScopeLevel(-1) { enterScope(); // Create global scope } - ScopedSymbolTable(const ScopedSymbolTable&) = delete; + ScopedSymbolTable(const ScopedSymbolTable&) = delete; ScopedSymbolTable& operator=(const ScopedSymbolTable&) = delete; public: @@ -106,13 +110,7 @@ class ScopedSymbolTable return std::nullopt; } - bool isFloatType(std::string_view varName) const - { - return lookup(std::string(varName)) == InferredType::FLOAT; - } + bool isFloatType(std::string_view varName) const { return lookup(std::string(varName)) == InferredType::FLOAT; } - const std::vector> getSymbolTable() - { - return scopes; - } + const symbolTable getSymbolTable() { return scopes; } }; \ No newline at end of file diff --git a/CuriousX/main.cpp b/CuriousX/main.cpp index d582ea1..03de393 100644 --- a/CuriousX/main.cpp +++ b/CuriousX/main.cpp @@ -1,27 +1,15 @@ -#include "Parser.hpp" -#include +#include "Compiler.hpp" +#include "CompilerOutput.hpp" #include - - +#include std::string processFileContent(const std::string& content) { - std::ostringstream output; - try - { - Parser parse(content); - parse.parseTokens(); - - output << CompilerOutputParser::getInstance().getJson(); - } - catch (const Error& ex) - { - CompilerOutputParser::getInstance().setErrorOutput(ex); - output << CompilerOutputParser::getInstance().getJson(); - std::cout< " << std::endl; } - try - { - std::string jsonString = processFileContent(CompilerOutputParser::getInstance().readInputFile(argv[1])); - CompilerOutputParser::getInstance().formatTokens(jsonString, 
argv[2]); - std::cout< + +class CompilerIntegrationTest : public ::testing::Test +{ + protected: + CompilerOutput output; + + bool compile(const std::string& source) + { + Compiler compiler(source, output); + return compiler.compile(); + } +}; + +TEST_F(CompilerIntegrationTest, SimpleProgram) +{ + EXPECT_FALSE(compile(R"( +x = 42 +if x > 0: + y = x + 10 + print(y) +)")); +} + +TEST_F(CompilerIntegrationTest, ComplexExpression) +{ + EXPECT_TRUE(compile(R"( +result = 10 * (20 + 30) / 2 +)")); +} diff --git a/tests/codegen_test.cpp b/tests/codegen_test.cpp new file mode 100644 index 0000000..2a15c9e --- /dev/null +++ b/tests/codegen_test.cpp @@ -0,0 +1,142 @@ +#include +#include "Lexer/Lexer.hpp" +#include "Parser/Parser.hpp" +#include "Semantic/Semantic.hpp" +#include "CompilerOutput.hpp" +#include "Generation/Codegen.hpp" +#include + + +class WasmGenTest : public ::testing::Test { +protected: + CompilerOutput output; + std::unique_ptr generator; + + void SetUp() override { + generator = std::make_unique(output); // Pass output to constructor + } + + // Helper for creating leaf nodes + std::unique_ptr createLeafNode(std::string_view value, LexerTokenType type) { + return ASTNodeFactory::createBinaryNode( + nullptr, + nullptr, + LexerToken{value, {0, 0}, type} + ); + } + + void verifyInstructions(const std::vector& actual, + const std::vector>& expected) { + ASSERT_EQ(actual.size(), expected.size()) << "Instruction count mismatch"; + + for (size_t i = 0; i < actual.size(); ++i) { + EXPECT_EQ(actual[i].instruction, expected[i].first) + << "Instruction mismatch at position " << i; + if (!expected[i].second.empty()) { + EXPECT_TRUE(actual[i].has_data) + << "Missing data at position " << i; + EXPECT_EQ(actual[i].data, expected[i].second) + << "Data mismatch at position " << i + << "\nExpected: '" << expected[i].second + << "'\nActual: '" << actual[i].data << "'"; + } + } + } +}; + +TEST_F(WasmGenTest, IntegerArithmetic) { + // Create: 5 + 3 + auto left = 
createLeafNode("5", LexerTokenType::IntToken); + auto right = createLeafNode("3", LexerTokenType::IntToken); + auto node = ASTNodeFactory::createBinaryNode( + std::move(left), + std::move(right), + LexerToken{"+", {0, 0}, LexerTokenType::PlusToken} + ); + + generator->generate(*node); + auto instructions = generator->getInstructions(); + + std::vector> expected = { + {WasmInstruction::I32Const, "5"}, + {WasmInstruction::I32Const, "3"}, + {WasmInstruction::I32Add, ""} + }; + verifyInstructions(instructions, expected); +} + +TEST_F(WasmGenTest, FloatArithmetic) { + auto left = createLeafNode("5.0", LexerTokenType::FloatToken); + auto right = createLeafNode("3.0", LexerTokenType::FloatToken); + auto node = ASTNodeFactory::createBinaryNode( + std::move(left), + std::move(right), + LexerToken{"*", {0, 0}, LexerTokenType::MultiplyToken} + ); + + generator->generate(*node); + auto instructions = generator->getInstructions(); + + std::vector> expected = { + {WasmInstruction::F32Const, "5.0"}, + {WasmInstruction::F32Const, "3.0"}, + {WasmInstruction::F32Mul, ""} + }; + verifyInstructions(instructions, expected); +} + +TEST_F(WasmGenTest, VariableAssignment) { + auto left = createLeafNode("x", LexerTokenType::VarToken); + auto right = createLeafNode("42", LexerTokenType::IntToken); + auto node = ASTNodeFactory::createBinaryNode( + std::move(left), + std::move(right), + LexerToken{"=", {0, 0}, LexerTokenType::AssignToken} + ); + + generator->generate(*node); + auto instructions = generator->getInstructions(); + + std::vector> expected = { + {WasmInstruction::I32Const, "42"}, + {WasmInstruction::LocalSet, "0"} + }; + verifyInstructions(instructions, expected); +} + +TEST_F(WasmGenTest, IfStatement) { + // Create: if x > 5: print(x) + auto condition = ASTNodeFactory::createBinaryNode( + createLeafNode("x", LexerTokenType::VarToken), + createLeafNode("5", LexerTokenType::IntToken), + LexerToken{">", {0, 0}, LexerTokenType::GreaterToken} + ); + + std::vector> thenChildren; + 
thenChildren.push_back(createLeafNode("x", LexerTokenType::VarToken)); + auto thenBranch = ASTNodeFactory::createTreeNode( + std::move(thenChildren), + LexerToken{"print", {0, 0}, LexerTokenType::PrintToken} + ); + + auto ifNode = ASTNodeFactory::createConditionalNode( + std::move(condition), + std::move(thenBranch), + nullptr, + LexerToken{"if", {0, 0}, LexerTokenType::IfToken} + ); + + generator->generate(*ifNode); + auto instructions = generator->getInstructions(); + + std::vector<std::pair<WasmInstruction, std::string>> expected = { + {WasmInstruction::LocalGet, "0"}, + {WasmInstruction::I32Const, "5"}, + {WasmInstruction::I32GtS, ""}, + {WasmInstruction::If, ""}, + {WasmInstruction::LocalGet, "0"}, + {WasmInstruction::CallPrint, ""}, + {WasmInstruction::End, ""} + }; + verifyInstructions(instructions, expected); +} diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp new file mode 100644 index 0000000..b16d12f --- /dev/null +++ b/tests/lexer_test.cpp @@ -0,0 +1,102 @@ +#include "Lexer/Lexer.hpp" +#include "Parser/Parser.hpp" +#include <gtest/gtest.h> + +std::vector<LexerToken> tokenize(std::string_view input) +{ + Lexer lexer(input); + std::vector<LexerToken> tokens; + for (auto token = lexer.nextNWToken(); token.type != LexerTokenType::Eof; token = lexer.nextNWToken()) + { + tokens.push_back(token); + } + return tokens; +} + +void expectToken(const LexerToken& token, LexerTokenType expectedType, std::string_view expectedValue) +{ + EXPECT_EQ(token.type, expectedType); + EXPECT_EQ(token.value, expectedValue); +} + +class LexerTest : public ::testing::Test +{ + protected: + void verifyTokenSequence(const std::vector<LexerToken>& tokens, + const std::vector<std::pair<LexerTokenType, std::string>>& expected) + { + ASSERT_EQ(tokens.size(), expected.size()); + for (size_t i = 0; i < tokens.size(); ++i) + { + expectToken(tokens[i], expected[i].first, expected[i].second); + } + } +}; + +TEST_F(LexerTest, Numbers) +{ + auto tokens = tokenize(R"(42 3.14 -17 -2.5 "hello" true #comment)"); + + std::vector<std::pair<LexerTokenType, std::string>> expected = { + {LexerTokenType::IntToken, "42"}, + {LexerTokenType::FloatToken, "3.14"}, + 
{LexerTokenType::MinusToken, "-"}, + {LexerTokenType::IntToken, "17"}, + {LexerTokenType::MinusToken, "-"}, + {LexerTokenType::FloatToken, "2.5"}, + {LexerTokenType::StringToken, "\"hello\""}, + {LexerTokenType::BoolToken, "true"}, + {LexerTokenType::CommentToken, "#comment"}, + }; + + verifyTokenSequence(tokens, expected); +} + +TEST_F(LexerTest, Operators) +{ + auto tokens = tokenize("+-*/ == != < <= > >="); + + std::vector<std::pair<LexerTokenType, std::string>> expected = { + {LexerTokenType::PlusToken, "+"}, + {LexerTokenType::MinusToken, "-"}, + {LexerTokenType::MultiplyToken, "*"}, + {LexerTokenType::DivideToken, "/"}, + {LexerTokenType::EqualToken, "=="}, + {LexerTokenType::NotEqualToken, "!="}, + {LexerTokenType::LessToken, "<"}, + {LexerTokenType::LessEqualToken, "<="}, + {LexerTokenType::GreaterToken, ">"}, + {LexerTokenType::GreaterEqualToken, ">="}, + }; + + verifyTokenSequence(tokens, expected); +} + +TEST_F(LexerTest, Keywords) +{ + + auto tokens = tokenize(R"( +x = 42 +if (x >= 0){ +y = x + 10 + print(y) +})"); + + std::vector<std::pair<LexerTokenType, std::string>> expected = { + {LexerTokenType::Newline, "\\n"}, {LexerTokenType::VarToken, "x"}, + {LexerTokenType::AssignToken, "="}, {LexerTokenType::IntToken, "42"}, + {LexerTokenType::Newline, "\\n"}, {LexerTokenType::IfToken, "if"}, + {LexerTokenType::ParenOpen, "("}, {LexerTokenType::VarToken, "x"}, + {LexerTokenType::GreaterEqualToken, ">="}, {LexerTokenType::IntToken, "0"}, + {LexerTokenType::ParenClose, ")"}, {LexerTokenType::BracesOpen, "{"}, + {LexerTokenType::Newline, "\\n"}, {LexerTokenType::VarToken, "y"}, + {LexerTokenType::AssignToken, "="}, {LexerTokenType::VarToken, "x"}, + {LexerTokenType::PlusToken, "+"}, {LexerTokenType::IntToken, "10"}, + {LexerTokenType::Newline, "\\n"}, {LexerTokenType::PrintToken, "print"}, + {LexerTokenType::ParenOpen, "("}, {LexerTokenType::VarToken, "y"}, + {LexerTokenType::ParenClose, ")"}, {LexerTokenType::Newline, "\\n"}, + {LexerTokenType::BracesClose, "}"}, + }; + + verifyTokenSequence(tokens, expected); +} diff --git 
a/tests/parser_test.cpp b/tests/parser_test.cpp new file mode 100644 index 0000000..6d9c761 --- /dev/null +++ b/tests/parser_test.cpp @@ -0,0 +1,109 @@ +#include "Lexer/Lexer.hpp" +#include "Parser/Parser.hpp" +#include <gtest/gtest.h> + +class ParserTest : public ::testing::Test +{ + protected: + std::unique_ptr<Parser> createParser(std::string_view input) { return std::make_unique<Parser>(input, output); } + CompilerOutput output; +}; + +// Binary Operations Tests +TEST_F(ParserTest, BinaryArithmetic) +{ + std::vector<std::pair<std::string, LexerTokenType>> ops = {{"x + 5", LexerTokenType::PlusToken}, + {"x - 5", LexerTokenType::MinusToken}, + {"x * 5", LexerTokenType::MultiplyToken}, + {"x / 5", LexerTokenType::DivideToken}}; + + for (const auto& [expr, tokenType] : ops) + { + auto parser = createParser(expr); + LexerToken token; + parser->advanceToken(token); + auto node = parser->parseStatement(token); + const BinaryNode& binNode = static_cast<const BinaryNode&>(*node); + + EXPECT_EQ(binNode.token.type, tokenType); + EXPECT_EQ(binNode.left->token.type, LexerTokenType::VarToken); + EXPECT_EQ(binNode.right->token.type, LexerTokenType::IntToken); + } +} + +TEST_F(ParserTest, CompoundAssignment) +{ + auto parser = createParser("result = x + y * z"); + LexerToken token; + parser->advanceToken(token); + auto node = parser->parseStatement(token); + const BinaryNode& assign = static_cast<const BinaryNode&>(*node); + + EXPECT_EQ(assign.token.type, LexerTokenType::AssignToken); + EXPECT_EQ(assign.left->token.type, LexerTokenType::VarToken); + EXPECT_EQ(assign.left->token.value, "result"); + + const BinaryNode& plus = static_cast<const BinaryNode&>(*assign.right); + EXPECT_EQ(plus.token.type, LexerTokenType::PlusToken); + EXPECT_EQ(plus.left->token.type, LexerTokenType::VarToken); + + const BinaryNode& mult = static_cast<const BinaryNode&>(*plus.right); + EXPECT_EQ(mult.token.type, LexerTokenType::MultiplyToken); + EXPECT_EQ(mult.left->token.type, LexerTokenType::VarToken); + EXPECT_EQ(mult.right->token.type, LexerTokenType::VarToken); +} + +// Print Node Tests +TEST_F(ParserTest, SimplePrint) +{ + auto parser = 
createParser("print(x)"); + LexerToken token; + parser->advanceToken(token); + auto node = parser->parseStatement(token); + const TreeNode& printNode = static_cast<const TreeNode&>(*node); + + EXPECT_EQ(printNode.token.type, LexerTokenType::PrintToken); + EXPECT_EQ(printNode.children[0]->token.type, LexerTokenType::VarToken); +} + +TEST_F(ParserTest, PrintExpression) +{ + auto parser = createParser("print(x + y * z)"); + LexerToken token; + parser->advanceToken(token); + auto node = parser->parseStatement(token); + const TreeNode& printNode = static_cast<const TreeNode&>(*node); + + EXPECT_EQ(printNode.token.type, LexerTokenType::PrintToken); + + const BinaryNode& plus = static_cast<const BinaryNode&>(*printNode.children[0]); + EXPECT_EQ(plus.token.type, LexerTokenType::PlusToken); + + const BinaryNode& mult = static_cast<const BinaryNode&>(*plus.right); + EXPECT_EQ(mult.token.type, LexerTokenType::MultiplyToken); +} + +// Error Cases +TEST_F(ParserTest, MissingCondition) +{ + auto parser = createParser("if:\n print(x)\n"); + LexerToken token; + parser->advanceToken(token); + EXPECT_THROW(parser->parseStatement(token), Error); +} + +TEST_F(ParserTest, MissingPrintExpression) +{ + auto parser = createParser("print()"); + LexerToken token; + parser->advanceToken(token); + EXPECT_THROW(parser->parseStatement(token), Error); +} + +TEST_F(ParserTest, InvalidBinaryOperation) +{ + auto parser = createParser("x + * y"); + LexerToken token; + parser->advanceToken(token); + EXPECT_THROW(parser->parseStatement(token), Error); +} \ No newline at end of file diff --git a/tests/semantic_test.cpp b/tests/semantic_test.cpp new file mode 100644 index 0000000..a99bfdf --- /dev/null +++ b/tests/semantic_test.cpp @@ -0,0 +1,92 @@ +#include "CompilerOutput.hpp" +#include "Lexer/Lexer.hpp" +#include "Parser/Parser.hpp" +#include "Semantic/Semantic.hpp" +#include <gtest/gtest.h> + + +class SemanticTest : public ::testing::Test +{ + protected: + CompilerOutput output; + Semantic semantic{output}; + + std::unique_ptr<ASTNode> createLeafNode(std::string_view value, LexerTokenType type) 
+ { + return ASTNodeFactory::createBinaryNode(nullptr, nullptr, LexerToken{value, {0, 0}, type}); + } + + // Helper to create a token + LexerToken createToken(std::string_view value, LexerTokenType type) { return LexerToken{value, {0, 0}, type}; } + + // Helper for binary operations + std::unique_ptr<ASTNode> createBinaryOperation(std::unique_ptr<ASTNode> left, + std::unique_ptr<ASTNode> right, + LexerTokenType opType, + std::string_view opValue) + { + return ASTNodeFactory::createBinaryNode(std::move(left), std::move(right), LexerToken{opValue, {0, 0}, opType}); + } +}; + +TEST_F(SemanticTest, ValidAssignment) +{ + auto left = createLeafNode("x", LexerTokenType::VarToken); + auto right = createLeafNode("42", LexerTokenType::IntToken); + auto node = ASTNodeFactory::createBinaryNode( + std::move(left), std::move(right), createToken("=", LexerTokenType::AssignToken)); + + EXPECT_NO_THROW(semantic.analyzeTree(*node)); +} + +TEST_F(SemanticTest, ValidArithmetic) +{ + auto left = createLeafNode("x", LexerTokenType::VarToken); + auto right = createLeafNode("5", LexerTokenType::IntToken); + auto node = createBinaryOperation(std::move(left), std::move(right), LexerTokenType::PlusToken, "+"); + + EXPECT_NO_THROW(semantic.analyzeTree(*node)); +} + +TEST_F(SemanticTest, TypeMismatch) +{ + auto left = createLeafNode("hello", LexerTokenType::StringToken); + auto right = createLeafNode("42", LexerTokenType::IntToken); + auto node = createBinaryOperation(std::move(left), std::move(right), LexerTokenType::PlusToken, "+"); + + EXPECT_THROW(semantic.analyzeTree(*node), Error); +} + +TEST_F(SemanticTest, ValidComparison) +{ + auto left = createLeafNode("x", LexerTokenType::VarToken); + auto right = createLeafNode("5", LexerTokenType::IntToken); + auto node = createBinaryOperation(std::move(left), std::move(right), LexerTokenType::GreaterToken, ">"); + + EXPECT_NO_THROW(semantic.analyzeTree(*node)); +} + +TEST_F(SemanticTest, ValidIfCondition) +{ + // Create condition: x > 5 + auto condLeft = 
createLeafNode("x", LexerTokenType::VarToken); + auto condRight = createLeafNode("5", LexerTokenType::IntToken); + auto condition = + createBinaryOperation(std::move(condLeft), std::move(condRight), LexerTokenType::GreaterToken, ">"); + + // Create then branch: y = 1 + std::vector<std::unique_ptr<ASTNode>> thenChildren; + auto assignLeft = createLeafNode("y", LexerTokenType::VarToken); + auto assignRight = createLeafNode("1", LexerTokenType::IntToken); + auto assignment = + createBinaryOperation(std::move(assignLeft), std::move(assignRight), LexerTokenType::AssignToken, "="); + thenChildren.push_back(std::move(assignment)); + + auto thenBranch = + ASTNodeFactory::createTreeNode(std::move(thenChildren), createToken("block", LexerTokenType::ProgramToken)); + + auto ifNode = ASTNodeFactory::createConditionalNode( + std::move(condition), std::move(thenBranch), nullptr, createToken("if", LexerTokenType::IfToken)); + + EXPECT_NO_THROW(semantic.analyzeTree(*ifNode)); +}