diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 0000000..f884711
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,138 @@
+# This file is autogenerated by maturin v1.5.1
+# To update, run
+#
+#    maturin generate-ci github
+#
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+      - master
+    tags:
+      - '*'
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  linux:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: ubuntu-latest
+            target: x86_64
+          - runner: ubuntu-latest
+            target: x86
+          - runner: ubuntu-latest
+            target: aarch64
+          - runner: ubuntu-latest
+            target: armv7
+          - runner: ubuntu-latest
+            target: s390x
+          - runner: ubuntu-latest
+            target: ppc64le
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter
+          sccache: 'true'
+          manylinux: auto
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-linux-${{ matrix.platform.target }}
+          path: dist
+
+  windows:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: windows-latest
+            target: x64
+          - runner: windows-latest
+            target: x86
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+          architecture: ${{ matrix.platform.target }}
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter
+          sccache: 'true'
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-windows-${{ matrix.platform.target }}
+          path: dist
+
+  macos:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: macos-latest
+            target: x86_64
+          - runner: macos-14
+            target: aarch64
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter
+          sccache: 'true'
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-macos-${{ matrix.platform.target }}
+          path: dist
+
+  sdist:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build sdist
+        uses: PyO3/maturin-action@v1
+        with:
+          command: sdist
+          args: --out dist
+      - name: Upload sdist
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-sdist
+          path: dist
+
+  release:
+    name: Release
+    runs-on: ubuntu-latest
+    if: "startsWith(github.ref, 'refs/tags/')"
+    needs: [linux, windows, macos, sdist]
+    steps:
+      - uses: actions/download-artifact@v4
+      - name: Publish to PyPI
+        uses: PyO3/maturin-action@v1
+        env:
+          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
+        with:
+          command: upload
+          args: --non-interactive --skip-existing wheels-*/*
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c8f0442
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,72 @@
+/target
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+.pytest_cache/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+.venv/
+env/
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+include/
+man/
+venv/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+pip-selfcheck.json
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Rope
+.ropeproject
+
+# Django stuff:
+*.log
+*.pot
+
+.DS_Store
+
+# Sphinx documentation
+docs/_build/
+
+# PyCharm
+.idea/
+
+# VSCode
+.vscode/
+
+# Pyenv
+.python-version
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..e98a047
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,3137 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "addr2line"
+version = "0.22.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678"
+dependencies = [
+ "gimli",
+]
+
+[[package]]
+name = "adler"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+
+[[package]]
+name = "ahash"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
+
+[[package]]
+name = "anyhow"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
+
+[[package]]
+name = "arc-swap"
+version = "1.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
+
+[[package]]
+name = "arrayref"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
+
+[[package]]
+name = "arrayvec"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
+
+[[package]]
+name = "async-stream"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51"
+dependencies = [
+ "async-stream-impl",
+ "futures-core",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "async-stream-impl"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.66",
+]
+
+[[package]]
+name = "async-trait"
+version = "0.1.80"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.66",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum =
"0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "axum" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" +dependencies = [ + "async-trait", + "axum-core", + "bitflags 1.3.2", + "bytes", + "futures-util", + "http", + "http-body", + "hyper", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "mime", + "rustversion", + "tower-layer", + "tower-service", +] + +[[package]] +name = "backtrace" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "bitpacking" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" +dependencies = [ + "crunchy", +] + +[[package]] +name = "blake3" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bstr" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "bumpalo" +version = 
"3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "cc" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] + +[[package]] +name = "census" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "circular-queue" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d34327ead1c743a10db339de35fb58957564b99d248a67985c55638b22c59b5" +dependencies = [ + "version_check", +] + +[[package]] +name = "code_nav_devon" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-stream", + "async-trait", + "bincode", + "blake3", + "console-subscriber", + "erased-serde", + "expect-test", + "futures", + "futures-util", + "globset", + "histogram", + "hyperpolyglot", + "ignore", + "notify-debouncer-mini", + "once_cell", + "petgraph", + "phf 0.11.2", + "phf_codegen 0.11.2", + "pretty_assertions", + "pyo3", + "rand 0.8.5", + "regex", + "regex-syntax 0.6.29", + "select", + "serde", + "serde_json", + "serde_yaml", + "sha2", + "smallvec", + "tantivy", + "tantivy-columnar", + "tempdir", + "thiserror", + "tokio", + "tokio-stream", + "tracing", + "tracing-appender", + "tracing-subscriber", + "tree-sitter 0.20.10", + "tree-sitter-COBOL", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-go", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-md", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-r", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-typescript", + "zstd", + "zstd-safe", + "zstd-sys", +] + +[[package]] +name = "console-api" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2895653b4d9f1538a83970077cb01dfc77a4810524e51a110944688e916b18e" +dependencies = [ + "prost", + "prost-types", + "tonic", + "tracing-core", +] + +[[package]] +name = "console-subscriber" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4cf42660ac07fcebed809cfe561dd8730bcd35b075215e6479c516bcd0d11cb" +dependencies = [ + "console-api", + "crossbeam-channel", + "crossbeam-utils", + "futures", + "hdrhistogram", + "humantime", + "prost-types", + "serde", + "serde_json", + "thread_local", + "tokio", + "tokio-stream", + "tonic", + "tracing", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + +[[package]] +name = "cpufeatures" +version = 
"0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", + "serde", +] + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "dissimilar" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f8e79d1fbf76bdfbde321e902714bf6c49df88a7dda6fc682fc2979226962d" + +[[package]] +name = "downcast-rs" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "erased-serde" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c138974f9d5e7fe373eb04df7cae98833802ae4b11c24ac7039a21d5af4b26c" +dependencies = 
[ + "serde", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "expect-test" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e0be0a561335815e06dab7c62e50353134c796e7a6155402a64bcff66b6a5e0" +dependencies = [ + "dissimilar", + "once_cell", +] + +[[package]] +name = "fastdivide" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59668941c55e5c186b8b58c391629af56774ec768f73c08bbcd56f09348eb00b" + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "windows-sys 0.52.0", +] + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "fs4" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eeb4ed9e12f43b7fa0baae3f9cdda28352770132ef2e09a23760c29cae8bd47" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generator" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186014d53bc231d0090ef8d6f03e0920c54d85a5ed22f4f2f74315ec56cf83fb" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" + +[[package]] +name = "globset" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata 0.4.6", + "regex-syntax 0.8.3", +] + +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap 2.2.6", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hdrhistogram" +version = "7.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" +dependencies = [ + "base64", + "byteorder", + "flate2", + "nom", + "num-traits", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "histogram" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e673d137229619d5c2c8903b6ed5852b43636c0017ff2e66b1aafb8ccf04b80b" +dependencies = [ + "serde", + "thiserror", +] + +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "hyper" +version = "0.14.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-timeout" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" +dependencies = [ + "hyper", + "pin-project-lite", + "tokio", + "tokio-io-timeout", +] + +[[package]] +name = "hyperpolyglot" +version = "0.1.7" +source = 
"git+https://github.com/bloopai/hyperpolyglot#1bc3c2648c5c9a0a6ace85d2585f408b479f3190" +dependencies = [ + "ignore", + "lazy_static", + "num_cpus", + "phf 0.11.2", + "phf_codegen 0.11.2", + "polyglot_tokenizer", + "regex", + "serde", + "serde_yaml", +] + +[[package]] +name = "ignore" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492" +dependencies = [ + "globset", + "lazy_static", + "log", + "memchr", + "regex", + "same-file", + "thread_local", + "walkdir", + "winapi-util", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown 0.14.5", +] + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "inotify" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" +dependencies = [ + "bitflags 1.3.2", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "kqueue" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7447f1ca1b7b563588a205fe93dea8df60fd981423a768bc1c0ded35ed147d0c" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = "1.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "pin-utils", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lru" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a83fb7698b3643a0e34f9ae6f2e8f0178c0fd42f8b59d493aa271ff3a5bf21" +dependencies = [ + "hashbrown 0.14.5", +] + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf 0.10.1", + "phf_codegen 0.10.0", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "measure_time" +version = "0.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc" +dependencies = [ + "instant", + "log", +] + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "memmap2" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "murmurhash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "notify" +version = "6.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" +dependencies = [ + "bitflags 2.5.0", + "crossbeam-channel", + "filetime", + "fsevent-sys", + "inotify", + "kqueue", + "libc", + "log", + "mio", + "walkdir", + "windows-sys 0.48.0", +] + +[[package]] +name = "notify-debouncer-mini" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e55ee272914f4563a2f8b8553eb6811f3c0caea81c756346bad15b7e3ef969f0" +dependencies = [ + "notify", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "oneshot" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071d1cf3298ad8e543dca18217d198cb6a3884443d204757b9624b935ef09fa0" +dependencies = [ + "loom", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "ownedbytes" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e8a72b918ae8198abb3a18c190288123e1d442b6b9a7d709305fd194688b4b7" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.1", + "smallvec", + "windows-targets 0.52.5", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap 2.2.6", + "serde", + "serde_derive", +] + +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared 0.10.0", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + 
"phf_generator 0.11.2", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand 0.8.5", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand 0.8.5", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "polyglot_tokenizer" +version = "0.2.1" +source = "git+https://github.com/bloopai/hyperpolyglot#1bc3c2648c5c9a0a6ace85d2585f408b479f3190" +dependencies = [ + "circular-queue", +] + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "pretty_assertions" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +dependencies = [ + "diff", + "yansi", +] + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" +dependencies = [ + "anyhow", + "itertools 0.10.5", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "prost-types" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" +dependencies = [ + "prost", +] + +[[package]] +name = "pyo3" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags 2.5.0", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.6", + "regex-syntax 0.8.3", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.3", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + 
"serde_derive", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "select" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f9da09dc3f4dfdb6374cbffff7a2cffcec316874d4429899eefdc97b3b94dcd" +dependencies = [ + "bit-set", + "html5ever", + "markup5ever_rcdom", +] + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.2.6", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" 
+dependencies = [ + "lazy_static", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "sketches-ddsketch" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" +dependencies = [ + "serde", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +dependencies = [ + "serde", +] + +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "tantivy" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6083cd777fa94271b8ce0fe4533772cb8110c3044bab048d20f70108329a1f2" +dependencies = [ + "aho-corasick", + "arc-swap", + "async-trait", + "base64", + "bitpacking", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fastdivide", + "fs4", + "htmlescape", + "itertools 0.11.0", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "murmurhash32", 
+ "num_cpus", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash", + "serde", + "serde_json", + "sketches-ddsketch", + "smallvec", + "tantivy-bitpacker", + "tantivy-columnar", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", + "tempfile", + "thiserror", + "time", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecb164321482301f514dd582264fa67f70da2d7eb01872ccd71e35e0d96655a" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d85f8019af9a78b3118c11298b36ffd21c2314bd76bbcd9d12e00124cbb7e70" +dependencies = [ + "fastdivide", + "fnv", + "itertools 0.11.0", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] + +[[package]] +name = "tantivy-common" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af4a3a975e604a2aba6b1106a04505e1e7a025e6def477fab6e410b4126471e1" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes", + "serde", + "time", +] + +[[package]] +name = "tantivy-fst" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3c506b1a8443a3a65352df6382a1fb6a7afe1a02e871cee0d25e2c3d5f3944" +dependencies = [ + "byteorder", + "regex-syntax 0.6.29", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d39c5a03100ac10c96e0c8b07538e2ab8b17da56434ab348309b31f23fada77" +dependencies = [ + "nom", +] + +[[package]] +name = "tantivy-sstable" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0c1bb43e5e8b8e05eb8009610344dbf285f06066c844032fbb3e546b3c71df" +dependencies = [ + "tantivy-common", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-stacker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2c078595413f13f218cf6f97b23dcfd48936838f1d3d13a1016e05acd64ed6c" +dependencies = [ + "murmurhash32", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "347b6fb212b26d3505d224f438e3c4b827ab8bd847fe9953ad5ac6b8f9443b66" +dependencies = [ + "serde", +] + +[[package]] +name = "target-lexicon" +version = "0.12.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" + +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + 
"futf", + "mac", + "utf-8", +] + +[[package]] +name = "thiserror" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "tracing", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-io-timeout" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf" +dependencies = [ + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-macros" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "tokio-stream" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", + "tokio-util", +] + +[[package]] +name = "tokio-util" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tonic" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" +dependencies = [ + "async-trait", + "axum", + "base64", + "bytes", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-timeout", + "percent-encoding", + "pin-project", + "prost", + "tokio", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", 
+ "tracing", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-appender" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" +dependencies = [ + "crossbeam-channel", + "thiserror", + "time", + "tracing-subscriber", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "tree-sitter" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e747b1f9b7b931ed39a548c1fae149101497de3c1fc8d9e18c62c1a66c683d3d" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter-COBOL" +version = "0.0.1" +source = "git+https://github.com/BloopAI/tree-sitter-cobol#8ba6692cc3c2bded0693d198936c6e26e6501230" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-c" +version = "0.20.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bbd5f3d8658c08581f8f2adac6c391c2e9fa00fe9246bf6c5f52213b9cc6b72" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] 
+ +[[package]] +name = "tree-sitter-c-sharp" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ab3dc608f34924fa9e10533a95f62dbc14b6de0ddd7107722eba66fe19ae31" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.20.0" +source = "git+https://github.com/tree-sitter/tree-sitter-cpp?rev=5ead1e2#5ead1e26c6ab71919db0f1880c46a278a93bc5ea" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-go" +version = "0.19.1" +source = "git+https://github.com/tree-sitter/tree-sitter-go?rev=05900fa#05900faa3cdb5d2d8c8bd5e77ee698487e0a8611" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-java" +version = "0.20.0" +source = "git+https://github.com/tree-sitter/tree-sitter-java?tag=v0.20.0#ac14b4b1884102839455d32543ab6d53ae089ab7" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.20.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d015c02ea98b62c806f7329ff71c383286dfc3a7a7da0cc484f6e42922f73c2c" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-md" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c20d3ef8d202430b644a307e6299d84bf8ed87fa1b796e4638f8805a595060c" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-php" +version = "0.22.5" +source = "git+https://github.com/tree-sitter/tree-sitter-php#b38c53537769df05871643c9688c264074fb6076" +dependencies = [ + "cc", + "tree-sitter 0.22.6", +] + +[[package]] +name = "tree-sitter-python" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dda114f58048f5059dcf158aff691dffb8e113e6d2b50d94263fd68711975287" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-r" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "522c13f4cc46213148b19d4ad40a988ffabd51fd90eb7de759844fbde49bda0c" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d50ef383469df8485f024c5fb01faced8cb90368192a7ba02605b43b2427fe" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.20.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0832309b0b2b6d33760ce5c0e818cb47e1d72b468516bfe4134408926fa7594" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.20.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8bc1d2c24276a48ef097a71b56888ac9db63717e8f8d0b324668a27fd619670" +dependencies = [ + "cc", + "tree-sitter 0.20.10", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + +[[package]] +name = "uuid" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +dependencies = [ + "getrandom", + "serde", +] + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.66", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49" +dependencies = [ + "windows-core", + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-core" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65" +dependencies = [ + "windows-result", + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-result" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "749f0da9cc72d82e600d8d2e44cadd0b9eedb9038f71a1c58556ac1c5791813b" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" 
+version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "xml5ever" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" +dependencies = [ + "log", + "mac", + "markup5ever", +] + +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.9+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..6dcf550 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,112 @@ +[package] +name = "code_nav_devon" +version = "0.1.0" +edition = "2021" +description = "A python package written in rust for code navigation features like text search, fuzzy search and go to definition/references" +license = "MIT" +authors = ["Arnav Chintawar arnavch04@gmail.com"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "code_nav_devon" +crate-type = ["cdylib"] + +[dependencies] + +sha2 = "0.10.2" +globset = "0.4" +pyo3 = { version = "0.21.2", features = ["extension-module"] } + +# core +tantivy = { version = "0.21.0", features = ["mmap"] } +tantivy-columnar = "0.2.0" +tokio = { version = "1.32.0", features = ["macros", "process", "rt", "rt-multi-thread", "io-std", "io-util", "sync", "fs"] } +tokio-stream = { version = "0.1.14", features = ["sync"]} +async-trait = "0.1.73" +async-stream = "0.3.5" +futures = "0.3.30" +futures-util = "0.3.30" +tracing = "0.1.37" +tracing-subscriber = { version = "0.3.17", features = ["env-filter", "registry"] } +tracing-appender = "0.2.2" +regex = "1.9.5" +regex-syntax = "0.6.28" + + +# for debugging +console-subscriber = { version = "0.1.10", optional = true } +histogram = { version = "0.7.4", optional = true } + +# error handling +anyhow = "1.0.75" +thiserror = "1.0.48" + +# code-nav +tree-sitter = "0.20.10" +tree-sitter-c = "0.20.6" +tree-sitter-go = { git = "https://github.com/tree-sitter/tree-sitter-go", rev = "05900fa" } +tree-sitter-javascript = "0.20.1" +tree-sitter-python = "=0.20.2" +tree-sitter-rust = "0.20.4" +tree-sitter-typescript = "0.20.2" +tree-sitter-c-sharp = "0.20.0" +tree-sitter-java = { git = "https://github.com/tree-sitter/tree-sitter-java", tag = "v0.20.0" } +tree-sitter-cpp = { git = "https://github.com/tree-sitter/tree-sitter-cpp", rev = "5ead1e2" } +tree-sitter-ruby = "0.20.0" +tree-sitter-r = "0.19.5" +tree-sitter-php = { git = "https://github.com/tree-sitter/tree-sitter-php" } +tree-sitter-COBOL = { git = "https://github.com/BloopAI/tree-sitter-cobol" } +petgraph = { version = "0.6.4", default-features = false, features = ["serde-1"] } + +# file 
processing +ignore = "=0.4.20" +hyperpolyglot = { git = "https://github.com/bloopai/hyperpolyglot" } +blake3 = "1.5.0" +notify-debouncer-mini = { version = "0.3.0", default-features = false } + + +# doc scraper +select = "0.6" +tree-sitter-md = "0.1.5" + +# misc +serde = "1.0.188" +erased-serde = "0.3.31" +smallvec = { version = "1.11.1", features = ["serde"]} +bincode = "1.3.3" +phf = "0.11.2" +rand = "0.8.5" +once_cell = "1.18.0" + + + + +# Directly specify the version for zstd +zstd = "=0.12.4" + +# Optionally, if you want to ensure zstd-safe is the correct version +zstd-safe = "=6.0.6" + +zstd-sys = "=2.0.9+zstd.1.5.5" +serde_json = "1.0.117" + +[dev-dependencies] +pretty_assertions = "1.4.0" +tempdir = "0.3.7" +expect-test = "1.4.1" + +[build-dependencies] +phf_codegen = "0.11.2" +serde = {version = "1.0.188", features = ["derive"]} +serde_yaml = "0.9.25" +blake3 = "1.5.0" + +[package.metadata.maturin] +name = "code_nav_devon" +version = "0.1.0" +description = "A python package written in rust for code navigation features like text search, fuzzy search and go to definition/references" +author = "Arnav Chintawar" +license = "MIT" +readme = "README.md" # Make sure this file exists in the root of your project +home-page = "https://github.com/yourusername/code_nav_devon" +repository = "https://github.com/yourusername/code_nav_devon" \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..36f8795 --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# Code Navigation Python Package + +This Python package provides functionality for indexing and searching code repositories using Rust and PyO3. The package supports text search, fuzzy search, and retrieving token information and hoverable ranges from the code. + +## Installation + +To install the package, use the following command: + +```sh +pip install code_nav_devon +``` + +## Usage + +### Import the Package + +```python +import code_nav_devon +``` + +### Functions + +#### `go_to` + +Retrieves token information for a given position in a file. + +##### Parameters +- `root_path_str` (str): The root path of the repository. +- `index_path_str` (str): The path where the index is stored. +- `relative_path` (str): The relative path of the file. +- `line` (int): The line number. +- `start_index` (int): The start index in the line. +- `end_index` (int): The end index in the line. + +##### Returns +- `str`: Token information. + +##### Example + +```python +result = code_nav_devon.go_to("/path/to/repo", "/path/to/index", "src/main.rs", 10, 0, 5) +print(result) +``` + +#### `text_search` + +Performs a text search in the code repository. + +##### Parameters +- `root_path_str` (str): The root path of the repository. +- `index_path_str` (str): The path where the index is stored. +- `query` (str): The search query. +- `case_sensitive` (bool): Whether the search should be case sensitive. + +##### Returns +- `str`: Search results. + +##### Example + +```python +result = code_nav_devon.text_search("/path/to/repo", "/path/to/index", "search term", True) +print(result) +``` + +#### `get_hoverable_ranges` + +Retrieves the hoverable ranges for a given file. + +##### Parameters +- `root_path_str` (str): The root path of the repository. +- `index_path_str` (str): The path where the index is stored. +- `relative_path` (str): The relative path of the file. + +##### Returns +- `str`: Hoverable ranges in JSON format.
+ +##### Example + +```python +result = code_nav_devon.get_hoverable_ranges("/path/to/repo", "/path/to/index", "src/main.rs") +print(result) +``` + +## License + +This project is licensed under the MIT License. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7479051 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["maturin>=1.5,<2.0"] +build-backend = "maturin" + +[project] +name = "code_nav_devon" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] +[tool.maturin] +features = ["pyo3/extension-module"]
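The README above documents each binding in isolation; the sketch below chains them into one lookup flow. It uses only the three functions and signatures described in the README and assumes, as the README states, that `get_hoverable_ranges` returns JSON; the specific field names read out of that JSON (`line`, `start`, `end`) are illustrative assumptions, not a documented schema.

```python
# Hypothetical end-to-end flow using only the documented bindings.
# The JSON field names below are assumptions for illustration.
import json

import code_nav_devon

REPO = "/path/to/repo"
INDEX = "/path/to/index"

# Case-insensitive full-text search across the indexed repository.
print(code_nav_devon.text_search(REPO, INDEX, "ScopeGraph", False))

# Hoverable ranges for a single file, documented as a JSON string.
ranges = json.loads(code_nav_devon.get_hoverable_ranges(REPO, INDEX, "src/main.rs"))

# Ask for definition/reference info on the first hoverable token, assuming each
# entry exposes line/start/end positions.
if ranges:
    first = ranges[0]
    print(code_nav_devon.go_to(REPO, INDEX, "src/main.rs",
                               first["line"], first["start"], first["end"]))
```

If the real payload layout differs, only the three field lookups in the last step need to change; the call signatures themselves come straight from the README.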
diff --git a/src/content_document.rs b/src/content_document.rs new file mode 100644 index 0000000..4ba4f62 --- /dev/null +++ b/src/content_document.rs @@ -0,0 +1,32 @@ +use crate::{intelligence::TreeSitterFile, symbol::SymbolLocations, text_range::TextRange}; + +#[derive(Debug, Clone)] +pub struct ContentDocument { + pub content: String, + pub lang: Option<String>, + pub relative_path: String, + pub line_end_indices: Vec<u32>, + pub symbol_locations: SymbolLocations, +} + +impl ContentDocument { + pub fn hoverable_ranges(&self) -> Option<Vec<TextRange>> { + TreeSitterFile::try_build(self.content.as_bytes(), self.lang.as_ref()?) + .and_then(TreeSitterFile::hoverable_ranges) + .ok() + } +} + +impl std::hash::Hash for ContentDocument { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.relative_path.hash(state); + self.content.hash(state); + } +} + +impl PartialEq for ContentDocument { + fn eq(&self, other: &Self) -> bool { + self.relative_path == other.relative_path && self.content == other.content + } +} +impl Eq for ContentDocument {} diff --git a/src/file.rs b/src/file.rs new file mode 100644 index 0000000..b89307b --- /dev/null +++ b/src/file.rs @@ -0,0 +1,245 @@ +use std::path::Path; +use tantivy::{schema::Schema, IndexWriter, doc, Term}; +use anyhow::Result; +use async_trait::async_trait; +use tokio::fs; +use tokio::task::spawn_blocking; +use futures::future::BoxFuture; +use std::collections::{HashSet, HashMap}; +use crate::indexes::Indexable; +use crate::intelligence::{TreeSitterFile, TSLanguage}; +use crate::symbol::SymbolLocations; +use crate::schema::build_schema; +use sha2::{Sha256, Digest}; +use globset::{Glob, GlobSet, GlobSetBuilder}; + +pub struct File { + pub schema: Schema, + pub path_field: tantivy::schema::Field, + pub content_field: tantivy::schema::Field, + pub symbol_locations_field: tantivy::schema::Field, + pub symbols_field: tantivy::schema::Field, + pub line_end_indices_field: tantivy::schema::Field, + pub lang_field: tantivy::schema::Field, + pub hash_field: tantivy::schema::Field, + content_insensitive_field: tantivy::schema::Field +} + +impl File { + pub fn new() -> Self { + let schema = build_schema(); + let path_field = schema.get_field("path").unwrap(); + let content_field = schema.get_field("content").unwrap(); + let symbol_locations_field = schema.get_field("symbol_locations").unwrap(); + let symbols_field = schema.get_field("symbols").unwrap(); + let line_end_indices_field = schema.get_field("line_end_indices").unwrap(); + let lang_field = schema.get_field("lang").unwrap(); + let hash_field = schema.get_field("hash").unwrap(); + let content_insensitive_field = schema.get_field("content_insensitive").unwrap(); + + Self { + schema, + path_field, + content_field, + symbol_locations_field, + symbols_field, + line_end_indices_field, + lang_field, + hash_field, + content_insensitive_field + } + } + + fn detect_language(path: &Path) -> &'static str { + let extension = path.extension().and_then(std::ffi::OsStr::to_str).unwrap_or(""); + TSLanguage::from_extension(extension).unwrap_or("plaintext") + } +} + +#[async_trait] +impl Indexable for File { + async fn index_repository(&self, root_path: &Path, writer: &IndexWriter) -> Result<()> { + let existing_docs = load_existing_docs(writer, &self.hash_field, &self.path_field)?; + let mut gitignore = GlobSetBuilder::new(); + + traverse_and_index_files( + root_path, writer, &self.schema, self.path_field, self.content_field, + self.symbol_locations_field, self.symbols_field, self.line_end_indices_field, + self.lang_field, self.hash_field, self.content_insensitive_field, + &existing_docs, &mut gitignore, root_path).await + } + + fn schema(&self) -> Schema { + self.schema.clone() + } +} + +fn load_existing_docs(writer: &IndexWriter, hash_field: &tantivy::schema::Field, path_field: &tantivy::schema::Field) -> Result<HashMap<String, String>> { + let searcher = writer.index().reader()?.searcher(); + let mut existing_docs = HashMap::new(); + + for segment_reader in searcher.segment_readers() { + let store_reader = segment_reader.get_store_reader(0)?; + let alive_bitset = segment_reader.alive_bitset(); + + for doc in store_reader.iter(alive_bitset) { + let doc = doc?; + let path =
doc.get_first(*path_field).unwrap().as_text().unwrap().to_string(); + let hash = doc.get_first(*hash_field).unwrap().as_text().unwrap().to_string(); + existing_docs.insert(path, hash); + } + } + + Ok(existing_docs) +} + +async fn parse_gitignore(current_path: &Path, builder: &mut GlobSetBuilder) -> Result<()> { + let gitignore_path = current_path.join(".gitignore"); + + if gitignore_path.exists() { + let contents = tokio::fs::read_to_string(&gitignore_path).await?; + for line in contents.lines() { + let trimmed_line = line.trim(); + if !trimmed_line.starts_with('#') && !trimmed_line.is_empty() { + let absolute_pattern = if trimmed_line.starts_with('/') { + // The pattern is already an absolute path, so we just use it as is + current_path.join(trimmed_line.trim_start_matches('/')) + } else { + // The pattern is a relative path, so we join it with the current path + current_path.join(trimmed_line) + }; + let pattern = absolute_pattern.to_string_lossy().replace("\\", "/"); + // println!("Adding to gitignore: {}", pattern); + builder.add(Glob::new(&pattern)?); + } + } + } + + Ok(()) +} + + +fn traverse_and_index_files<'a>( + path: &'a Path, + writer: &'a IndexWriter, + schema: &'a Schema, + path_field: tantivy::schema::Field, + content_field: tantivy::schema::Field, + symbol_locations_field: tantivy::schema::Field, + symbols_field: tantivy::schema::Field, + line_end_indices_field: tantivy::schema::Field, + lang_field: tantivy::schema::Field, + hash_field: tantivy::schema::Field, + content_insensitive_field: tantivy::schema::Field, // New field + existing_docs: &'a HashMap<String, String>, + gitignore: &'a mut GlobSetBuilder, + root_path: &'a Path, +) -> BoxFuture<'a, Result<()>> { + Box::pin(async move { + // Parse .gitignore in the current directory and update the builder + parse_gitignore(path, gitignore).await?; + + // Build the GlobSet from the builder + let globset = gitignore.build()?; + + let mut entries = fs::read_dir(path).await?; + while let Some(entry) = entries.next_entry().await?
{ + let path = entry.path(); + + // Convert the path to an absolute path + let absolute_path = path.canonicalize()?; + let absolute_path_str = absolute_path.to_string_lossy().replace("\\", "/"); + + // Skip paths that match .gitignore patterns + if globset.is_match(&absolute_path_str) { + continue; + } + + if path.is_dir() { + traverse_and_index_files( + &path, writer, schema, path_field, content_field, symbol_locations_field, + symbols_field, line_end_indices_field, lang_field, hash_field, content_insensitive_field, + existing_docs, gitignore, root_path).await?; + } else if path.is_file() { + let path_clone = path.clone(); + let content = spawn_blocking(move || std::fs::read(&path_clone)).await??; + + let content_str = match String::from_utf8(content) { + Ok(content_str) => content_str, + Err(_) => continue, // Skip if the content is not valid UTF-8 + }; + + // Compute the hash of the content + let mut hasher = Sha256::new(); + hasher.update(&content_str); + let hash = format!("{:x}", hasher.finalize()); + + let path_str = absolute_path_str.clone(); + if let Some(existing_hash) = existing_docs.get(&path_str) { + if existing_hash == &hash { + // File has not changed, skip reindexing + continue; + } else { + // Delete the old document + writer.delete_term(Term::from_field_text(path_field, &path_str)); + } + } + + let lang_str = File::detect_language(&path); + + if lang_str == "plaintext" { + continue; + } + + let symbol_locations: SymbolLocations = { + let scope_graph = TreeSitterFile::try_build(content_str.as_bytes(), lang_str) + .and_then(TreeSitterFile::scope_graph); + + match scope_graph { + Ok(graph) => SymbolLocations::TreeSitter(graph), + Err(_) => SymbolLocations::Empty, + } + }; + + // Flatten the list of symbols into a string with just text + let symbols = symbol_locations + .list() + .iter() + .map(|sym| content_str[sym.range.start.byte..sym.range.end.byte].to_owned()) + .collect::<HashSet<_>>() + .into_iter() + .collect::<Vec<_>>() + .join("\n"); + + // Collect line end indices as bytes + let mut line_end_indices = content_str + .match_indices('\n') + .flat_map(|(i, _)| u32::to_le_bytes(i as u32)) + .collect::<Vec<u8>>(); + + // Add the byte index of the last character to the line_end_indices vector + let last_char_byte_index = content_str.chars().map(|c| c.len_utf8()).sum::<usize>(); + line_end_indices.extend_from_slice(&u32::to_le_bytes(last_char_byte_index as u32)); + + // Convert content to lower case for case-insensitive search + let content_insensitive = content_str.to_lowercase(); + + // println!("{}", absolute_path_str); + + let doc = tantivy::doc!( + path_field => path_str, + content_field => content_str, + content_insensitive_field => content_insensitive, // Add case-insensitive content + symbol_locations_field => bincode::serialize(&symbol_locations).unwrap(), + symbols_field => symbols, + line_end_indices_field => line_end_indices, + lang_field => lang_str.to_string(), + hash_field => hash, + ); + + writer.add_document(doc)?; + } + } + Ok(()) + }) +} diff --git a/src/indexes.rs b/src/indexes.rs new file mode 100644 index 0000000..af2613a --- /dev/null +++ b/src/indexes.rs @@ -0,0 +1,113 @@ +use std::{fs, path::Path}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use tantivy::{schema::Schema, Index, IndexReader, IndexWriter}; +use tokio::sync::Mutex; +use crate::file::File; + +#[async_trait] +pub trait Indexable: Send + Sync { + async fn index_repository(&self, root_path: &Path, writer: &IndexWriter) -> Result<()>; + fn schema(&self) -> Schema; +} + +pub struct IndexWriteHandle<'a>
{ + source: &'a dyn Indexable, + reader: &'a IndexReader, + writer: IndexWriter, +} + +impl<'a> IndexWriteHandle<'a> { + pub async fn index(&self, root_path: &Path) -> Result<()> { + self.source.index_repository(root_path, &self.writer).await + } + + pub fn commit(&mut self) -> Result<()> { + self.writer.commit()?; + self.reader.reload()?; + Ok(()) + } + + pub fn rollback(&mut self) -> Result<()> { + self.writer.rollback()?; + Ok(()) + } +} + +pub struct Indexer<T: Indexable> { + pub source: T, + pub index: Index, + pub reader: IndexReader, + pub buffer_size: usize, + pub threads: usize, +} + +impl<T: Indexable> Indexer<T> { + fn write_handle(&self) -> Result<IndexWriteHandle<'_>> { + Ok(IndexWriteHandle { + source: &self.source, + reader: &self.reader, + writer: self.index.writer_with_num_threads(self.threads, self.buffer_size * self.threads)?, + }) + } + + fn init_index(schema: Schema, path: &Path, threads: usize) -> Result<Index> { + fs::create_dir_all(path).context("failed to create index dir")?; + let mut index = Index::open_or_create(tantivy::directory::MmapDirectory::open(path)?, schema)?; + index.set_multithread_executor(threads)?; + Ok(index) + } + + pub fn create(source: T, path: &Path, buffer_size: usize, threads: usize) -> Result<Self> { + match Self::init_index(source.schema(), path, threads) { + Ok(index) => { + let reader = index.reader()?; + Ok(Self { + reader, + index, + source, + threads, + buffer_size, + }) + }, + Err(e) if e.to_string().contains("Schema error: 'An index exists but the schema does not match.'") => { + // Delete the index directory + fs::remove_dir_all(path)?; + // Retry creating the Indexer instance + let index = Self::init_index(source.schema(), path, threads)?; + let reader = index.reader()?; + Ok(Self { + reader, + index, + source, + threads, + buffer_size, + }) + }, + Err(e) => Err(e), + } + } +} + + +pub struct Indexes { + pub file: Indexer<File>, + pub write_mutex: Mutex<()>, +} + +impl Indexes { + pub async fn new(index_path: &Path, buffer_size: usize, threads: usize) -> Result<Self> { + Ok(Self { + file: Indexer::create(File::new(), index_path, buffer_size, threads)?, + write_mutex: Mutex::new(()), + }) + } + + pub async fn index(&self, root_path: &Path) -> Result<()> { + let _write_lock = self.write_mutex.lock().await; + let mut writer = self.file.write_handle()?; + writer.index(root_path).await?; + writer.commit()?; + Ok(()) + } +} diff --git a/src/intelligence.rs b/src/intelligence.rs new file mode 100644 index 0000000..1a361f5 --- /dev/null +++ b/src/intelligence.rs @@ -0,0 +1,97 @@ +mod language; +mod namespace; +mod scope_resolution; +pub mod code_navigation; + + +pub use { + language::{Language, MemoizedQuery, TSLanguage, TSLanguageConfig, ALL_LANGUAGES}, + namespace::*, + scope_resolution::{NodeKind, ScopeGraph}, +}; + +use scope_resolution::ResolutionMethod; +use tree_sitter::{Parser, Tree}; + +/// A tree-sitter representation of a file +pub struct TreeSitterFile<'a> { + /// The original source that was used to generate this file. + src: &'a [u8], + + /// The syntax tree of this file. + tree: Tree, + + /// The supplied language for this file.
+ language: &'static TSLanguageConfig, +} + +#[derive(Debug)] +pub enum TreeSitterFileError { + UnsupportedLanguage, + ParseTimeout, + LanguageMismatch, + QueryError(tree_sitter::QueryError), + FileTooLarge, +} + +impl<'a> TreeSitterFile<'a> { + /// Create a TreeSitterFile out of a sourcefile + pub fn try_build(src: &'a [u8], lang_id: &str) -> Result<Self, TreeSitterFileError> { + // no scope-res for files larger than 500kb + if src.len() > 500 * 10usize.pow(3) { + return Err(TreeSitterFileError::FileTooLarge); + } + + let language = match TSLanguage::from_id(lang_id) { + Language::Supported(language) => Ok(language), + Language::Unsupported => Err(TreeSitterFileError::UnsupportedLanguage), + }?; + + let mut parser = Parser::new(); + parser + .set_language((language.grammar)()) + .map_err(|_| TreeSitterFileError::LanguageMismatch)?; + + // do not permit files that take >1s to parse + parser.set_timeout_micros(10u64.pow(6)); + + let tree = parser + .parse(src, None) + .ok_or(TreeSitterFileError::ParseTimeout)?; + + Ok(Self { + src, + tree, + language, + }) + } + + pub fn hoverable_ranges( + self, + ) -> Result<Vec<crate::text_range::TextRange>, TreeSitterFileError> { + let query = self + .language + .hoverable_query + .query(self.language.grammar) + .map_err(TreeSitterFileError::QueryError)?; + let root_node = self.tree.root_node(); + let mut cursor = tree_sitter::QueryCursor::new(); + Ok(cursor + .matches(query, root_node, self.src) + .flat_map(|m| m.captures) + .map(|c| c.node.range().into()) + .collect::<Vec<_>>()) + } + + /// Produce a lexical scope-graph for this TreeSitterFile. + pub fn scope_graph(self) -> Result<ScopeGraph, TreeSitterFileError> { + let query = self + .language + .scope_query + .query(self.language.grammar) + .map_err(TreeSitterFileError::QueryError)?; + let root_node = self.tree.root_node(); + + Ok(ResolutionMethod::Generic.build_scope(query, root_node, self.src, self.language)) + } +} diff --git a/src/intelligence/code_navigation.rs b/src/intelligence/code_navigation.rs new file mode 100644 index 0000000..7ef7e11 --- /dev/null +++ b/src/intelligence/code_navigation.rs @@ -0,0 +1,483 @@ +use std::{collections::HashSet, ops::Not}; + +use serde::Serialize; + +use crate::{ + content_document::ContentDocument, + snippet::{Snipper, Snippet}, + text_range::TextRange, + intelligence::NodeKind, +}; + +#[derive(Debug, Serialize)] +pub struct FileSymbols { + /// The file to which the following occurrences belong + pub file: String, + /// A collection of symbol locations with context in this file + pub data: Vec<Occurrence>, +} + +#[derive(Serialize, Debug)] +pub struct Occurrence { + pub kind: OccurrenceKind, + pub range: TextRange, + pub snippet: Snippet, +} + +impl Occurrence { + pub fn is_definition(&self) -> bool { + matches!(self.kind, OccurrenceKind::Definition) + } +} + +#[derive(Serialize, Debug, Default)] +#[serde(rename_all = "snake_case")] +pub enum OccurrenceKind { + #[default] + Reference, + Definition, +} + +pub enum CodeNavigationError {} + +pub struct CodeNavigationContext<'a, 'b> { + pub token: Token<'a>, + pub all_docs: &'b [ContentDocument], + pub source_document_idx: usize, + pub snipper: Option<Snipper>, +} + +impl<'a, 'b> CodeNavigationContext<'a, 'b> { + pub fn files_importing( + all_docs: &'b [ContentDocument], + source_document_idx: usize, + ) -> HashSet<&'b ContentDocument> { + let source_doc = all_docs.get(source_document_idx).unwrap(); + let Some(source_sg) = source_doc.symbol_locations.scope_graph() else { + return HashSet::default(); + }; + source_sg + .graph + .node_indices() + .filter(|idx| source_sg.is_definition(*idx) && source_sg.is_top_level(*idx))
.flat_map(|idx| { + let range = source_sg.graph[idx].range(); + let token = Token { + relative_path: &source_doc.relative_path, + start_byte: range.start.byte, + end_byte: range.end.byte, + }; + let active_token_text = &source_doc.content[token.start_byte..token.end_byte]; + all_docs + .iter() + .filter(|doc| doc.relative_path != source_doc.relative_path) + .filter(|doc| { + let Some(scope_graph) = doc.symbol_locations.scope_graph() else { + return false; + }; + let content = doc.content.as_bytes(); + scope_graph + .graph + .node_indices() + .any(|idx| match scope_graph.get_node(idx).unwrap() { + NodeKind::Import(n) => n.name(content) == active_token_text.as_bytes(), + _ => false, + }) + }) + .collect::>() + }) + .collect() + } + + pub fn files_imported( + all_docs: &'b [ContentDocument], + source_document_idx: usize, + ) -> HashSet<&'b ContentDocument> { + let source_doc = all_docs.get(source_document_idx).unwrap(); + let Some(source_sg) = source_doc.symbol_locations.scope_graph() else { + return HashSet::default(); + }; + + source_sg + .graph + .node_indices() + .filter(|idx| source_sg.is_reference(*idx) || source_sg.is_import(*idx)) + .filter(|&idx| { + CodeNavigationContext { + all_docs, + source_document_idx, + token: Token { + relative_path: &source_doc.relative_path, + start_byte: source_sg.graph[idx].range().start.byte, + end_byte: source_sg.graph[idx].range().end.byte, + }, + snipper: None, + } + .local_definitions() + .is_none() + }) + .flat_map(|idx| { + let range = source_sg.graph[idx].range(); + let token = Token { + relative_path: &source_doc.relative_path, + start_byte: range.start.byte, + end_byte: range.end.byte, + }; + let active_token_text = &source_doc.content[token.start_byte..token.end_byte]; + all_docs + .iter() + .filter(|doc| doc.relative_path != source_doc.relative_path) + .filter(|doc| { + let Some(scope_graph) = doc.symbol_locations.scope_graph() else { + return false; + }; + let content = doc.content.as_bytes(); + scope_graph + .graph + .node_indices() + .any(|idx| { + if let Some(NodeKind::Def(d)) = scope_graph.get_node(idx) { + d.name(content) == active_token_text.as_bytes() + } else { + false + } + }) + }) + .collect::>() + }) + .collect::>() + } + + fn singleton(source_document: &'b ContentDocument, token: Token<'a>) -> Self { + Self { + all_docs: std::slice::from_ref(source_document), + source_document_idx: 0, + token, + snipper: None, + } + } + + fn source_document(&self) -> &ContentDocument { + self.all_docs.get(self.source_document_idx).unwrap() + } + + pub fn token_info(&self) -> Vec { + if self.is_definition() { + let local_references = self.local_references(); + let repo_wide_references = self + .is_top_level() + .then(|| self.repo_wide_references()) + .unwrap_or_default(); + + local_references + .into_iter() + .chain(repo_wide_references) + .collect() + } else if self.is_reference() { + + let local_definitions = self.local_definitions(); + + let repo_wide_definitions = local_definitions + .is_none() + .then(|| self.repo_wide_definitions()) + .unwrap_or_default(); + + let local_references = self.local_references(); + let repo_wide_references = local_definitions + .is_none() + .then(|| self.repo_wide_references()) + .unwrap_or_default(); + + let imports = self.imports(); + + local_definitions + .or(imports) + .into_iter() + .chain(repo_wide_definitions) + .chain(local_references) + .chain(repo_wide_references) + .collect() + } else if self.is_import() { + let local_references = self.local_references(); + let repo_wide_definitions = 
self.repo_wide_definitions(); + + repo_wide_definitions + .into_iter() + .chain(local_references) + .collect() + } else { + Vec::new() + } + } + + fn is_definition(&self) -> bool { + self.source_document() + .symbol_locations + .scope_graph() + .and_then(|sg| { + let idx = sg.node_by_range(self.token.start_byte, self.token.end_byte)?; + Some(matches!(sg.get_node(idx).unwrap(), NodeKind::Def(_))) + }) + .unwrap_or_default() + } + + fn is_reference(&self) -> bool { + self.source_document() + .symbol_locations + .scope_graph() + .and_then(|sg| { + let idx = sg.node_by_range(self.token.start_byte, self.token.end_byte)?; + Some(matches!(sg.get_node(idx).unwrap(), NodeKind::Ref(_))) + }) + .unwrap_or_default() + } + + fn is_import(&self) -> bool { + self.source_document() + .symbol_locations + .scope_graph() + .and_then(|sg| { + let idx = sg.node_by_range(self.token.start_byte, self.token.end_byte)?; + Some(matches!(sg.get_node(idx).unwrap(), NodeKind::Import(_))) + }) + .unwrap_or_default() + } + + fn is_top_level(&self) -> bool { + self.source_document() + .symbol_locations + .scope_graph() + .and_then(|sg| { + let idx = sg.node_by_range(self.token.start_byte, self.token.end_byte)?; + Some(sg.is_top_level(idx)) + }) + .unwrap_or_default() + } + + fn non_source_documents(&self) -> impl Iterator { + self.all_docs + .iter() + .filter(|doc| doc.relative_path != self.source_document().relative_path) + } + + pub fn active_token_range(&self) -> std::ops::Range { + self.token.start_byte..self.token.end_byte + } + + pub fn active_token_text(&self) -> &str { + &self.source_document().content[self.active_token_range()] + } + + fn local_definitions(&self) -> Option { + let scope_graph = self.source_document().symbol_locations.scope_graph()?; + let node_idx = scope_graph.node_by_range(self.token.start_byte, self.token.end_byte)?; + let mut data = scope_graph + .definitions(node_idx) + .map(|idx| Occurrence { + kind: OccurrenceKind::Definition, + range: scope_graph.graph[idx].range(), + snippet: to_occurrence( + self.source_document(), + scope_graph.graph[idx].range(), + self.snipper, + ), + }) + .collect::>(); + + data.sort_by_key(|occurrence| occurrence.range.start.byte); + + data.is_empty().not().then(|| FileSymbols { + file: self.token.relative_path.to_owned(), + data, + }) + } + + fn repo_wide_definitions(&self) -> Vec { + self.non_source_documents() + .filter_map(|doc| { + let scope_graph = doc.symbol_locations.scope_graph()?; + let content = doc.content.as_bytes(); + let mut data = scope_graph + .graph + .node_indices() + .filter(|idx| scope_graph.is_top_level(*idx)) + .filter(|idx| { + if let Some(NodeKind::Def(d)) = scope_graph.get_node(*idx) { + d.name(content) == self.active_token_text().as_bytes() + } else { + false + } + }) + .map(|idx| Occurrence { + kind: OccurrenceKind::Definition, + range: scope_graph.graph[idx].range(), + snippet: to_occurrence(doc, scope_graph.graph[idx].range(), self.snipper), + }) + .collect::>(); + + data.sort_by_key(|occurrence| occurrence.range.start.byte); + + data.is_empty().not().then(|| FileSymbols { + file: doc.relative_path.to_owned(), + data, + }) + }) + .collect() + } + + fn local_references(&self) -> Option { + let scope_graph = self.source_document().symbol_locations.scope_graph()?; + let node_idx = scope_graph.node_by_range(self.token.start_byte, self.token.end_byte)?; + let mut data = scope_graph + .definitions(node_idx) + .chain(scope_graph.imports(node_idx)) + .flat_map(|idx| scope_graph.references(idx)) + .chain(scope_graph.references(node_idx)) + 
.map(|idx| Occurrence { + kind: OccurrenceKind::Reference, + range: scope_graph.graph[idx].range(), + snippet: to_occurrence( + self.source_document(), + scope_graph.graph[idx].range(), + self.snipper, + ), + }) + .collect::>(); + + data.retain(|occurrence| { + occurrence.range != scope_graph.get_node(node_idx).unwrap().range() + }); + data.sort_by_key(|occurrence| occurrence.range.start.byte); + data.is_empty().not().then(|| FileSymbols { + file: self.token.relative_path.to_owned(), + data, + }) + } + + fn repo_wide_references(&self) -> Vec { + self.non_source_documents() + .filter_map(|doc| { + let scope_graph = doc.symbol_locations.scope_graph()?; + let content = doc.content.as_bytes(); + let mut data = scope_graph + .graph + .node_indices() + .filter(|idx| scope_graph.is_top_level(*idx)) + .filter(|idx| match scope_graph.get_node(*idx).unwrap() { + NodeKind::Def(n) => n.name(content) == self.active_token_text().as_bytes(), + NodeKind::Import(n) => { + n.name(content) == self.active_token_text().as_bytes() + } + _ => false, + }) + .flat_map(|idx| scope_graph.references(idx)) + .map(|idx| Occurrence { + kind: OccurrenceKind::Reference, + range: scope_graph.graph[idx].range(), + snippet: to_occurrence(doc, scope_graph.graph[idx].range(), self.snipper), + }) + .collect::>(); + + data.sort_by_key(|occurrence| occurrence.range.start.byte); + + data.is_empty().not().then(|| FileSymbols { + file: doc.relative_path.to_owned(), + data, + }) + }) + .collect() + } + + fn imports(&self) -> Option { + let scope_graph = self.source_document().symbol_locations.scope_graph()?; + let node_idx = scope_graph.node_by_range(self.token.start_byte, self.token.end_byte)?; + let mut data = scope_graph + .imports(node_idx) + .map(|idx| Occurrence { + kind: OccurrenceKind::Definition, + range: scope_graph.graph[idx].range(), + snippet: to_occurrence( + self.source_document(), + scope_graph.graph[idx].range(), + self.snipper, + ), + }) + .collect::>(); + + data.sort_by_key(|occurrence| occurrence.range.start.byte); + + data.is_empty().not().then(|| FileSymbols { + file: self.token.relative_path.to_owned(), + data, + }) + } +} + +pub struct Token<'a> { + pub relative_path: &'a str, + pub start_byte: usize, + pub end_byte: usize, +} + +fn to_occurrence(doc: &ContentDocument, range: TextRange, snipper: Option) -> Snippet { + let src = &doc.content; + let line_end_indices = &doc.line_end_indices; + let highlight = range.start.byte..range.end.byte; + snipper + .unwrap_or_default() + .expand(highlight, src, line_end_indices) + .reify(src, &[]) +} + +// ranges of defs in related_file_document used in source_document +pub fn imported_ranges( + source_document: &ContentDocument, + related_file_document: &ContentDocument, +) -> HashSet { + let Some(source_sg) = source_document.symbol_locations.scope_graph() else { + return HashSet::new(); + }; + + let Some(related_file_sg) = related_file_document.symbol_locations.scope_graph() else { + return HashSet::new(); + }; + let related_file_content = &related_file_document.content; + + source_sg + .graph + .node_indices() + .filter(|idx| source_sg.is_reference(*idx) || source_sg.is_import(*idx)) + .filter(|&idx| { + let token = Token { + relative_path: &source_document.relative_path, + start_byte: source_sg.graph[idx].range().start.byte, + end_byte: source_sg.graph[idx].range().end.byte, + }; + CodeNavigationContext::singleton(source_document, token) + .local_definitions() + .is_none() + }) + .flat_map(|idx| { + let range = source_sg.graph[idx].range(); + let token = Token { + 
relative_path: &source_document.relative_path, + start_byte: range.start.byte, + end_byte: range.end.byte, + }; + let active_token_text = &source_document.content[token.start_byte..token.end_byte]; + related_file_sg + .graph + .node_indices() + .filter(|idx| related_file_sg.is_top_level(*idx)) + .filter(|idx| { + if let Some(NodeKind::Def(d)) = related_file_sg.get_node(*idx) { + d.name(related_file_content.as_bytes()) == active_token_text.as_bytes() + } else { + false + } + }) + .filter_map(|idx| related_file_sg.value_of_definition(idx)) + .map(|idx| related_file_sg.graph[idx].range()) + }) + .collect() +} diff --git a/src/intelligence/language.rs b/src/intelligence/language.rs new file mode 100644 index 0000000..3b454ff --- /dev/null +++ b/src/intelligence/language.rs @@ -0,0 +1,180 @@ +mod c; +mod c_sharp; +mod cobol; +mod cpp; +mod go; +mod java; +mod javascript; +mod python; +mod r; +mod ruby; +mod rust; +mod typescript; + +#[cfg(test)] +mod test_utils; + +use once_cell::sync::OnceCell; + +use super::NameSpaces; + +/// A collection of all language definitions +pub static ALL_LANGUAGES: &[&TSLanguageConfig] = &[ + &c::C, + &go::GO, + &javascript::JAVASCRIPT, + &python::PYTHON, + &rust::RUST, + &typescript::TYPESCRIPT, + &c_sharp::C_SHARP, + &java::JAVA, + &cpp::CPP, + &ruby::RUBY, + &r::R, + &cobol::COBOL, +]; + +/// A generic language wrapper type. +/// +/// The backing grammars/parser are supplied through the `Config` type. +pub enum Language<Config: 'static> { + /// A supported language, with some `Config`. + Supported(&'static Config), + + /// An unsupported language + Unsupported, +} + +/// Languages based on tree-sitter grammars +#[derive(Debug)] +pub struct TSLanguageConfig { + /// A list of language names that can be processed by these scope queries + /// e.g.: ["Typescript", "TSX"], ["Rust"] + pub language_ids: &'static [&'static str], + + /// Extensions that can help classify the file: .rs, .rb, .cabal + pub file_extensions: &'static [&'static str], + + /// tree-sitter grammar for this language + pub grammar: fn() -> tree_sitter::Language, + + /// Compiled tree-sitter scope query for this language. + pub scope_query: MemoizedQuery, + + /// Compiled tree-sitter hoverables query + pub hoverable_query: MemoizedQuery, + + /// Namespaces defined by this language, + /// E.g.: type namespace, variable namespace, function namespace + pub namespaces: NameSpaces, +} + +#[derive(Debug)] +pub struct MemoizedQuery { + slot: OnceCell<tree_sitter::Query>, + scope_query: &'static str, +} + +impl MemoizedQuery { + pub const fn new(scope_query: &'static str) -> Self { + Self { + slot: OnceCell::new(), + scope_query, + } + } + + /// Get a reference to the relevant tree sitter compiled query. + /// + /// This method compiles the query if it has not already been compiled. + pub fn query( + &self, + grammar: fn() -> tree_sitter::Language, + ) -> Result<&tree_sitter::Query, tree_sitter::QueryError> { + self.slot + .get_or_try_init(|| tree_sitter::Query::new(grammar(), self.scope_query)) + } +} + +pub type TSLanguage = Language<TSLanguageConfig>; + +impl TSLanguageConfig { + pub fn from_extension(extension: &str) -> Option<&'static str> { + for lang in ALL_LANGUAGES.iter() { + if lang.file_extensions.contains(&extension) { + return Some(lang.language_ids[0]); + } + } + None + } +} + +impl TSLanguage { + /// Find a tree-sitter language configuration from a language identifier + /// + /// See [0] for a list of valid language identifiers.
+ /// + /// [0]: https://github.com/monkslc/hyperpolyglot/blob/master/src/codegen/languages.rs + pub fn from_id(lang_id: &str) -> Self { + ALL_LANGUAGES + .iter() + .copied() + .find(|target| { + target + .language_ids + .iter() + .any(|&id| id.to_lowercase() == lang_id.to_lowercase()) + }) + .map_or(Language::Unsupported, Language::Supported) + } + + pub fn from_extension(extension: &str) -> Option<&'static str> { + TSLanguageConfig::from_extension(extension) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + use crate::intelligence::NameSpaceMethods; + + use std::collections::HashSet; + + use tree_sitter::Query; + + // ensure that the symbols in all queries files are supported symbols + #[test] + fn verify_all_symbol_kinds() { + let mut failed_languages = Vec::new(); + + for language in ALL_LANGUAGES { + let kinds = language.namespaces.all_symbols(); + if !has_valid_symbol_kinds(language.scope_query.query(language.grammar).unwrap(), kinds) + { + for id in language.language_ids { + failed_languages.push(*id); + } + } + } + + if !failed_languages.is_empty() { + panic!("invalid symbol kinds for {}", failed_languages.join(", ")) + } + } + + fn has_valid_symbol_kinds(query: &Query, kinds: Vec<&str>) -> bool { + let query_file_symbol_names = query + .capture_names() + .iter() + .filter_map(|name| name.split('.').nth(2)) + .map(ToOwned::to_owned) + .collect::<HashSet<_>>(); + + let supported_symbol_kinds = kinds + .iter() + .map(ToString::to_string) + .collect::<HashSet<_>>(); + + query_file_symbol_names == supported_symbol_kinds + } +} diff --git a/src/intelligence/language/c/mod.rs b/src/intelligence/language/c/mod.rs new file mode 100644 index 0000000..342de19 --- /dev/null +++ b/src/intelligence/language/c/mod.rs @@ -0,0 +1,677 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static C: TSLanguageConfig = TSLanguageConfig { + language_ids: &["C"], + file_extensions: &["c", "h"], + grammar: tree_sitter_c::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r" + [(identifier) + (field_identifier) + (statement_identifier) + (type_identifier)] @hoverable + ", + ), + namespaces: &[&[ + // imports + "header", + // functions + "macro", + "function", + // types + "struct", + "enum", + "enumerator", + "union", + "typedef", + // variables + "variable", + // misc.
+ "label", + ]], +}; + +#[cfg(test)] +mod tests { + use crate::intelligence::language::test_utils::*; + + #[test] + fn trivial() { + test_scopes( + "C", + r#" + #include + + #define PI 355/113 + #define AREA(r) PI * r * r + + int main() { + int radius = 5; + printf("%d", AREA(radius)); + } + "# + .as_bytes(), + expect![[r##" + scope { + definitions: [ + { + kind: "header", + context: "#include §§", + }, + PI { + kind: "macro", + context: "#define §PI§ 355/113", + }, + AREA { + kind: "macro", + context: "#define §AREA§(r) PI * r * r", + referenced in (1): [ + `printf("%d", §AREA§(radius));`, + ], + }, + main { + kind: "function", + context: "int §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + radius { + kind: "variable", + context: "int §radius§ = 5;", + referenced in (1): [ + `printf("%d", AREA(§radius§));`, + ], + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "##]], + ) + } + + #[test] + fn declarations() { + test_scopes( + "C", + r#" + int main() { + int a; + int *b; + struct S c; + struct S *d; + T1 *e(T2); + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "int §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "int §a§;", + }, + b { + kind: "variable", + context: "int *§b§;", + }, + c { + kind: "variable", + context: "struct S §c§;", + }, + d { + kind: "variable", + context: "struct S *§d§;", + }, + e { + kind: "function", + context: "T1 *§e§(T2);", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn types() { + test_scopes( + "C", + r#" + + // defs + struct A { + int e; + }; + + enum B { + C, D, E + }; + + union F { + int x; + char *y; + }; + + typedef struct { + int x; + } G; + + // refs + struct A *main(enum B b, void* g) { + union F *f = foo((struct G*) g); + switch (b) { + case C: + case D: + case E: + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + A { + kind: "struct", + context: "struct §A§ {", + referenced in (1): [ + `struct §A§ *main(enum B b, void* g) {`, + ], + }, + B { + kind: "enum", + context: "enum §B§ {", + referenced in (1): [ + `struct A *main(enum §B§ b, void* g) {`, + ], + }, + C { + kind: "enumerator", + context: "§C§, D, E", + referenced in (1): [ + `case §C§:`, + ], + }, + D { + kind: "enumerator", + context: "C, §D§, E", + referenced in (1): [ + `case §D§:`, + ], + }, + E { + kind: "enumerator", + context: "C, D, §E§", + referenced in (1): [ + `case §E§:`, + ], + }, + F { + kind: "union", + context: "union §F§ {", + referenced in (1): [ + `union §F§ *f = foo((struct G*) g);`, + ], + }, + G { + kind: "typedef", + context: "} §G§;", + referenced in (1): [ + `union F *f = foo((struct §G§*) g);`, + ], + }, + main { + kind: "function", + context: "struct A *§main§(enum B b, void* g) {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [ + b { + kind: "variable", + context: "struct A *main(enum B §b§, void* g) {", + referenced in (1): [ + `switch (§b§) {`, + ], + }, + g { + kind: "variable", + context: "struct A *main(enum B b, void* §g§) {", + referenced in (1): [ + `union F *f = foo((struct G*) §g§);`, + ], + }, + ], + child scopes: [ + scope { + definitions: 
[ + f { + kind: "variable", + context: "union F *§f§ = foo((struct G*) g);", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn function_parameters() { + test_scopes( + "C", + r#" + void main(int argc, char **argv) {} + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "void §main§(int argc, char **argv) {}", + }, + ], + child scopes: [ + scope { + definitions: [ + argc { + kind: "variable", + context: "void main(int §argc§, char **argv) {}", + }, + argv { + kind: "variable", + context: "void main(int argc, char **§argv§) {}", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ); + } + + // const sizes in array declarations should be refs + #[test] + fn const_dimension_array_declaration_regression() { + test_scopes( + "C", + r#" + #define CLUSTER_SLOTS 16384 + + typedef struct clusterState { + clusterNode *migrating_slots_to[CLUSTER_SLOTS]; + } clusterState; + "# + .as_bytes(), + expect![[r##" + scope { + definitions: [ + CLUSTER_SLOTS { + kind: "macro", + context: "#define §CLUSTER_SLOTS§ 16384", + referenced in (1): [ + `clusterNode *migrating_slots_to[§CLUSTER_SLOTS§];`, + ], + }, + clusterState { + kind: "struct", + context: "typedef struct §clusterState§ {", + }, + clusterState { + kind: "typedef", + context: "} §clusterState§;", + }, + ], + child scopes: [ + scope { + definitions: [ + migrating_slots_to { + kind: "variable", + context: "clusterNode *§migrating_slots_to§[CLUSTER_SLOTS];", + }, + ], + child scopes: [], + }, + ], + } + "##]], + ) + } + + // handle params correctly + #[test] + fn unresolved_function_parameters() { + test_scopes( + "C", + r#" + sds getNewBaseFileNameAndMarkPreAsHistory(aofManifest *am) { + serverAssert(am != NULL); + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + getNewBaseFileNameAndMarkPreAsHistory { + kind: "function", + context: "sds §getNewBaseFileNameAndMarkPreAsHistory§(aofManifest *am) {", + }, + ], + child scopes: [ + scope { + definitions: [ + am { + kind: "variable", + context: "sds getNewBaseFileNameAndMarkPreAsHistory(aofManifest *§am§) {", + referenced in (1): [ + `serverAssert(§am§ != NULL);`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // rhs of declarators should be references and not defs + #[test] + fn declarator_rhs_is_reference() { + test_scopes( + "C", + r#" + void main(const char *pe) { + const char *end = pe + ind; + const char *curr = pe; + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "void §main§(const char *pe) {", + }, + ], + child scopes: [ + scope { + definitions: [ + pe { + kind: "variable", + context: "void main(const char *§pe§) {", + referenced in (2): [ + `const char *end = §pe§ + ind;`, + `const char *curr = §pe§;`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + end { + kind: "variable", + context: "const char *§end§ = pe + ind;", + }, + curr { + kind: "variable", + context: "const char *§curr§ = pe;", + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn function_prototype_vs_function_definition() { + test_scopes( + "C", + r#" + void *foo(int *am, int 
ip); + void *foo(int *am, int ip) { + *am + ip; + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + foo { + kind: "function", + context: "void *§foo§(int *am, int ip);", + }, + foo { + kind: "function", + context: "void *§foo§(int *am, int ip) {", + }, + ], + child scopes: [ + scope { + definitions: [ + am { + kind: "variable", + context: "void *foo(int *§am§, int ip);", + }, + ip { + kind: "variable", + context: "void *foo(int *am, int §ip§);", + }, + ], + child scopes: [], + }, + scope { + definitions: [ + am { + kind: "variable", + context: "void *foo(int *§am§, int ip) {", + referenced in (1): [ + `*§am§ + ip;`, + ], + }, + ip { + kind: "variable", + context: "void *foo(int *am, int §ip§) {", + referenced in (1): [ + `*am + §ip§;`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn type_refs_in_field_declarations() { + test_scopes( + "C", + r#" + typedef enum { + CONN_STATE_NONE = 0, + } ConnectionState; + + struct connection { + ConnectionState state; + }; + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + CONN_STATE_NONE { + kind: "enumerator", + context: "§CONN_STATE_NONE§ = 0,", + }, + ConnectionState { + kind: "typedef", + context: "} §ConnectionState§;", + referenced in (1): [ + `§ConnectionState§ state;`, + ], + }, + connection { + kind: "struct", + context: "struct §connection§ {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + } + "#]], + ) + } + + #[test] + fn bug_report_ternary_expression() { + test_scopes( + "C", + r#" + int main() { + int a, b, c; + a ? b : c; + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "int §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "int §a§, b, c;", + referenced in (1): [ + `§a§ ? b : c;`, + ], + }, + b { + kind: "variable", + context: "int a, §b§, c;", + referenced in (1): [ + `a ? §b§ : c;`, + ], + }, + c { + kind: "variable", + context: "int a, b, §c§;", + referenced in (1): [ + `a ? b : §c§;`, + ], + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } +} diff --git a/src/intelligence/language/c/scopes.scm b/src/intelligence/language/c/scopes.scm new file mode 100644 index 0000000..0c73f0b --- /dev/null +++ b/src/intelligence/language/c/scopes.scm @@ -0,0 +1,245 @@ +;; scopes + +;; blocks +(compound_statement) @local.scope +(for_statement) @local.scope +(case_statement) @local.scope +(field_declaration_list) @local.scope + +;; functions are finicky, the are of two forms: +;; +;; 1. "declaration" with a "function_declarator" descendant +;; 2. "function_definition" node + +;; 1. function prototypes +;; these further seem to appear in two forms: +;; +;; type ident(...); +;; // or +;; type *ident(...); +;; +(declaration + [(function_declarator) @local.scope + (pointer_declarator + (function_declarator) @local.scope)]) + +;; 2. 
function definitions +(function_definition) @local.scope + +;; similar logic applies to typedefs with functions +;; in them +(type_definition + (function_declarator) @local.scope) + + +;; defs + +;; #include +(preproc_include + [(system_lib_string) + (string_literal)] @local.definition.header) + +;; #define PI 355/113 +(preproc_def + (identifier) @local.definition.macro) + +;; #define AREA(r) PI * r * r +(preproc_function_def + (identifier) @local.definition.macro) + +;; a[SIZE] = {1, 2, ..} +(array_declarator + declarator: (identifier) @local.definition.variable) +(array_declarator + declarator: (field_identifier) @local.definition.variable) + +;; int (a) = 2; +(parenthesized_declarator + (identifier) @local.definition.variable) + +;; int *a = b; +(pointer_declarator + (identifier) @local.definition.variable) + +(declaration + (identifier) @local.definition.variable) + +(declaration + (init_declarator + declarator: (identifier) @local.definition.variable)) + +(parameter_declaration + (identifier) @local.definition.variable) + +;; structs +(struct_specifier + name: (type_identifier) @local.definition.struct + body: (_)) + +;; unions +(union_specifier + name: (type_identifier) @local.definition.union + body: (_)) + +;; enums +(enum_specifier + name: (type_identifier) @local.definition.enum + body: (_)) +(enumerator + name: (identifier) @local.definition.enumerator) + +;; typedef struct { int e; } X; +(type_definition + (type_identifier) @local.definition.typedef) + +;; function definition +(function_declarator + (identifier) @hoist.definition.function) + +;; labels +(labeled_statement + (statement_identifier) @local.definition.label) + + + +;; refs + +;; abc; +(expression_statement + (identifier) @local.reference) + +;; (abc) +(parenthesized_expression + (identifier) @local.reference) + +;; !z +(unary_expression + (identifier) @local.reference) + +;; a + b +(binary_expression + (identifier) @local.reference) + +;; a ? b : c +(conditional_expression + (identifier) @local.reference) + +;; ++a +(update_expression + (identifier) @local.reference) + +;; call(_, _, _) +(call_expression + (identifier) @local.reference) + +;; _(arg, arg, arg) +(argument_list + (identifier) @local.reference) + +;; field access +;; +;; three types of field access: +;; - a[b]: a and b are refs +;; - a.b +;; - a->b +;; +;; a[b] +(subscript_expression + (identifier) @local.reference) +;; a.b +;; a->b +(field_expression + . 
+ (identifier) @local.reference) + +;; array[CONST] +;; ^^^^^ is a ref +(array_declarator + size: (identifier) @local.reference) + +;; comma operator +;; (a, a++, a <= 2) +(comma_expression + (identifier) @local.reference) + +;; ref and deref +(pointer_expression + (identifier) @local.reference) + +;; assignment expressions +(assignment_expression + (identifier) @local.reference) + +;; type refs in declarations +(declaration + type: + [(struct_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; type refs in field declarations +(field_declaration + type: + [(struct_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; type refs in return types +(function_definition + type: + [(struct_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; type refs in params +(parameter_declaration + type: + [(struct_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; type refs in casts +(cast_expression + type: + (type_descriptor + [(struct_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference])) + +;; type refs in casts + +;; rhs of a declaration +(init_declarator + value: (identifier) @local.reference) + +;; (void *) a; +(cast_expression + value: (identifier) @local.reference) + +;; (SomeStruct) { .field = ident } +(initializer_pair + (identifier) @local.reference) +(subscript_designator + (identifier) @local.reference) + +;; lists +(initializer_list + (identifier) @local.reference) + +;; return a; +(return_statement + (identifier) @local.reference) + +;; goto a; +(goto_statement + (statement_identifier) @local.reference) + +;; case ident: +;; stmt; +(case_statement + (identifier) @local.reference) diff --git a/src/intelligence/language/c_sharp/mod.rs b/src/intelligence/language/c_sharp/mod.rs new file mode 100644 index 0000000..0bb9f90 --- /dev/null +++ b/src/intelligence/language/c_sharp/mod.rs @@ -0,0 +1,239 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static C_SHARP: TSLanguageConfig = TSLanguageConfig { + language_ids: &["C#"], + file_extensions: &["cs"], + grammar: tree_sitter_c_sharp::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + (identifier) @hoverable + "#, + ), + namespaces: &[&[ + // variables, functions + "local", + // types + "class", + "struct", + "enum", + "typedef", + "interface", + "enumerator", + // methods + "method", + // namespaces + "namespace", + ]], +}; + +#[cfg(test)] +mod tests { + use crate::intelligence::language::test_utils::*; + + // tests the following constructs: + // + // - class declarations + #[test] + fn trivial() { + test_scopes( + "C#", + r#" + using System; + namespace HelloWorldApp { + class HelloWorld { + static void Main(string[] args) { + Console.WriteLine("Hello World!"); + } + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + 
HelloWorldApp { + kind: "namespace", + context: "namespace §HelloWorldApp§ {", + }, + ], + child scopes: [ + scope { + definitions: [ + HelloWorld { + kind: "class", + context: "class §HelloWorld§ {", + }, + ], + child scopes: [ + scope { + definitions: [ + Main { + kind: "method", + context: "static void §Main§(string[] args) {", + }, + ], + child scopes: [ + scope { + definitions: [ + args { + kind: "local", + context: "static void Main(string[] §args§) {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn generics_and_type_constraints() { + test_scopes( + "C#", + r#" + namespace N { + public interface I1 { + public void F1() {} + public void F2() {} + } + + public class C1 where T: I1 where U: struct { + public string Prop1; + + public void M1(T t, U u) { + this.Prop1 = t; + } + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + N { + kind: "namespace", + context: "namespace §N§ {", + }, + ], + child scopes: [ + scope { + definitions: [ + I1 { + kind: "interface", + context: "public interface §I1§ {", + referenced in (1): [ + `public class C1 where T: §I1§ where U: struct {`, + ], + }, + C1 { + kind: "class", + context: "public class §C1§ where T: I1 where U: struct {", + }, + ], + child scopes: [ + scope { + definitions: [ + F1 { + kind: "method", + context: "public void §F1§() {}", + }, + F2 { + kind: "method", + context: "public void §F2§() {}", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + scope { + definitions: [ + T { + kind: "typedef", + context: "public class C1<§T§, U> where T: I1 where U: struct {", + referenced in (2): [ + `public class C1 where §T§: I1 where U: struct {`, + `public void M1(§T§ t, U u) {`, + ], + }, + U { + kind: "typedef", + context: "public class C1 where T: I1 where U: struct {", + referenced in (2): [ + `public class C1 where T: I1 where §U§: struct {`, + `public void M1(T t, §U§ u) {`, + ], + }, + Prop1 { + kind: "local", + context: "public string §Prop1§;", + referenced in (1): [ + `this.§Prop1§ = t;`, + ], + }, + M1 { + kind: "method", + context: "public void §M1§(T t, U u) {", + }, + ], + child scopes: [ + scope { + definitions: [ + t { + kind: "local", + context: "public void M1(T §t§, U u) {", + referenced in (1): [ + `this.Prop1 = §t§;`, + ], + }, + u { + kind: "local", + context: "public void M1(T t, U §u§) {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } +} diff --git a/src/intelligence/language/c_sharp/scopes.scm b/src/intelligence/language/c_sharp/scopes.scm new file mode 100644 index 0000000..dcfd842 --- /dev/null +++ b/src/intelligence/language/c_sharp/scopes.scm @@ -0,0 +1,445 @@ +;; scopes +[ + (block) + (switch_expression_arm) + (anonymous_method_expression) + (lambda_expression) + + ;; functions + (local_function_statement) + (arrow_expression_clause) + + ;; switch statements + (switch_section) + (case_pattern_switch_label) + + ;; namespaces + (namespace_declaration) + + ;; class items + (class_declaration) + (constructor_declaration) + (destructor_declaration) + (indexer_declaration) + (method_declaration) + + ;; enum items + (enum_member_declaration_list) + + ;; interface items + (interface_declaration) + + ;; 
record items + (record_declaration) + (record_struct_declaration) + + ;; struct items + (struct_declaration) + + ;; catch statements + (catch_clause) + + ;; using statement: + ;; + ;; using (var a = b) { .. } + (using_statement) + + ;; fixed statement: + ;; + ;; fixed (var a = b) { .. } + (fixed_statement) + + ;; for (int i = 0; cond; step) { .. } + (for_statement) + + ;; foreach(int x in y) { .. } + (for_each_statement) +] @local.scope + +;; defs + +;; var declarations +(variable_declarator + (identifier) @local.definition.local) + +(declaration_expression + (identifier) @local.definition.local) + +;; namespaces +(namespace_declaration + (identifier) @hoist.definition.namespace) + +;; classes +;; +;; - class name +;; - type params +;; - constructors +;; - destructors + +(class_declaration + name: (identifier) @hoist.definition.class) +(constructor_declaration + name: (identifier) @hoist.definition.method) +(destructor_declaration + name: (identifier) @hoist.definition.method) +(method_declaration + name: (identifier) @hoist.definition.method) + +;; enums +(enum_declaration + (identifier) @local.definition.enum) +(enum_member_declaration + (identifier) @local.definition.enumerator) + +;; interfaces +(interface_declaration + name: (identifier) @hoist.definition.interface) + +;; records +;; +;; record F {} +(record_declaration + name: (identifier) @hoist.definition.class) +;; record struct F {} +(record_struct_declaration + name: (identifier) @hoist.definition.struct) + +;; structs +(struct_declaration + name: (identifier) @hoist.definition.struct) + +;; functions +(local_function_statement + name: (identifier) @hoist.definition.local) + + +;; patterns are defs +;; x is a +(constant_pattern + (identifier) @local.definition.local) + +;; x is var a +(var_pattern + (identifier) @local.definition.local) + +;; (x, y, z) = _ +(tuple_pattern + (identifier) @local.definition.local) + +;; x is var (x, y) +(parenthesized_variable_designation + (identifier) @local.definition.local) + +;; x is var a +(declaration_pattern + name: (identifier) @local.definition.local) + +;; params are defs +(parameter + name: (identifier) @local.definition.local) + +;; type params make defs +(type_parameter + (identifier) @local.definition.typedef) + +;; [params string[] args] +(bracketed_parameter_list + (identifier) @local.definition.local) + +;; lambda params +(lambda_expression + (modifier)* + . + (identifier) @local.definition.local) + +;; catch (Exception ex) {} +(catch_declaration + name: (identifier) @local.definition.local) + +;; foreach(Type x in y) { .. } +;; +;; `Type` is a ref +;; `x` is a def +;; `y` is a ref +(for_each_statement + left: (identifier) @local.definition.local) + +;; imports + +;; using System.Text +;; +;; `Text` is an import +(using_directive + . + (qualified_name + (_) + . 
+ (identifier) @local.import)) + +;; using Named = System.Text; +;; +;; `Named` is a def +(using_directive + (name_equals + (identifier) @local.import)) + + +;; refs + +(binary_expression + (identifier) @local.reference) + +;; ternary expr +(conditional_expression + (identifier) @local.reference) + +;; a; +(expression_statement + (identifier) @local.reference) + +;; x is int +(is_expression + (identifier) @local.reference) +;; x is String +(is_pattern_expression + (identifier) @local.reference) + +;; ident as Type +(as_expression + (identifier) @local.reference) + +;; ++x +(prefix_unary_expression + (identifier) @local.reference) +;; x++ +(postfix_unary_expression + (identifier) @local.reference) + +;; a = b +(assignment_expression + (identifier) @local.reference) + +;; rhs of equal to signs +;; +;; _ = b +(equals_value_clause + (identifier) @local.reference) + +;; (Type)v +(cast_expression + type: (identifier) @local.reference) +(cast_expression + value: (identifier) @local.reference) + +;; a[] +(element_access_expression + (identifier) @local.reference) + +;; range exprs +(range_expression + (identifier) @local.reference) + +;; function or array args +(argument + (identifier) @local.reference) + +;; ident switch {} +(switch_expression + (identifier) @local.reference) +(switch_expression_arm + (identifier) @local.reference) + +;; checked(ident) +(checked_expression + (identifier) @local.reference) + +;; __makeref(ident) +(make_ref_expression + (identifier) @local.reference) + +;; __reftype(ident) +(ref_type_expression + (identifier) @local.reference) + +;; __refvalue(ident, type) +(ref_value_expression + (identifier) @local.reference) + +;; sizeof(type) +(size_of_expression + (identifier) @local.reference) + +;; typeof(ident) +(type_of_expression + (identifier) @local.reference) + +;; default(Type) +(default_expression + (identifier) @local.reference) + +;; new Obj {} +(object_creation_expression + (identifier) @local.reference) + +;; foo() +(invocation_expression + (identifier) @local.reference) + +;; A.b +(member_access_expression + . + (identifier) @local.reference) + +;; this.b +;; +;; we can resolve `b` here +(member_access_expression + (this_expression) + (identifier) @local.reference) + +;; return t +(return_statement + (identifier) @local.reference) + +;; await t +(await_expression + (identifier) @local.reference) + +;; throw t +(throw_expression + (identifier) @local.reference) + +;; lock (mutex) +(lock_statement + (identifier) @local.reference) + +;; lambda body +(lambda_expression + body: (identifier) @local.reference) + +;; new [] {a, b, c} +(initializer_expression + (identifier) @local.reference) + +;; b?.member +(conditional_access_expression + condition: (identifier) @local.reference) + +;; (a) +(parenthesized_expression + (identifier) @local.reference) + +;; A.b +(qualified_name + . + (identifier) @local.reference) + +;; $"Good morning {name}" +(interpolation + (identifier) @local.reference) + +;; record updates +;; +;; item with { field = new_field, } +(with_expression + (identifier) @local.reference) +(simple_assignment_expression + (identifier) + "=" + (identifier) @local.reference) + +;; while (ident) { .. } +(while_statement + (identifier) @local.reference) + +;; do { .. } while (ident) +(do_statement + (identifier) @local.reference) + +;; if (ident) { .. } +(if_statement + (identifier) @local.reference) + +;; switch (ident) { .. 
} +;; +;; different from switch_expression +(switch_statement + (identifier) @local.reference) +(when_clause + (identifier) @local.reference) + +;; foreach(Type x in y) { .. } +;; +;; `Type` is a ref +;; `x` is a def +;; `y` is a ref +(for_each_statement + type: (identifier) @local.reference) +(for_each_statement + right: (identifier) @local.reference) + + +;; type refs + +;; Type? +(nullable_type + (identifier) @local.reference) + +;; types in params +(parameter + type: (identifier) @local.reference) + +;; Type[] +(array_type + (identifier) @local.reference) +(array_rank_specifier + (identifier) @local.reference) + +;; tuple types +(tuple_element + type: (identifier) @local.reference) + +;; generics +(generic_name + (identifier) @local.reference) +(generic_name + (type_argument_list + (identifier) @local.reference)) + +;; type ref in pattern +(declaration_pattern + type: (identifier) @local.reference) + +;; catch decl +(catch_declaration + type: (identifier) @local.reference) + +;; type ref in object patterns +(recursive_pattern + (identifier) @local.reference) + +;; type patterns +(type_pattern + (identifier) @local.reference) + +;; type constraints +(type_parameter_constraints_clause + (identifier) @local.reference) +(type_constraint + (identifier) @local.reference) + +;; base types in enums & interfaces +;; +;; enum Direction: Type { +;; ... +;; } +(base_list + (identifier) @local.reference) + +(base_list + (primary_constructor_base_type + (identifier) @local.reference)) + +;; function return type +(local_function_statement + type: (identifier) @local.reference) diff --git a/src/intelligence/language/cobol/mod.rs b/src/intelligence/language/cobol/mod.rs new file mode 100644 index 0000000..f063fcf --- /dev/null +++ b/src/intelligence/language/cobol/mod.rs @@ -0,0 +1,16 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static COBOL: TSLanguageConfig = TSLanguageConfig { + language_ids: &["COBOL"], + file_extensions: &["cbl", "cpy", "cob", "ccp", "cobol"], + grammar: tree_sitter_COBOL::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r" + [(program_name) + (entry_name) + (WORD)] @hoverable + ", + ), + namespaces: &[&["program", "file", "data", "paragraph"]], +}; diff --git a/src/intelligence/language/cobol/scopes.scm b/src/intelligence/language/cobol/scopes.scm new file mode 100644 index 0000000..01da444 --- /dev/null +++ b/src/intelligence/language/cobol/scopes.scm @@ -0,0 +1,273 @@ +;; there are no scopes to a cobol program + +(program_name) @local.definition.program + +;; defs +(file_description + (file_description_entry + (WORD) @local.definition.file)) +(data_description + (entry_name) @local.definition.data) +(paragraph_header + name: (_) @local.definition.paragraph) + +;; refs +(select_statement + file_name: (_) @local.reference) +(select_statement + (assign_clause + to: (qualified_word + (WORD) @local.reference))) + +(copy_statement + book: (_) @local.reference) + +(record_key_clause + reference: (qualified_word + (WORD) @local.reference)) + +(file_status_clause + reference: (qualified_word + (WORD) @local.reference)) + +(read_statement + file_name: (_) @local.reference) + +(read_statement + into: (qualified_word + (WORD) @local.reference)) + +(release_statement + record: (qualified_word + (WORD) @local.reference)) + +(release_statement + from: (qualified_word + (WORD) @local.reference)) + +(return_statement + file_name: (WORD) @local.reference) + +(return_statement + into: (qualified_word + 
(WORD) @local.reference)) + +(rewrite_statement + record: (qualified_word + (WORD) @local.reference) + from: (qualified_word + (WORD) @local.reference)) + +(search_statement + table_name: (qualified_word + (WORD) @local.reference)) + +(search_statement + varying: (qualified_word + (WORD) @local.reference)) + +(set_statement + (set_environment + (qualified_word + (WORD) @local.reference))) + +(set_statement + (set_to + (qualified_word + (WORD) @local.reference))) + +(set_statement + (set_up_down + (qualified_word + (WORD) @local.reference))) + +(move_statement + (qualified_word + (WORD) @local.reference)) + +(perform_statement_call_proc + procedure: (perform_procedure + (label + (qualified_word + (WORD) @local.reference)))) + +(display_statement + (qualified_word + (WORD) @local.reference)) + +(accept_statement + (qualified_word + (WORD) @local.reference)) + +(add_statement + (qualified_word + (WORD) @local.reference)) + +(multiply_statement + (qualified_word + (WORD) @local.reference)) + +(subtract_statement + (qualified_word + (WORD) @local.reference)) + +(allocate_statement + x: (WORD) @local.reference) + +(allocate_statement + returning: (qualified_word + (WORD) @local.reference)) + +(alter_statement + (alter_option + proc_name: (qualified_word + (WORD) @local.reference))) + +(alter_statement + (alter_option + to: (qualified_word + (WORD) @local.reference))) + +(call_statement + x: (qualified_word + (WORD) @local.reference)) + +;; todo GIVING FOO returns +(call_statement + returning: (qualified_word + (WORD) @local.reference)) + +(call_param_arg + (qualified_word + (WORD) @local.reference)) + +(cancel_statement + (qualified_word + (WORD) @local.reference)) + +(close_statement + (close_arg + (WORD) @local.reference)) + +(delete_statement + file_name: (_) @local.reference) + +(divide_statement + x: (qualified_word + (WORD) @local.reference)) + +(goto_statement + to: (label + (qualified_word + (WORD) @local.reference))) + +(initialize_statement + (qualified_word + (WORD) @local.reference)) + +(inspect_statement + send: (qualified_word + (WORD) @local.reference)) + +(inspect_converting + (qualified_word + (WORD) @local.reference)) + +(inspect_tallying + (qualified_word + (WORD) @local.reference)) + +(inspect_replacing + (replacing_item + (replacing_region + (qualified_word + (WORD) @local.reference)))) + +(merge_statement + x: (qualified_word + (WORD) @local.reference)) + +(merge_statement + collating: (qualified_word + (WORD) @local.reference)) + +(merge_statement + output: (sort_output_giving + (WORD) @local.reference)) + +(merge_statement + output: (sort_output_procedure + (perform_procedure + (label + (qualified_word + (WORD) @local.reference))))) + +(start_statement + file_name: (WORD) @local.reference) + +(sort_key + key_list: (qualified_word + (WORD) @local.reference)) + +; todo call this key list +(start_key + keys: (qualified_word + (WORD) @local.reference)) + +(stop_statement + (qualified_word + (WORD) @local.reference)) + +(string_statement + (qualified_word + (WORD) @local.reference)) + +(unstring_statement + (qualified_word + (WORD) @local.reference)) + +(string_statement + (string_item + (qualified_word + (WORD) @local.reference))) + +(unstring_statement + (unstring_delimited + (unstring_delimited_item + (qualified_word + (WORD) @local.reference)))) + +(unstring_statement + (unstring_into_item + (qualified_word + (WORD) @local.reference))) + +(use_statement + (WORD) @local.reference) + +(use_statement + (qualified_word + (WORD) @local.reference)) + +(write_statement + 
(qualified_word + (WORD) @local.reference)) + +(use_statement + (label + (qualified_word + (WORD) @local.reference))) + +(open_statement + (open_arg + (WORD) @local.reference)) + +(expr + (qualified_word + (WORD) @local.reference)) + +(arithmetic_x + (qualified_word + (WORD) @local.reference)) + + diff --git a/src/intelligence/language/cpp/mod.rs b/src/intelligence/language/cpp/mod.rs new file mode 100644 index 0000000..1aed9dc --- /dev/null +++ b/src/intelligence/language/cpp/mod.rs @@ -0,0 +1,415 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static CPP: TSLanguageConfig = TSLanguageConfig { + language_ids: &["C++"], + file_extensions: &["cpp", "cc", "h"], + grammar: tree_sitter_cpp::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + [(identifier) + (field_identifier) + (type_identifier) + (statement_identifier) + (qualified_identifier) + (namespace_identifier)] @hoverable + "#, + ), + namespaces: &[&[ + // imports + "header", + // namespacing + "namespace", + // functions + "macro", + "function", + // types + "class", + "struct", + "enum", + "enumerator", + "union", + "typedef", + "concept", + // variables + "variable", + // misc. + "label", + "alias", + ]], +}; + +#[cfg(test)] +mod tests { + + use crate::intelligence::language::test_utils::*; + + // tests the following constructs: + // + // - templates on classes + // - classes + // - class props + #[test] + fn trivial() { + test_scopes( + "C++", + r#" + template + class AdvancedColumnFamilyOptions { + private: + const std::vector& options; + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + AdvancedColumnFamilyOptions { + kind: "class", + context: "class §AdvancedColumnFamilyOptions§ {", + }, + ], + child scopes: [ + scope { + definitions: [ + T { + kind: "typedef", + context: "template ", + referenced in (1): [ + `const std::vector<§T§>& options;`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + options { + kind: "variable", + context: "const std::vector& §options§;", + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // this syntax is not present in C + #[test] + fn for_range_loops() { + test_scopes( + "C++", + r#" + struct I { + }; + + int main() { + struct I *items = {}; + for(I item: items) { + print(item); + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + I { + kind: "struct", + context: "struct §I§ {", + referenced in (2): [ + `struct §I§ *items = {};`, + `for(§I§ item: items) {`, + ], + }, + main { + kind: "function", + context: "int §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + items { + kind: "variable", + context: "struct I *§items§ = {};", + referenced in (1): [ + `for(I item: §items§) {`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + item { + kind: "variable", + context: "for(I §item§: items) {", + referenced in (1): [ + `print(§item§);`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + // the syntax nodes seem to have changed from the C impl. 
+ #[test] + fn if_while_switch() { + test_scopes( + "C++", + r#" + int main() { + int a; + if (a == 0) {}; + switch (a) {}; + while (a) {}; + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "int §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "int §a§;", + referenced in (3): [ + `if (§a§ == 0) {};`, + `switch (§a§) {};`, + `while (§a§) {};`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn concepts() { + test_scopes( + "C++", + r#" + template concept C2 = + requires(T x) { + {*x} -> std::convertible_to; + {x + 1} -> std::same_as; + {x * 1} -> std::convertible_to; + }; + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + C2 { + kind: "concept", + context: "template concept §C2§ =", + }, + ], + child scopes: [ + scope { + definitions: [ + T { + kind: "typedef", + context: "template concept C2 =", + referenced in (3): [ + `requires(§T§ x) {`, + `{*x} -> std::convertible_to;`, + `{x * 1} -> std::convertible_to<§T§>;`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + x { + kind: "variable", + context: "requires(T §x§) {", + referenced in (3): [ + `{*§x§} -> std::convertible_to;`, + `{§x§ + 1} -> std::same_as;`, + `{§x§ * 1} -> std::convertible_to;`, + ], + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // variables in throw statements do not resolve, reported by @ggordonhall + #[test] + fn bug_report_throw_statement() { + test_scopes( + "C++", + r#" + int main() { + try + { } + catch (Exception ex) + { throw ex; } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "int §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [ + ex { + kind: "variable", + context: "catch (Exception §ex§)", + referenced in (1): [ + `{ throw §ex§; }`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + // ternary expressions do not resolve correctly, reported by @ggordonhall + #[test] + fn bug_report_ternary_expression() { + test_scopes( + "C++", + r#" + int main() { + int a, b, c; + a ? b : c; + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "int §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "int §a§, b, c;", + referenced in (1): [ + `§a§ ? b : c;`, + ], + }, + b { + kind: "variable", + context: "int a, §b§, c;", + referenced in (1): [ + `a ? §b§ : c;`, + ], + }, + c { + kind: "variable", + context: "int a, b, §c§;", + referenced in (1): [ + `a ? 
b : §c§;`, + ], + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } +} diff --git a/src/intelligence/language/cpp/scopes.scm b/src/intelligence/language/cpp/scopes.scm new file mode 100644 index 0000000..b89b8bd --- /dev/null +++ b/src/intelligence/language/cpp/scopes.scm @@ -0,0 +1,453 @@ +;; scopes + +;; blocks +(compound_statement) @local.scope +(for_statement) @local.scope +(for_range_loop) @local.scope +(case_statement) @local.scope +(field_declaration_list) @local.scope +(lambda_expression) @local.scope +(requires_expression) @local.scope +(namespace_definition) @local.scope + +;; functions are finicky, the are of two forms: +;; +;; 1. "declaration" with a "function_declarator" descendant +;; 2. "function_definition" node + +;; 1. function prototypes +;; these further seem to appear in two forms: +;; +;; type ident(...); +;; // or +;; type *ident(...); +;; +(declaration + [(function_declarator) @local.scope + (pointer_declarator + (function_declarator) @local.scope) + (reference_declarator + (function_declarator) @local.scope)]) + +;; 2. function definitions +(function_definition) @local.scope + +;; similar logic applies to typedefs with functions +;; in them +(type_definition + (function_declarator) @local.scope) + +;; catch blocks create scopes +(catch_clause) @local.scope + +;; templates create type variables +;; +;; FIXME: templates are limited to classes for +;; now this is because templates add a level +;; of nesting to all items except classes, which +;; puts items in smaller scopes than the ones they +;; are declared in. +(template_declaration + (class_specifier)) @local.scope +(template_declaration + (concept_definition)) @local.scope + + +;; defs + +;; #include +(preproc_include + [(system_lib_string) + (string_literal)] @local.definition.header) + +;; #define PI 355/113 +(preproc_def + (identifier) @local.definition.macro) + +;; #define AREA(r) PI * r * r +(preproc_function_def + (identifier) @local.definition.macro) + +;; a[SIZE] = {1, 2, ..} +(array_declarator + declarator: (identifier) @local.definition.variable) +(array_declarator + declarator: (field_identifier) @local.definition.variable) + +;; int (a) = 2; +(parenthesized_declarator + (identifier) @local.definition.variable) + +;; int *a = b; +(pointer_declarator + (identifier) @local.definition.variable) + +;; int &&a = _; +(reference_declarator + [(identifier) + (field_identifier)] @local.definition.variable) + +(declaration + (identifier) @local.definition.variable) + +(declaration + (init_declarator + declarator: (identifier) @local.definition.variable)) + +;; rhs patterns of a declaration +(structured_binding_declarator + (identifier) @local.definition.variable) + +(parameter_declaration + (identifier) @local.definition.variable) + +(optional_parameter_declaration + declarator: + (identifier) @local.definition.variable) + +(variadic_parameter_declaration + (variadic_declarator + (identifier) @local.definition.variable)) + +;; type params in templates +;; +;; FIXME: limited to classes for now +(template_declaration + (template_parameter_list + (type_parameter_declaration + (type_identifier) @local.definition.typedef)) + [(class_specifier) + (concept_definition)]) + +;; concepts +(concept_definition + name: (identifier) @hoist.definition.concept) + +;; namespaces +(namespace_definition + (identifier) @hoist.definition.namespace) + +;; for (int a: b) { .. 
} +;; +;; `a` is a def +(for_range_loop + declarator: (identifier) @local.definition.variable) + +;; structs +(struct_specifier + name: (type_identifier) @local.definition.struct + body: (_)) + +;; unions +(union_specifier + name: (type_identifier) @local.definition.union + body: (_)) + +;; enums +(enum_specifier + name: (type_identifier) @local.definition.enum + body: (_)) +(enumerator + name: (identifier) @local.definition.enumerator) + +;; classes +(class_specifier + name: (type_identifier) @hoist.definition.class + body: (_)) +;; class fields +(field_declaration + (field_identifier) @local.definition.variable) + +;; typedef struct { int e; } X; +(type_definition + (type_identifier) @local.definition.typedef) + +;; function definition +(function_declarator + (identifier) @hoist.definition.function) +;; methods +(function_declarator + (field_identifier) @hoist.definition.function) + +;; labels +(labeled_statement + (statement_identifier) @local.definition.label) + +;; using statements are declarations +(using_declaration + (identifier) @local.definition) + +;; using a::b; +(using_declaration + (qualified_identifier + name: (identifier) @local.definition)) + +;; using a = b; +(alias_declaration + name: (type_identifier) @local.definition.alias) + + + +;; refs + +;; abc; +(expression_statement + (identifier) @local.reference) + +;; (abc) +(parenthesized_expression + (identifier) @local.reference) + +;; a::b +(qualified_identifier + (namespace_identifier) @local.reference) + +;; !z +(unary_expression + (identifier) @local.reference) + +;; a + b +(binary_expression + (identifier) @local.reference) + +;; ++a +(update_expression + (identifier) @local.reference) + +;; a? b : c +(conditional_expression + (identifier) @local.reference) + +;; call(_, _, _) +(call_expression + (identifier) @local.reference) + +;; _(arg, arg, arg) +(argument_list + (identifier) @local.reference) + +;; field access +;; +;; three types of field access: +;; - a[b]: a and b are refs +;; - a.b +;; - a->b +;; +;; a[b] +(subscript_expression + (identifier) @local.reference) +;; a.b +;; a->b +(field_expression + . + (identifier) @local.reference) +;; if the ident is a destructor, we can +;; attempt to resolve it to its class +(field_expression + (_) + (destructor_name + (identifier) @local.reference)) + +;; array[CONST] +;; ^^^^^ is a ref +(array_declarator + size: (identifier) @local.reference) + +;; comma operator +;; (a, a++, a <= 2) +(comma_expression + (identifier) @local.reference) + +;; ref and deref +(pointer_expression + (identifier) @local.reference) + +;; assignment expressions +(assignment_expression + (identifier) @local.reference) + +;; for (Type a: b) {.. 
} +;; +;; `Type` is a ref +;; `b` is a ref +(for_range_loop + type: (type_identifier) @local.reference) +(for_range_loop + right: (identifier) @local.reference) + +(condition_clause + (identifier) @local.reference) + +;; rhs of a concept +(concept_definition + name: (_) + (identifier) @local.reference) + +;; type refs in declarations +(declaration + type: + [(struct_specifier (type_identifier) @local.reference) + (class_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; type refs in return types +(function_definition + type: + [(struct_specifier (type_identifier) @local.reference) + (class_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; type refs in params +(parameter_declaration + type: + [(struct_specifier (type_identifier) @local.reference) + (class_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; type refs in optional params +(optional_parameter_declaration + type: + [(struct_specifier (type_identifier) @local.reference) + (class_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; type refs in casts +(cast_expression + type: + (type_descriptor + [(struct_specifier (type_identifier) @local.reference) + (class_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference])) + +;; type refs in field declarations +(field_declaration + type: + [(struct_specifier (type_identifier) @local.reference) + (class_specifier (type_identifier) @local.reference) + (enum_specifier (type_identifier) @local.reference) + (union_specifier (type_identifier) @local.reference) + (type_identifier) @local.reference]) + +;; default value in rhs of field decls. +(field_declaration + (identifier) @local.reference) + +;; rhs of optional parameter decls. +(optional_parameter_declaration + default_value: + (identifier) @local.reference) + + +;; type refs in friend declarations +(friend_declaration + (type_identifier) @local.reference) + +;; rhs of a declaration +(init_declarator + value: (identifier) @local.reference) + +;; (void *) a; +(cast_expression + value: (identifier) @local.reference) + +;; (SomeStruct) { .field = ident } +(initializer_pair + (identifier) @local.reference) +(subscript_designator + (identifier) @local.reference) + +;; lists +(initializer_list + (identifier) @local.reference) + +;; return a; +(return_statement + (identifier) @local.reference) + +;; delete a; +(delete_expression + (identifier) @local.reference) + +;; new T; +(new_expression + (type_identifier) @local.reference) + +;; goto a; +(goto_statement + (statement_identifier) @local.reference) + +;; co_await var; +(co_await_expression + (identifier) @local.reference) + +;; throw ex; +(throw_statement + (identifier) @local.reference) + +;; (a + ... 
+ b) +(fold_expression + (identifier) @local.reference) + +(lambda_capture_specifier + (identifier) @local.reference) + +;; case ident: +;; stmt; +(case_statement + (identifier) @local.reference) + +;; inherited classes are refs +(base_class_clause + (type_identifier) @local.reference) + +;; base types in enums are refs +(enum_specifier + base: (type_identifier) @local.reference) + +;; Type{} +(compound_literal_expression + (type_identifier) @local.reference) + +;; operator Type() T; +(operator_cast + (type_identifier) @local.reference) + +;; template types +;; T<_> +(template_type + (type_identifier) @local.reference) + +;; V<_>() +(template_function + (identifier) @local.reference) + +;; _ +(template_argument_list + (type_descriptor + (type_identifier) @local.reference)) + +;; variadic in type descriptors +(parameter_pack_expansion + (type_descriptor + (type_identifier) @local.reference)) + +;; a.template F<_>() +(template_method + (field_identifier) @local.reference) + +;; type constraints +(compound_requirement + (identifier) @local.reference) +(trailing_return_type + (type_descriptor + (type_identifier) @local.reference)) diff --git a/src/intelligence/language/go/mod.rs b/src/intelligence/language/go/mod.rs new file mode 100644 index 0000000..c06d18f --- /dev/null +++ b/src/intelligence/language/go/mod.rs @@ -0,0 +1,746 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static GO: TSLanguageConfig = TSLanguageConfig { + language_ids: &["Go"], + file_extensions: &["go"], + grammar: tree_sitter_go::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + [(identifier) + (type_identifier) + (package_identifier) + (field_identifier)] @hoverable + "#, + ), + namespaces: &[ + // variables + &["const", "var", "func", "module"], + // types + &["struct", "interface", "type"], + // misc. 
+ &["member"], + &["label"], + ], +}; + +#[cfg(test)] +mod tests { + use crate::intelligence::language::test_utils::*; + + #[test] + fn declare_const_no_type() { + let src = r#" + const one = 1 + const two, three = 2, 3 + "#; + + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 3); + } + + #[test] + fn declare_const_with_type() { + let src = r#" + const one uint64 = 1 + const two, three uint64 = 2, 3 + "#; + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 3); + } + + #[test] + fn declare_const_grouped() { + let src = r#" + const ( + zero = 0 + one = 1 + ) + "#; + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 2); + } + + #[test] + fn declare_const_implicit_value() { + let src = r#" + const ( + zero = iota + one + ) + "#; + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 2); + } + + #[test] + fn declare_var_no_type() { + let src = r#" + package main + + var zero = 0 + var one, two = 1, 2 + var three, four, five = 3, 4, 5 + "#; + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 6); + } + + #[test] + fn declare_var_with_types() { + let src = r#" + package main + + var zero uint64 = 0 + var one, two uint64 = 1, 2 + "#; + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 3); + } + + #[test] + fn declare_var_grouped() { + let src = r#" + package main + + var ( + zero = 0 + one = 1 + ) + "#; + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 2); + } + + #[test] + fn declare_short_var() { + let src = r#" + func main() { + x := 2 + res, err := f() + } + "#; + + // main, x, res, err + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 4); + } + + #[test] + fn declare_func() { + let src = r#" + package main + + func f1() {} + func f2() int {} + func f3() (File, Thing) {} + func f4(result int, err error) {} // declares result, err + func f5(x ... uint64, y ... 
uint64) {} // declares x, y + "#; + let (_, d, _, _) = counts(src, "Go"); + + // f1, f2, f3, f4, f5, result, err, x, y + assert_eq!(d, 9); + } + + #[test] + fn declare_type() { + let src = r#" + package main + + type a uint64 + type ( + b uint64 + c uint64 + ) + type s struct {} + type i interface {} + "#; + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 5); + } + + #[test] + fn declare_type_grouped() { + let src = r#" + package main + + type ( + a uint64 + b uint64 + ) + "#; + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 2); + } + + #[test] + fn declare_loop_label() { + let src = r#" + func main() { + loop: for ;; { + break loop + } + } + "#; + + // main, loop + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 2); + } + + #[test] + fn declare_func_literal() { + let src = r#" + func main() { + const t := func () {} + } + "#; + + // main, t + let (_, d, _, _) = counts(src, "Go"); + assert_eq!(d, 2); + } + + #[test] + fn refer_binary_expr() { + let src = r#" + const a = 2 + const b = 2 + const _ = a + b + const _ = a * b + const _ = a << b + "#; + + // 3 refs to a, 3 refs to b + let (_, _, r, _) = counts(src, "Go"); + assert_eq!(r, 6); + } + + #[test] + fn refer_func_call() { + let src = r#" + func a() { + b() + } + func b() { + a() + } + "#; + + let (_, _, r, _) = counts(src, "Go"); + assert_eq!(r, 2); + } + + #[test] + fn refer_array_index() { + let src = r#" + func main() { + a := [3] int{1, 2, 3} + a[0] = 3 + a[2] = 1 + } + "#; + + let (_, _, r, _) = counts(src, "Go"); + assert_eq!(r, 2); + } + + #[test] + fn refer_slice_expr() { + let src = r#" + func main() { + a := [3] int{1, 2, 3} + b := a[0:3] + } + "#; + + let (_, _, r, _) = counts(src, "Go"); + assert_eq!(r, 1); + } + + #[test] + fn refer_parenthesized_expr() { + let src = r#" + func main() { + a := 2 + (a) + } + "#; + + let (_, _, r, _) = counts(src, "Go"); + assert_eq!(r, 1); + } + + #[test] + fn refer_selector_expr() { + let src = r#" + type person struct { + name string + age int + } + func main() { + p := person{ "bob", 20 }; + p.age = 42 + } + "#; + + let (_, _, r, _) = counts(src, "Go"); + + // p (variable ref), person (type ref) + assert_eq!(r, 2); + } + + #[test] + fn refer_type_assert_expr() { + let src = r#" + func main() { + a := 3 + a.(uint64) + } + "#; + + let (_, _, r, _) = counts(src, "Go"); + assert_eq!(r, 1); + } + + #[test] + fn refer_unary_expr() { + let src = r#" + func main() { + a := 2 + !a + } + "#; + + let (_, _, r, _) = counts(src, "Go"); + assert_eq!(r, 1); + } + + #[test] + fn refer_statements() { + let src = r#" + func main() { + a := 3 + + a++ + a-- + a = 3 + + // control flow + if a { } + switch a { } + defer a + go a + return a + + label: + continue label + break label + } + "#; + + let (_, _, r, _) = counts(src, "Go"); + assert_eq!(r, 10); + } + + #[test] + fn no_ref() { + let src = r#" + func f1() { + a := 3 + } + func f2() { + return a + } + func f3() {} + "#; + let (_, d, r, _) = counts(src, "Go"); + + // f1, f1::a, f2, f3 + assert_eq!(d, 4); + + // `a` in f2 found no defs, and is dropped from the graph + assert_eq!(r, 0); + } + + #[test] + fn symbol_consts() { + let src = r#" + package main + + const one uint64 = 1 + const ( + two = 2 + three = 2 + ) + + func four() {} + + var five = 3 + + func six() { + seven: for ;; {} + } + + type eight struct { + nine string + ten uint64 + } + + type eleven interface {} + "#; + assert_eq_defs( + src.as_bytes(), + "Go", + vec![ + ("one", "const"), + ("two", "const"), + ("three", "const"), + ("four", "func"), + ("five", "var"), + ("five", 
"var"), + ("six", "func"), + ("seven", "label"), + ("eight", "struct"), + ("nine", "member"), + ("ten", "member"), + ("eleven", "interface"), + ], + ) + } + + #[test] + fn scoping_rules() { + test_scopes( + "Go", + r#" + func main() { + var args = os.Args; + var length = len(args); + fmt.Printf("%d", l); + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "func", + context: "func §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + args { + kind: "var", + context: "var §args§ = os.Args;", + referenced in (1): [ + `var length = len(§args§);`, + ], + }, + length { + kind: "var", + context: "var §length§ = len(args);", + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn function_params() { + test_scopes( + "Go", + r#" + func main(t string, u string) { + v := 0 + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "func", + context: "func §main§(t string, u string) {", + }, + ], + child scopes: [ + scope { + definitions: [ + t { + kind: "var", + context: "func main(§t§ string, u string) {", + }, + u { + kind: "var", + context: "func main(t string, §u§ string) {", + }, + ], + child scopes: [ + scope { + definitions: [ + v { + kind: "var", + context: "§v§ := 0", + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // types and variables belong to different namespaces, preventing + // items in the variable position to resolve to typedefs, and vice-versa + #[test] + fn namespacing_of_types_and_variables() { + test_scopes( + "Go", + r#" + type repoFilters struct { + topics []string + } + + func (repoFilters repoFilters) { + repoFilters + 1 + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + repoFilters { + kind: "struct", + context: "type §repoFilters§ struct {", + referenced in (1): [ + `func (repoFilters §repoFilters§) {`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + topics { + kind: "member", + context: "§topics§ []string", + }, + ], + child scopes: [], + }, + ], + }, + scope { + definitions: [ + repoFilters { + kind: "var", + context: "func (§repoFilters§ repoFilters) {", + referenced in (1): [ + `§repoFilters§ + 1`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // modules and variables are in the same namespace, allowing + // things like module.Type to resolve correctly + #[test] + fn namespacing_of_modules_and_variables() { + test_scopes( + "Go", + r#" + import x "github.com/golang/go/x" + + var t x.Type := 2 + + t++ + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + t { + kind: "var", + context: "var §t§ x.Type := 2", + referenced in (1): [ + `§t§++`, + ], + }, + ], + imports: [ + x { + context: "import §x§ \"github.com/golang/go/x\"", + referenced in (1): [ + `var t §x§.Type := 2`, + ], + }, + ], + child scopes: [], + } + "#]], + ) + } + + // labels can only be referred to in break and continue statements + #[test] + fn namespacing_of_labels() { + test_scopes( + "Go", + r#" + func main() { + const OUTER = 2 + + OUTER: + for { + continue OUTER + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "func", + context: "func §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + OUTER { + kind: "const", + context: "const §OUTER§ = 2", + }, + OUTER { + 
kind: "label", + context: "§OUTER§:", + referenced in (1): [ + `continue §OUTER§`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + // bug report: https://www.notion.so/teambloop/Go-Bug-2a82ef59b72548f2ad51bac1ddad62b6 + #[test] + fn bug_report_type_def_slice_type() { + test_scopes( + "Go", + r#" + type runeOffsetMap []runeOffsetCorrection + + func makeRuneOffsetMap(off []uint32) runeOffsetMap { + + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + runeOffsetMap { + kind: "type", + context: "type §runeOffsetMap§ []runeOffsetCorrection", + referenced in (1): [ + `func makeRuneOffsetMap(off []uint32) §runeOffsetMap§ {`, + ], + }, + makeRuneOffsetMap { + kind: "func", + context: "func §makeRuneOffsetMap§(off []uint32) runeOffsetMap {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [ + off { + kind: "var", + context: "func makeRuneOffsetMap(§off§ []uint32) runeOffsetMap {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // bug report: https://www.notion.so/teambloop/Go-Bug-2a82ef59b72548f2ad51bac1ddad62b6 + #[test] + fn bug_report_rhs_declaration() { + test_scopes( + "Go", + r#" + // this used to create 2 definitions: x, y + x := y + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + x { + kind: "var", + context: "§x§ := y", + }, + ], + child scopes: [], + } + "#]], + ) + } +} diff --git a/src/intelligence/language/go/scopes.scm b/src/intelligence/language/go/scopes.scm new file mode 100644 index 0000000..0119e49 --- /dev/null +++ b/src/intelligence/language/go/scopes.scm @@ -0,0 +1,397 @@ +;; scopes + +;; function declarations create a scope for +;; their args, and another for their bodies +;; +;; func f(x uint64, y uint64) { +;; var z = 2 +;; } +;; +;; should resolve to: +;; +;; scope: { +;; defs: f +;; scope: { +;; defs: x, y +;; scope: { +;; defs: z +;; } +;; } +;; } +;; +(function_declaration) @local.scope +(method_declaration) @local.scope +(func_literal) @local.scope +(field_declaration_list) @local.scope +(type_switch_statement) @local.scope +(type_declaration) @local.scope + +(block) @local.scope + +;; select statements with assignments seem +;; to create scopes, without using blocks +;; +;; select { +;; case x := <- channel: // creates a scope and defines `x` +;; doThing(x) +;; case y := <- channel: +;; doThing(y) +;; } +(communication_case) @local.scope + + +;; defs + +;; const x = ... +(const_declaration + (const_spec + (identifier) @local.definition.const)) + +;; var x = ... +(var_declaration + (var_spec + (identifier) @local.definition.var)) + +;; x := ... +(short_var_declaration + left: + (expression_list + (identifier) @local.definition.var)) + +;; func x() { ... } +(function_declaration + name: (identifier) @hoist.definition.func) + +;; func (s S) x() { ... } +(method_declaration + name: (field_identifier) @hoist.definition.func) + +;; type a struct { ... } +(type_declaration + (type_spec + (type_identifier) @hoist.definition.struct + (struct_type))) + +;; type a interface { ... 
} +(type_declaration + (type_spec + (type_identifier) @hoist.definition.interface + (interface_type))) + +;; interface methods +(method_spec + (field_identifier) @local.definition.func) + +;; type a b +;; all other type defs +(type_declaration + (type_spec + (type_identifier) @hoist.definition.type + [(array_type) + (channel_type) + (function_type) + (map_type) + (pointer_type) + (qualified_type) + (slice_type) + (type_identifier) @local.reference.type])) + +;; type parameters +(type_parameter_list + (parameter_declaration + (type_identifier) @local.definition.type)) + +;; type alias lhs +(type_alias + . + (type_identifier) @local.definition.type) + +;; type _ struct { +;; x T +;; } +(field_declaration_list + (field_declaration + (field_identifier) @local.definition.member)) + +;; func _(x) +(function_declaration + parameters: + (parameter_list + (parameter_declaration + (identifier) @local.definition.var))) + +;; method params +(method_declaration + receiver: + (parameter_list + (parameter_declaration + (identifier) @local.definition.var))) + +(method_declaration + parameters: + (parameter_list + (parameter_declaration + (identifier) @local.definition.var))) + +;; variadic params +;; func _(x ... T) +(parameter_list + (variadic_parameter_declaration + (identifier) @local.definition.var)) + +;; function literal syntax +;; const _ = func(x) {} +(func_literal + (parameter_list + (parameter_declaration + (identifier) @local.definition.var))) + +;; loop: for i := ... +(labeled_statement + (label_name) @local.definition.label) + +;; imports +(import_spec + (package_identifier) @local.import) + +;; switch t := q.(type) +(type_switch_statement + (expression_list + (identifier) @local.definition.var)) + +;; select { +;; case x := <- c +;; } +;; +;; beats me why this is different from +;; short_var_declaration :shrug: +(receive_statement + left: + (expression_list + (identifier) @local.definition.var)) + +;; for range +;; +;; for i, e := range +;; +;; `i` and `e` are def +(for_statement + (range_clause + (expression_list + (identifier) @local.definition.var))) + + +;; refs + +;; a op b +(binary_expression + (identifier) @local.reference.var) + +;; x() +(call_expression + (identifier) @local.reference.var) + +;; x(ident, ident) +;; +;; arguments to a call expression also create references +(call_expression + (argument_list + (identifier) @local.reference.var)) + +;; x[_] +(index_expression + (identifier) @local.reference.var) + +;; (x) +(parenthesized_expression + (identifier) @local.reference.var) + +;; x.b +(selector_expression + . (identifier) @local.reference) + +;; x[y:z] +(slice_expression + (identifier) @local.reference.var) + +;; a.(Type) +(type_assertion_expression + (identifier) @local.reference.var) +(type_assertion_expression + (type_identifier) @local.reference.type) + +;; Type(x) +;; +;; some type conversions are equivalent +;; to call expressions, the grammar lacks +;; info to distinguish among them +(type_conversion_expression + (identifier) @local.reference) + +;; !a +(unary_expression + (identifier) @local.reference.var) + +;; x <- item +(send_statement + (identifier) @local.reference.var) + +;; x := <- c +(send_statement + (identifier) @local.reference.var) + +;; x++ +(inc_statement + (identifier) @local.reference.var) + +;; x-- +(dec_statement + (identifier) @local.reference.var) + +;; assignment +;; a = 2 +(assignment_statement + (expression_list + (identifier) @local.reference.var)) + +;; if x { .. } +(if_statement + (identifier) @local.reference.var) + +;; switch x { .. 
} +(expression_switch_statement + (identifier) @local.reference.var) + +;; typed-switch +(type_switch_statement + (identifier) @local.reference.var) + +;; defer x +(defer_statement + (identifier) @local.reference.var) + +;; go x +(go_statement + (identifier) @local.reference.var) + +;; return x +(return_statement + (expression_list + (identifier) @local.reference.var)) + +;; break x +(break_statement + (label_name) @local.reference.label) + +;; continue x +(continue_statement + (label_name) @local.reference.label) + +;; for range +;; +;; `i` and `e` are def +(for_statement + (range_clause + right: (identifier) @local.reference.var)) + +;; return parameter list +(function_declaration + result: + (parameter_list + (parameter_declaration + (identifier) @local.reference.var))) +(method_declaration + result: + (parameter_list + (parameter_declaration + (identifier) @local.reference.var))) +(method_spec + result: + (type_identifier) @local.definition.type) + +;; struct literals +(literal_value + [ + ;; field: value + (keyed_element + (literal_element + (identifier) @local.reference.var)) + + ;; value + (literal_element + (identifier) @local.reference.var) + ]) + +;; _ := y +(short_var_declaration + right: + (expression_list + (identifier) @local.reference.var)) + + +;; type refs + +;; func _(var type) +(parameter_list + (parameter_declaration + type: + (type_identifier) @local.reference.type)) + +;; return type ident +;; +;; func _(..) type {} +(function_declaration + result: (type_identifier) @local.reference.type) +(method_declaration + result: (type_identifier) @local.reference.type) + +;; func (var... type) +(variadic_parameter_declaration + type: (type_identifier) @local.reference.type) + +;; *T +(pointer_type + (type_identifier) @local.reference.type) + +;; []T +(slice_type + (type_identifier) @local.reference.type) + +;; map type +(map_type + (type_identifier) @local.reference.type) + +;; chan <- Type +(channel_type + (type_identifier) @local.reference.type) + +;; var x type; const x type +(var_spec + (type_identifier) @local.reference.type) +(const_spec + (type_identifier) @local.reference.type) + +;; module.Type +(qualified_type + (package_identifier) @local.reference.module) + +;; struct literal +(composite_literal + (type_identifier) @local.reference.type) + +;; type constraints +(constraint_term + (type_identifier) @local.reference.type) + +;; type alias rhs +(type_alias + (_) + (type_identifier) @local.reference.type) + +;; generic type +;; Type[T, U] +(generic_type + (type_identifier) @local.reference.type) +(type_arguments + (type_identifier) @local.reference.type) + diff --git a/src/intelligence/language/java/mod.rs b/src/intelligence/language/java/mod.rs new file mode 100644 index 0000000..557077a --- /dev/null +++ b/src/intelligence/language/java/mod.rs @@ -0,0 +1,313 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static JAVA: TSLanguageConfig = TSLanguageConfig { + language_ids: &["Java"], + file_extensions: &["java"], + grammar: tree_sitter_java::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + [(identifier) + (type_identifier)] @hoverable + "#, + ), + namespaces: &[&[ + // variables + "local", + // functions + "method", + // namespacing, modules + "package", + "module", + // types + "class", + "enum", + "enumConstant", + "record", + "interface", + "typedef", + // misc. 
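+        // labels share the single namespace with everything else here,
+        // unlike the Go config above, which gives labels their own namespace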
+ "label", + ]], +}; + +#[cfg(test)] +mod tests { + use crate::intelligence::language::test_utils::*; + + // tests the following constructs: + // + // - class declarations + // - method declarations + // - formal parameters + // - method invocations + // - array access + #[test] + fn trivial() { + test_scopes( + "Java", + r#" + class HelloWorld { + public static void main(string[] args) { + System.Out.Println("Hello " + args[0]); + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + HelloWorld { + kind: "class", + context: "class §HelloWorld§ {", + }, + ], + child scopes: [ + scope { + definitions: [ + main { + kind: "method", + context: "public static void §main§(string[] args) {", + }, + ], + child scopes: [ + scope { + definitions: [ + args { + kind: "local", + context: "public static void main(string[] §args§) {", + referenced in (1): [ + `System.Out.Println("Hello " + §args§[0]);`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + // tests the following constructs: + // + // - class declarations + // - interface declarations + // - super classes + // - interfaces implementations + // - generics + #[test] + fn classes_interfaces_generics() { + test_scopes( + "Java", + r#" + public class C1 {} + public class C2 {} + + public interface I1 {} + public interface I2 {} + + public class C3 extends C2 implements I1, I2 { + private T element; + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + C1 { + kind: "class", + context: "public class §C1§ {}", + referenced in (1): [ + `public class C3 extends C2 implements I1, I2 {`, + ], + }, + C2 { + kind: "class", + context: "public class §C2§ {}", + referenced in (1): [ + `public class C3 extends §C2§ implements I1, I2 {`, + ], + }, + I1 { + kind: "interface", + context: "public interface §I1§ {}", + referenced in (1): [ + `public class C3 extends C2 implements §I1§, I2 {`, + ], + }, + I2 { + kind: "interface", + context: "public interface §I2§ {}", + referenced in (1): [ + `public class C3 extends C2 implements I1, §I2§ {`, + ], + }, + C3 { + kind: "class", + context: "public class §C3§ extends C2 implements I1, I2 {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [ + T { + kind: "typedef", + context: "public class C3<§T§ extends C1> extends C2 implements I1, I2 {", + referenced in (1): [ + `private §T§ element;`, + ], + }, + element { + kind: "local", + context: "private T §element§;", + }, + ], + child scopes: [], + }, + ], + } + "#]], + ) + } + + // `this` is specially handled: + // + // - `this.member` raises `member` as a reference + // - `this.method()` raises `method` as a reference + #[test] + fn this_keyword() { + test_scopes( + "Java", + r#" + public class Adder { + private int a; + private int b; + + Adder(int first, int second) { + this.a = first; + this.b = second; + } + + private int add_helper() { + return this.a + this.b; + } + + public int add() { + return this.add_helper(); + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + Adder { + kind: "class", + context: "public class §Adder§ {", + }, + ], + child scopes: [ + scope { + definitions: [ + a { + kind: "local", + context: "private int §a§;", + referenced in (2): [ + `this.§a§ = first;`, + `return this.§a§ + this.b;`, + ], + 
}, + b { + kind: "local", + context: "private int §b§;", + referenced in (2): [ + `this.§b§ = second;`, + `return this.a + this.§b§;`, + ], + }, + Adder { + kind: "method", + context: "§Adder§(int first, int second) {", + }, + add_helper { + kind: "method", + context: "private int §add_helper§() {", + referenced in (1): [ + `return this.§add_helper§();`, + ], + }, + add { + kind: "method", + context: "public int §add§() {", + }, + ], + child scopes: [ + scope { + definitions: [ + first { + kind: "local", + context: "Adder(int §first§, int second) {", + referenced in (1): [ + `this.a = §first§;`, + ], + }, + second { + kind: "local", + context: "Adder(int first, int §second§) {", + referenced in (1): [ + `this.b = §second§;`, + ], + }, + ], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } +} diff --git a/src/intelligence/language/java/scopes.scm b/src/intelligence/language/java/scopes.scm new file mode 100644 index 0000000..34598c2 --- /dev/null +++ b/src/intelligence/language/java/scopes.scm @@ -0,0 +1,400 @@ +;; scopes + +[ + (block) + + ;; class items + (class_declaration) + (method_declaration) + (constructor_declaration) + + ;; interface items + (interface_declaration) + ;; alternate syntax + (annotation_type_declaration . "@interface") + + ;; enums + (enum_declaration) + + ;; records + (record_declaration) + + ;; modules + (module_declaration) + + ;; switch + ;; + ;; traditional switch case block + (switch_block_statement_group) + ;; functional switch case block + (switch_rule) + + ;; try-catch + (try_with_resources_statement) + (catch_clause) + +] @local.scope + + +;; defs + +;; int a = ..; +(variable_declarator + name: (identifier) @local.definition.local) + +;; package ident; +(package_declaration + (identifier) @local.definition.package) + +;; module com.foo { .. } +;; +;; defines `foo` as a module +(module_declaration + name: (identifier) @hoist.definition.module) +(module_declaration + name: + (scoped_identifier + (_) + (identifier) @hoist.definition.module)) + + +;; class Main { .. } +(class_declaration + (identifier) @hoist.definition.class) + +;; class Main { .. } +(type_parameters + (type_parameter + (type_identifier) @local.definition.typedef)) + +;; methods +(method_declaration + name: (identifier) @hoist.definition.method) + +;; constructors +(constructor_declaration + (identifier) @hoist.definition.method) + +;; interface Iface { .. } +(interface_declaration + (identifier) @hoist.definition.interface) + +;; alternate iface declaration syntax +;; +;; @interface Foo { .. } +(annotation_type_declaration + "@interface" + (identifier) @hoist.definition.interface) + +;; enums +(enum_declaration + name: (identifier) @hoist.definition.enum) +;; enum variants +(enum_constant + (identifier) @local.definition.enumConstant) + +;; records +(record_declaration + name: (identifier) @hoist.definition.record) + +;; for (Type item: iterator) { .. } +;; +;; `item` is a def +(enhanced_for_statement + name: (identifier) @local.definition.local) + +;; pattern matching creates defs +(instanceof_expression + . 
+ (identifier) + (identifier)* @local.definition.local) + +;; param list with types +(formal_parameters + (formal_parameter + (identifier) @local.definition.local)) +;; param list without types +(inferred_parameters + (identifier) @local.definition.local) + +;; catch declaration +(catch_formal_parameter + (identifier) @local.definition.local) + +;; try-resource declaration +(resource + name: (identifier) @local.definition.local) + +;; singluar lambda param +;; +;; arg -> body; +(lambda_expression + parameters: (identifier) @local.definition.local) + +;; imports +;; +;; import item; +;; ^^^^ is an import +(import_declaration + (identifier) @local.import) + +;; import java.util.Vector; +;; ^^^^^^ is an import +(import_declaration + (scoped_identifier + (_) + (identifier) @local.import)) + +;; labels +(labeled_statement + (identifier) @local.definition.label) + + +;; refs + +;; a; +(expression_statement + (identifier) @local.reference) + +;; a op b +(binary_expression + (identifier) @local.reference) + +;; !a +(unary_expression + (identifier) @local.reference) + +;; rhs of a decl. is a ref +;; +;; int _ = b; +(variable_declarator + value: (identifier) @local.reference) + +;; a.b +(field_access + . + (identifier) @local.reference) + +;; this.field +(field_access + (this) + (identifier) @local.reference) + +;; a = b; +;; +;; both `a` and `b` are refs +(assignment_expression + (identifier) @local.reference) + +;; a instanceOf pattern; +;; +;; the first ident is a ref +;; subsequent idents should be defs +(instanceof_expression + . + (identifier) @local.reference) + +;; (a) +(parenthesized_expression + (identifier) @local.reference) + +;; a() +(method_invocation + . + (identifier) @local.reference) + +;; this.b(); +(method_invocation + (this) + (identifier) @local.reference) + +;; class::method +(method_reference + . + (identifier) @local.reference) + +;; _(x, y, z) +(argument_list + (identifier) @local.reference) + +;; a ? b : c +(ternary_expression + (identifier) @local.reference) + +;; i++ +(update_expression + (identifier) @local.reference) + +;; a[b] +(array_access + (identifier) @local.reference) + +;; (T)ident; +(cast_expression + (identifier) @local.reference) +(cast_expression + (type_identifier) @local.reference) + +;; {a, b, c} +(array_initializer + (identifier) @local.reference) + +;; new Object(); +;; ^^^^^^ +(object_creation_expression + (type_identifier) @local.reference) +;; Foo.new Object(); +;; ^^^ +(object_creation_expression + (identifier) @local.reference) + +;; for (Type item: iterator) { .. } +;; +;; `iterator` is a ref +;; `Type` is a ref +(enhanced_for_statement + value: (identifier) @local.reference) + +;; return ident; +(return_statement + (identifier) @local.reference) + +;; assert ident; +(assert_statement + (identifier) @local.reference) + +;; break label; +(break_statement + (identifier) @local.reference) + +;; continue label; +(continue_statement + (identifier) @local.reference) + +;; yield item; +(yield_statement + (identifier) @local.reference) + +;; lambda body +(lambda_expression + body: (identifier) @local.reference) + +;; annotations +;; +;; @Documented class C { .. } +(annotation (identifier) @local.reference) +(marker_annotation (identifier) @local.reference) + +;; case-patterns +;; +;; case IDENT -> { .. } +(switch_label + (identifier) @local.reference) + +;; try-resource rhs +(resource + value: (identifier) @local.reference) + +;; uses com.foo.item; +(uses_module_directive . "uses" . 
(_) @local.reference) +;; requires com.foo.item; +(requires_module_directive . "requires" . (_) @local.reference) +;; exports com.foo.submodule; +(exports_module_directive . "exports" . (_) @local.reference) +;; opens com.foo.item to some, other, modules; +(opens_module_directive . "opens" + . (_) @local.reference) +;; provides com.foo.item with com.bar.item +(provides_module_directive . "provides" + . (_) @local.reference + . "with" + . (_) @local.reference) + + + +;; type refs + +;; variable declarations with types +(local_variable_declaration + (type_identifier) @local.reference) + +;; class field declarations with type +(field_declaration + (type_identifier) @local.reference) + +;; List<_, _> +(generic_type + (type_identifier) @local.reference) + +;; _ +;; type args in generics +(type_arguments + (type_identifier) @local.reference) + +;; wildcard type +;; +;; ? Type +(wildcard + (type_identifier) @local.reference) + +;; String[] +(array_type + (type_identifier) @local.reference) + +;; type refs in the pattern +(instanceof_expression + (type_identifier) @local.reference) + +;; for (Type _: _) { .. } +(enhanced_for_statement + type: (type_identifier) @local.reference) + +;; +(type_bound + (type_identifier) @local.reference) + +;; class _ extends B +(superclass + (type_identifier) @local.reference) + +;; class _ implements B +(super_interfaces + (type_list + (type_identifier) @local.reference)) + +;; interface _ extends I, J, K +(extends_interfaces + (type_list + (type_identifier) @local.reference)) + +;; sealed interface _ permits I, J, K +(permits + (type_list + (type_identifier) @local.reference)) + +;; parameter types +(formal_parameter + (type_identifier) @local.reference) + +;; type refs in method signatures +;; +;; return type +(method_declaration + type: (type_identifier) @local.reference) +;; throws type +(method_declaration + (throws + (type_identifier) @local.reference)) + +;; catch Type1 | Type2 exception +(catch_type + (type_identifier) @local.reference) + +;; A.B +(scoped_type_identifier + . + (type_identifier) @local.reference) + +;; try-resource type +(resource + type: (type_identifier) @local.reference) diff --git a/src/intelligence/language/javascript/mod.rs b/src/intelligence/language/javascript/mod.rs new file mode 100644 index 0000000..cea94c1 --- /dev/null +++ b/src/intelligence/language/javascript/mod.rs @@ -0,0 +1,416 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static JAVASCRIPT: TSLanguageConfig = TSLanguageConfig { + language_ids: &["JavaScript", "JSX"], + file_extensions: &["js", "jsx"], + grammar: tree_sitter_javascript::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + [(identifier) + (property_identifier) + (shorthand_property_identifier) + (shorthand_property_identifier_pattern) + (private_property_identifier) + (statement_identifier)] @hoverable + "#, + ), + namespaces: &[&[ + //variables + "constant", + "variable", + "property", + "function", + "method", + "generator", + // types + "class", + // misc. + "label", + ]], +}; + +#[cfg(test)] +mod test { + use crate::intelligence::language::test_utils::*; + + #[test] + fn declare_lexical() { + let src = r#" + const a = 2; + var b = 2; + let c = 2; + + // this is an "assignment", but introduces `d` + // if if does not exist, and hence counted as a decl. 
+ d = a; + "#; + + let (_, def_count, _, _) = counts(src, "JavaScript"); + + // a, b, c, d + assert_eq!(def_count, 4); + } + + #[test] + fn declare_functions() { + let src = r#" + function one() {} + { + two() {}, + get three() {}, + set four() {} + }; + + function* five() {} + "#; + + let (_, def_count, _, _) = counts(src, "JavaScript"); + + assert_eq!(def_count, 5); + } + + #[test] + fn declare_destructuring() { + let src = r#" + var [a, b] = 5; + + function(c, ...d) {} + function(e, f = y) {} + + const g = (h) => {} + const i = (j, k) => {} + + // TODO: object patterns with shorthand patterns are + // not handled in every situation right now (only in const/var decls.) + // function({field: {l, m}}) {} + + function({...n}) {} + "#; + + let (_, def_count, _, _) = counts(src, "JavaScript"); + + assert_eq!(def_count, 12); + } + + #[test] + fn declare_class() { + let src = r#" + class One { + #two + static #three + } + "#; + + let (_, def_count, _, _) = counts(src, "JavaScript"); + + // class, prop, prop + assert_eq!(def_count, 3); + } + + #[test] + fn declare_imports() { + let src = r#" + import defaultOne from "module"; + import { two, three } from "module"; + import { four, member as five } from "module"; + "#; + + let (_, _, _, import_count) = counts(src, "JavaScript"); + + assert_eq!(import_count, 5); + } + + #[test] + fn declare_misc() { + let src = r#" + for (one in items) + thing(); + + for (var two = 0; a <= 0; a++) + thing(); + + three: + for (;;) + break three; + "#; + + let (_, def_count, _, _) = counts(src, "JavaScript"); + + assert_eq!(def_count, 3); + } + + #[test] + fn refer_primitive_expressions() { + let src = r#" + var a = 2; + + a; + { "field": a }; + [ a ]; + (a); + a.length(); + "#; + + let (_, _, ref_count, _) = counts(src, "JavaScript"); + + assert_eq!(ref_count, 5); + } + + #[test] + fn refer_statements() { + let src = r#" + var a = 2; + + return a; + yield a; + await a; + "#; + + let (_, _, ref_count, _) = counts(src, "JavaScript"); + + assert_eq!(ref_count, 3); + } + + #[test] + fn refer_operators() { + let src = r#" + var a = 2; + var b = 3; + var c = 4; + + // update expr + a++; + + // unary + -a; + + // binary + a + b; + + // ternary + c ? 
a : b; + + // spread + {a, b, ...c}; + + // index + a[b]; + + // member + // `b` is not a reference here + a.b + "#; + + let (_, _, ref_count, _) = counts(src, "JavaScript"); + + assert_eq!(ref_count, 13); + } + + #[test] + fn refer_exports() { + let src = r#" + var a = 2; + var b = 3; + var c = 4; + + export { a, b }; + + // `alias` is ignored + export { a as alias, b }; + export default c; + "#; + + let (_, _, ref_count, _) = counts(src, "JavaScript"); + + assert_eq!(ref_count, 5); + } + + #[test] + fn refer_misc() { + let src = r#" + function foo() {} + + var a = 2; + + for (item in a) // ref a + foo(a); // ref a, foo + + for (var b = 0; b <= 5; b++) // ref b, b + foo(a); // ref a, foo + + "#; + + let (_, _, ref_count, _) = counts(src, "JavaScript"); + + assert_eq!(ref_count, 7); + } + + #[test] + fn refer_embedded_jsx() { + let src = r#" + const a = 5; + b = {string(a)}b c; + "#; + + let (_, _, ref_count, _) = counts(src, "JavaScript"); + + assert_eq!(ref_count, 1); + } + + #[test] + fn refer_jsx_opening_element() { + let src = r#" + import Button from '../../Button'; + import ChevronRightIcon from '../../../icons/ChevronRightIcon'; + + const NavBarNoUser = () => { + return ( + + + + + ); + }; + export default NavBarNoUser; + "#; + + // Button x 4, + // ChevronRightIcon x 1, + // NavBarNoUser x 1 + let (_, _, ref_count, _) = counts(src, "JSX"); + + assert_eq!(ref_count, 6); + } + + // https://github.com/BloopAI/bloop/issues/213 + #[test] + fn function_params() { + test_scopes( + "JavaScript", + r#" + function main(a, b) { } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "function §main§(a, b) { }", + }, + ], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "function main(§a§, b) { }", + }, + b { + kind: "variable", + context: "function main(a, §b§) { }", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ); + } + + #[test] + fn new_expression_regression() { + test_scopes( + "JavaScript", + r#" + const { Client } = require("@elastic/elasticsearch"); + const elasticClient = new Client({node: ELASTIC_CONNECTION_STRING}); + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + elasticClient { + kind: "constant", + context: "const §elasticClient§ = new Client({node: ELASTIC_CONNECTION_STRING});", + }, + ], + imports: [ + Client { + context: "const { §Client§ } = require(\"@elastic/elasticsearch\");", + referenced in (1): [ + `const elasticClient = new §Client§({node: ELASTIC_CONNECTION_STRING});`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + } + "#]], + ) + } + + #[test] + fn catch_clause_regression() { + test_scopes( + "JavaScript", + r#" + try { + someFn(); + } catch (err) { + return err; + } finally { + return 0; + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [ + err { + kind: "variable", + context: "} catch (§err§) {", + referenced in (1): [ + `return §err§;`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [], + child scopes: [], + }, + ], + } + "#]], + ) + } +} diff --git a/src/intelligence/language/javascript/scopes.scm b/src/intelligence/language/javascript/scopes.scm new file mode 100644 index 0000000..c1ffdd5 --- /dev/null +++ b/src/intelligence/language/javascript/scopes.scm @@ -0,0 +1,383 
@@ +;; scopes + +[ + (statement_block) + (class_body) + (arrow_function) + (object) + ;; nameless functions create scopes, just like arrow functions + (function !name) + (function_declaration) + (method_definition) + (generator_function_declaration) + (for_statement) + (for_in_statement) + (switch_case) + (catch_clause) + ;; assignments are permitted inside sequence exprs: + ;; + ;; const a = 2; + ;; throw f = 1, f, a; + ;; + ;; should produce: + ;; + ;; { + ;; defs: [ a ], + ;; scopes [{ + ;; defs: [ f ], + ;; refs: [ f, a ] + ;; }], + ;; } + (sequence_expression) +] @local.scope + + + +;; defs + +;; tree-sitter-javascript has 5 "declaration" kinds: +;; +;; - class +;; - function +;; - generator function +;; - lexical +;; - variable + +;; function x() +(function_declaration + (identifier) @hoist.definition.function) + +(generator_function_declaration + (identifier) @hoist.definition.generator) + +;; function params +(formal_parameters + (identifier) @local.definition.variable) + +;; patterns + +;; f(a, ...b) +(rest_pattern + (identifier) @local.definition.variable) + +;; f(a, y = f) +;; +;; the lhs is a def, the rhs is a ref +(assignment_pattern + (identifier) @local.definition.variable + (identifier) @local.reference) + +;; {x: y} +(pair_pattern + (property_identifier) + (identifier) @local.definition.variable) + +;; var x = _ +;; var [x, y] = _ +;; var {x, y} = _ +(variable_declaration + (variable_declarator . (identifier) @local.definition.variable)) +(variable_declaration + (variable_declarator + name: (array_pattern + (identifier) @local.definition.variable))) +(variable_declaration + (variable_declarator + name: (object_pattern + (shorthand_property_identifier_pattern) @local.definition.variable))) + +;; const _ = require(_) should produce imports +( + (lexical_declaration + ["const" "let"] + (variable_declarator + name: (identifier) @local.import + value: (call_expression + function: (identifier) @_req_call))) + (#match? @_req_call "require") + ) + +;; const _ = anything_else should produce const defs +;; let _ = anything_else should produce var defs +( + (lexical_declaration + "const" + (variable_declarator + name: (identifier) @local.definition.constant + value: (_) @_rest)) + (#not-match? @_rest "require.*") + ) +( + (lexical_declaration + "let" + (variable_declarator + name: (identifier) @local.definition.variable + value: (_) @_rest)) + (#not-match? @_rest "require.*") + ) + +;; perform above dance for pattern matching in const/let patterns +;; - import when +;; * const/let with object pattern +;; * const/let with array pattern +;; - define a const when using +;; * const with object pattern +;; * const with array pattern +;; - define a variable when using +;; * let with object pattern +;; * let with array pattern + +;; case 1 (imports): +( + (lexical_declaration + ["const" "let"] + (variable_declarator + name: + (object_pattern + (shorthand_property_identifier_pattern) @local.import) + value: (call_expression + function: (identifier) @_req_call))) + (#match? @_req_call "require") +) +( + (lexical_declaration + ["const" "let"] + (variable_declarator + name: + (array_pattern + (identifier) @local.import) + value: (call_expression + function: (identifier) @_req_call))) + (#match? @_req_call "require") +) + +;; case 2: +( + (lexical_declaration + "const" + (variable_declarator + name: + (object_pattern + (shorthand_property_identifier_pattern) @local.definition.constant) + value: (_) @_rest)) + (#not-match? 
@_rest "require.*") +) +( + (lexical_declaration + "let" + (variable_declarator + name: + (object_pattern + (shorthand_property_identifier_pattern) @local.definition.variable) + value: (_) @_rest)) + (#not-match? @_rest "require.*") +) + +;; case 3: +( + (lexical_declaration + "const" + (variable_declarator + name: + (array_pattern + (identifier) @local.definition.constant) + value: (_) @_rest)) + (#not-match? @_rest "require.*") +) +( + (lexical_declaration + "let" + (variable_declarator + name: + (array_pattern + (identifier) @local.definition.variable) + value: (_) @_rest)) + (#not-match? @_rest "require.*") +) + + +;; a = b +(assignment_expression + left: (identifier) @local.definition.variable) + +;; method def +;; +;; TODO: support getters and setters here, blocked on: +;; https://github.com/tree-sitter/tree-sitter/issues/1461 +(method_definition + (property_identifier) @hoist.definition.method) + +;; class +(class_declaration + (identifier) @local.definition.class) + +;; class fields +(class_body + (field_definition + (private_property_identifier) @local.definition.property)) + +;; arrow func +(arrow_function + (identifier) @local.definition.variable) + +;; imports + +;; import defaultMember from "module"; +(import_statement + (import_clause (identifier) @local.import)) + +;; import { member } from "module"; +;; import { member as alias } from "module"; +(import_statement + (import_clause + (named_imports + [(import_specifier !alias (identifier) @local.import) + (import_specifier alias: (identifier) @local.import)]))) + +;; for (item in list) +;; +;; `item` is a def +(for_in_statement + left: (identifier) @local.definition.variable) + +;; labels +(labeled_statement + (statement_identifier) @local.definition.label) + +;; catch clauses +(catch_clause + (identifier) @local.definition.variable) + +;; refs + +;; someVar; +(expression_statement (identifier) @local.reference) + +;; { "a": value } +(object + (pair + (identifier) @local.reference)) + +;; y = {a, b} +(object + (shorthand_property_identifier) @local.reference) + + +;; [ a, b, c ] +(array + (identifier) @local.reference) + +;; new Object() +(new_expression + (identifier) @local.reference) + +;; return x; +(return_statement + (identifier) @local.reference) + +;; yield t; +(yield_expression + (identifier) @local.reference) + +;; call expression +(call_expression + (identifier) @local.reference) + +;; call arguments +(arguments + (identifier) @local.reference) + +;; index expression +(subscript_expression + (identifier) @local.reference) + +;; member expression +(member_expression + (identifier) @local.reference) + +;; await ident; +(await_expression + (identifier) @local.reference) + +;; a + b +(binary_expression + (identifier) @local.reference) + +;; -x +(unary_expression + (identifier) @local.reference) + +;; x++ +(update_expression + (identifier) @local.reference) + +;; a = b +;; `b` is a ref +(assignment_expression + right: (identifier) @local.reference) + +;; a += b +(augmented_assignment_expression + (identifier) @local.reference) + +;; (a) +(parenthesized_expression + (identifier) @local.reference) + +;; tuples +(sequence_expression + (identifier) @local.reference) + +;; c? 
a : b +(ternary_expression + (identifier) @local.reference) + +;; {...object} +(spread_element + (identifier) @local.reference) + +;; chass _ extends T +;; `T` is a ref +(class_heritage + (identifier) @local.reference) + +;; exports are refs +;; +;; export { name, name }; +;; export { name as alias }; +(export_statement + (export_clause + (export_specifier name: (identifier) @local.reference))) + +;; export default ident; +(export_statement + (identifier) @local.reference) + +;; for (item in list) +;; +;; `list` is a def +(for_in_statement + right: (identifier) @local.reference) + +;; break f; +(break_statement (statement_identifier) @local.reference) + +;; continue f; +(continue_statement (statement_identifier) @local.reference) + +;; jsx +(jsx_expression + (identifier) @local.reference) + +(jsx_opening_element + (identifier) @local.reference) + +(jsx_closing_element + (identifier) @local.reference) + +(jsx_self_closing_element + (identifier) @local.reference) + +;; template strings +(template_substitution + (identifier) @local.reference) diff --git a/src/intelligence/language/python/mod.rs b/src/intelligence/language/python/mod.rs new file mode 100644 index 0000000..48ab60c --- /dev/null +++ b/src/intelligence/language/python/mod.rs @@ -0,0 +1,448 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static PYTHON: TSLanguageConfig = TSLanguageConfig { + language_ids: &["Python"], + file_extensions: &["py"], + grammar: tree_sitter_python::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + (identifier) @hoverable + "#, + ), + namespaces: &[&["class", "function", "parameter", "variable"]], +}; + +#[cfg(test)] +mod tests { + use crate::intelligence::language::test_utils::*; + + // tests the following constructs: + // - function definitions + // - function parameters + // - default parameters + // - block scopes + // - assignments statements + // - augmented assignment statements + // - function calls + #[test] + fn basic() { + test_scopes( + "Python", + r#" + def increment(value, by=1): + value += by + + def main(): + a = 5 + b = 3 + + increment(a) + increment(a, by=b) + + main() + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + increment { + kind: "function", + context: "def §increment§(value, by=1):", + referenced in (2): [ + `§increment§(a)`, + `§increment§(a, by=b)`, + ], + }, + main { + kind: "function", + context: "def §main§():", + referenced in (1): [ + `§main§()`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + value { + kind: "parameter", + context: "def increment(§value§, by=1):", + referenced in (1): [ + `§value§ += by`, + ], + }, + by { + kind: "parameter", + context: "def increment(value, §by§=1):", + referenced in (1): [ + `value += §by§`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "§a§ = 5", + referenced in (2): [ + `increment(§a§)`, + `increment(§a§, by=b)`, + ], + }, + b { + kind: "variable", + context: "§b§ = 3", + referenced in (1): [ + `increment(a, by=§b§)`, + ], + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // tests the following constructs: + // - from imports + // - imports + // - list comprehensions + // - type annotations + #[test] + fn complex() { + test_scopes( + "Python", + r#" + from typings import List + import math + + def sines(items: List[int]) -> 
List[int]: + return [math.sin(item) for item in items] + + list = [1, 2, 3] + sines(list) + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + sines { + kind: "function", + context: "def §sines§(items: List[int]) -> List[int]:", + referenced in (1): [ + `§sines§(list)`, + ], + }, + list { + kind: "variable", + context: "§list§ = [1, 2, 3]", + referenced in (1): [ + `sines(§list§)`, + ], + }, + ], + imports: [ + List { + context: "from typings import §List§", + referenced in (2): [ + `def sines(items: §List§[int]) -> List[int]:`, + `def sines(items: List[int]) -> §List§[int]:`, + ], + }, + math { + context: "import §math§", + referenced in (1): [ + `return [§math§.sin(item) for item in items]`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + items { + kind: "parameter", + context: "def sines(§items§: List[int]) -> List[int]:", + referenced in (1): [ + `return [math.sin(item) for item in §items§]`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + item { + kind: "variable", + context: "return [math.sin(item) for §item§ in items]", + referenced in (1): [ + `return [math.sin(§item§) for item in items]`, + ], + }, + ], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + // tests class definitions + #[test] + fn classes() { + test_scopes( + "Python", + r#" + class Foo(): + def bar(self): + return self + + def main(): + a = Foo() + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + Foo { + kind: "class", + context: "class §Foo§():", + referenced in (1): [ + `a = §Foo§()`, + ], + }, + main { + kind: "function", + context: "def §main§():", + }, + ], + child scopes: [ + scope { + definitions: [ + bar { + kind: "function", + context: "def §bar§(self):", + }, + ], + child scopes: [ + scope { + definitions: [ + self { + kind: "parameter", + context: "def bar(§self§):", + referenced in (1): [ + `return §self§`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "§a§ = Foo()", + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // tests edge cases + #[test] + fn absurd() { + // circular assignment + test_scopes( + "Python", + " + some_list = some_list[0] = [0, 1] + " + .as_bytes(), + expect![[r#" + scope { + definitions: [ + some_list { + kind: "variable", + context: "§some_list§ = some_list[0] = [0, 1]", + referenced in (1): [ + `some_list = §some_list§[0] = [0, 1]`, + ], + }, + ], + child scopes: [], + } + "#]], + ); + + // circular func call + test_scopes( + "Python", + " + fix = lambda f: fix(f) + " + .as_bytes(), + expect![[r#" + scope { + definitions: [ + fix { + kind: "variable", + context: "§fix§ = lambda f: fix(f)", + referenced in (1): [ + `fix = lambda f: §fix§(f)`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + f { + kind: "parameter", + context: "fix = lambda §f§: fix(f)", + referenced in (1): [ + `fix = lambda f: fix(§f§)`, + ], + }, + ], + child scopes: [], + }, + ], + } + "#]], + ); + } + + #[test] + fn decorators() { + test_scopes( + "Python", + r#" + from module import decor + + @decor + def foo(): + pass + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + foo { + kind: "function", + context: "def §foo§():", + }, + ], + imports: [ + decor { + context: "from module import §decor§", + referenced in (1): [ + `@§decor§`, + ], + }, + ], + child scopes: [ + scope { 
+ definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn types() { + test_scopes( + "Python", + r#" + MyType = List[int] + + def foo(t: MyType) -> MyType: + a: MyType = [1, 2, 3] + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + MyType { + kind: "variable", + context: "§MyType§ = List[int]", + referenced in (3): [ + `def foo(t: §MyType§) -> MyType:`, + `def foo(t: MyType) -> §MyType§:`, + `a: §MyType§ = [1, 2, 3]`, + ], + }, + foo { + kind: "function", + context: "def §foo§(t: MyType) -> MyType:", + }, + ], + child scopes: [ + scope { + definitions: [ + t { + kind: "parameter", + context: "def foo(§t§: MyType) -> MyType:", + }, + ], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "§a§: MyType = [1, 2, 3]", + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } +} diff --git a/src/intelligence/language/python/scopes.scm b/src/intelligence/language/python/scopes.scm new file mode 100644 index 0000000..433073a --- /dev/null +++ b/src/intelligence/language/python/scopes.scm @@ -0,0 +1,344 @@ +;; scopes +;; +[(block) + (lambda) + + ;; defs in comprehensions are limited to the + ;; comprehension itself + (list_comprehension) + (dictionary_comprehension) + (set_comprehension) + (generator_expression) + + ;; compound statements + (with_statement) + (for_statement) + + (function_definition) + ] @local.scope + + +;; defs + +;; all assignments are counted as defs +(assignment + left: (identifier) @local.definition.variable) + +;; assignment patterns +;; a, b = 1, 2 +(pattern_list + (identifier) @local.definition.variable) + +;; walrus +(named_expression + . + (identifier) @local.definition.variable) + +;; def a() +(function_definition + (identifier) @hoist.definition.function) + +;; def _(a, b, c): +(parameters + (identifier) @local.definition.parameter) + +;; def_(a: str) +(typed_parameter + (identifier) @local.definition.parameter) + +;; lambda a, b, c: +(lambda_parameters + (identifier) @local.definition.parameter) + +;; default params +;; +;; def foo(printer=val): +;; +;; `printer` is a def +;; `val` is ignored +(default_parameter + . + (identifier) @local.definition.parameter) + +;; patterns +(list_splat_pattern + (identifier) @local.definition.variable) +(dictionary_splat_pattern + (identifier) @local.definition.variable) +(tuple_pattern + (identifier) @local.definition.variable) + +;; with a as b: +;; +;; `b` is a def +(as_pattern + (as_pattern_target + (identifier) @local.definition.variable)) + +;; thing() for x in xs +;; +;; `x` is a def +(for_in_clause + . + "for" + . + (identifier) @local.definition.variable) + +;; for a in b: +;; +;; `a` is a def +(for_statement + . + "for" + . + (identifier) @local.definition.variable) + +;; imports: +;; +;; import a, b +;; import module.submodule.c +;; +;; here, `a`, `b`, `c` are imports, `module` and +;; `submodule` are ignored. so we capture the last +;; child of the `dotted_name` node using an anchor. 
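+;; (in tree-sitter queries, a trailing `.` anchor after a child pattern
+;; pins it to the last named child, which is how only the final
+;; identifier of the dotted name is captured here)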
+(import_statement + (dotted_name + (identifier) @local.import + .)) + +;; import a as b +;; +;; `a` is ignored +;; `b` is an import +(import_statement + (aliased_import + "as" + (identifier) @local.import)) + +;; from module import name1, name2 +(import_from_statement + name: + (dotted_name + (identifier) @local.import)) + +;; from __future__ import name +(future_import_statement + name: + (dotted_name + (identifier) @local.import)) + +;; class A +(class_definition + (identifier) @local.definition.class) + +;; global a, b +(global_statement + (identifier) @local.definition.variable) + + +;; refs + +;;[a, b, c] +(list + (identifier) @local.reference) + +;; f-strings +(interpolation + (identifier) @local.reference) + +;; [ *a ] +(list_splat + (identifier) @local.reference) + +;; {a: A} +;; a is ignored +;; A is a ref +(dictionary + (pair + (identifier) + (identifier) @local.reference)) + +;; **dict +(dictionary_splat + (identifier) @local.reference) + +;; {a, b, c} +(set + (identifier) @local.reference) + +;; a.b +;; `a` is a ref +;; `b` is ignored +(attribute + . + (identifier) @local.reference) + +;; if we have self.field(), we can resolve field +;; safely +(attribute + (identifier) @_self_ident + (identifier) @local.reference + (#eq? @_self_ident "self")) + +;; a[b] +(subscript + (identifier) @local.reference) + +;; a[i:j] +(slice + (identifier) @local.reference) + +;; a() +(call + (identifier) @local.reference) + +;; call arguments +(argument_list + (identifier) @local.reference) + +;; call(keyword=arg) +;; `keyword` is ignored +;; `arg` is a ref +(keyword_argument + (_) + (identifier) @local.reference) + +;; (a, b, c) +(tuple + (identifier) @local.reference) + +;; for t in item +;; +;; `item` is a reference +(for_in_clause + "in" + (identifier) @local.reference) + +;; for a in b: +;; +;; `b` is a ref +(for_statement + "in" + . + (identifier) @local.reference) + +;; with a as b: +;; +;; `a` is a ref +(as_pattern + (identifier) @local.reference) + +;; (a for a in ..) 
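+;; the identifier matched here is the generated expression when it is a
+;; bare name (the `a` before `for`); the loop variable and the iterable
+;; are covered by the `for_in_clause` patterns above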
+(generator_expression + (identifier) @local.reference) + +;; await x +(await + (identifier) @local.reference) + +;; return x +(return_statement + (identifier) @local.reference) + +;; a + b +(binary_operator + (identifier) @local.reference) + +;; ~a +(unary_operator + (identifier) @local.reference) + +;; a and b +(boolean_operator + (identifier) @local.reference) + +;; not a +(not_operator + (identifier) @local.reference) + +;; a in b +;; a < b +(comparison_operator + (identifier) @local.reference) + +;; a += 1 +(augmented_assignment + (identifier) @local.reference) + +;; (a) +(parenthesized_expression + (identifier) @local.reference) + +;; a, b, c +(expression_list + (identifier) @local.reference) + +;; a; +(expression_statement + (identifier) @local.reference) + +;; z if x else y +(conditional_expression + (identifier) @local.reference) + +;; comprehensions +(list_comprehension + (identifier) @local.reference) +(dictionary_comprehension + (pair + (identifier) @local.reference)) +(set_comprehension + (identifier) @local.reference) + +;; decorators +(decorator + (identifier) @local.reference) + +;; type refs +;; +;; def foo(a: T) +(parameters + (typed_parameter + (type + (identifier) @local.reference))) + +;; def foo() -> T: +(function_definition + return_type: + (type + (identifier) @local.reference)) + +;; var: T = init() +(assignment + type: + (type + (identifier) @local.reference)) + +;; python 2 +;; +;; print item +(print_statement + (identifier) @local.reference) +;; print >> a +(chevron + (identifier) @local.reference) +;; assert a, b, c +(assert_statement + (identifier) @local.reference) +;; exec '1+1' +(exec_statement + (identifier) @local.reference) + +;; del a, b, c +(delete_statement + (identifier) @local.reference) + +(while_statement + (identifier) @local.reference) + +(if_statement + (identifier) @local.reference) + +;; raise error from e +(raise_statement + (identifier) @local.reference) diff --git a/src/intelligence/language/r/mod.rs b/src/intelligence/language/r/mod.rs new file mode 100644 index 0000000..3fcd817 --- /dev/null +++ b/src/intelligence/language/r/mod.rs @@ -0,0 +1,270 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static R: TSLanguageConfig = TSLanguageConfig { + language_ids: &["R"], + file_extensions: &["R"], + grammar: tree_sitter_r::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + (identifier) @hoverable + "#, + ), + namespaces: &[&[ + // variables + "variable", + ]], +}; + +#[cfg(test)] +mod tests { + use crate::intelligence::language::test_utils::*; + + // Self::method and self.method can be raised as references + #[test] + fn declarations() { + test_scopes( + "R", + r#" + x <- value + value -> y + x[0] <<- value + value ->> y[0] + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + x { + kind: "variable", + context: "§x§ <- value", + referenced in (1): [ + `§x§[0] <<- value`, + ], + }, + y { + kind: "variable", + context: "value -> §y§", + referenced in (1): [ + `value ->> §y§[0]`, + ], + }, + ], + child scopes: [], + } + "#]], + ) + } + + #[test] + fn control_if() { + test_scopes( + "R", + r#" + x <- TRUE + y <- value + if (x) + return(y) + else + return(y) + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + x { + kind: "variable", + context: "§x§ <- TRUE", + referenced in (1): [ + `if (§x§)`, + ], + }, + y { + kind: "variable", + context: "§y§ <- value", + referenced in (2): [ + `return(§y§)`, + `return(§y§)`, + ], + }, + ], + child 
scopes: [], + } + "#]], + ) + } + + #[test] + fn control_loop() { + test_scopes( + "R", + r#" + x <- TRUE + repeat x + + while (x) return + + y <- c(1, 2, 3) + for (item in y) { + y <- item + 1 + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + x { + kind: "variable", + context: "§x§ <- TRUE", + referenced in (2): [ + `repeat §x§`, + `while (§x§) return`, + ], + }, + y { + kind: "variable", + context: "§y§ <- c(1, 2, 3)", + referenced in (1): [ + `for (item in §y§) {`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + item { + kind: "variable", + context: "for (§item§ in y) {", + referenced in (1): [ + `y <- §item§ + 1`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + y { + kind: "variable", + context: "§y§ <- item + 1", + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn control_switch() { + test_scopes( + "R", + r#" + x <- "add" + + y <- 2 + z <- 1 + switch(x, "add" = y + z, "subtract" = y - z) + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + x { + kind: "variable", + context: "§x§ <- \"add\"", + referenced in (1): [ + `switch(§x§, "add" = y + z, "subtract" = y - z)`, + ], + }, + y { + kind: "variable", + context: "§y§ <- 2", + referenced in (2): [ + `switch(x, "add" = §y§ + z, "subtract" = y - z)`, + `switch(x, "add" = y + z, "subtract" = §y§ - z)`, + ], + }, + z { + kind: "variable", + context: "§z§ <- 1", + referenced in (2): [ + `switch(x, "add" = y + §z§, "subtract" = y - z)`, + `switch(x, "add" = y + z, "subtract" = y - §z§)`, + ], + }, + ], + child scopes: [], + } + "#]], + ) + } + + #[test] + fn indexing() { + test_scopes( + "R", + r#" + x <- c(1, 2, 3) + + idx <- 1 + + y <- x[i] + z <- x $ i + w <- x[[i]] + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + x { + kind: "variable", + context: "§x§ <- c(1, 2, 3)", + referenced in (3): [ + `y <- §x§[i]`, + `z <- §x§ $ i`, + `w <- §x§[[i]]`, + ], + }, + idx { + kind: "variable", + context: "§idx§ <- 1", + }, + y { + kind: "variable", + context: "§y§ <- x[i]", + }, + z { + kind: "variable", + context: "§z§ <- x $ i", + }, + w { + kind: "variable", + context: "§w§ <- x[[i]]", + }, + ], + child scopes: [], + } + "#]], + ) + } + + #[test] + fn value_of_function_definition() { + let src = r#"foo <- function (a, b, c) { // 0 + a <- a + 1 // 1 + b <- a + 1 // 2 + c <- a + 1 // 3 + } // 4"#; + + let sg = build_graph("R", src.as_bytes()); + let foo_function = sg.find_node_by_name(src.as_bytes(), b"foo").unwrap(); + let function_node = &sg.graph[sg.value_of_definition(foo_function).unwrap()]; + + assert_eq!(function_node.range().start.line, 0); + assert_eq!(function_node.range().end.line, 4); + } +} diff --git a/src/intelligence/language/r/scopes.scm b/src/intelligence/language/r/scopes.scm new file mode 100644 index 0000000..1080ec7 --- /dev/null +++ b/src/intelligence/language/r/scopes.scm @@ -0,0 +1,91 @@ +;; scopes +[(brace_list) + (function_definition) + (for)] @local.scope + +;; defs + +;; lhs of assignment +(left_assignment + . + (identifier) @local.definition.variable) +(right_assignment + (identifier) @local.definition.variable + .) +(super_assignment + . + (identifier) @local.definition.variable) +(super_right_assignment + (identifier) @local.definition.variable + .) + +(for + . + (identifier) @local.definition.variable) + +(formal_parameters + (identifier) @local.definition.variable) + +;; refs + +;; rhs of assignment +(left_assignment + (identifier) @local.reference + .) +(right_assignment + . 
+ (identifier) @local.reference) +(super_assignment + (identifier) @local.reference + .) +(super_right_assignment + . + (identifier) @local.reference) + +(call + (identifier) @local.reference @_call_name + (#not-eq? @_call_name "c")) ;; used to refer to vector inits, probably noisy + +(namespace_get + . + (identifier) @local.reference) + +(binary + (identifier) @local.reference) + +(dollar + . + (identifier) @local.reference) + +(subset + (identifier) @local.reference) + +(subset2 + (identifier) @local.reference) + +;; TODO: this matches both a and b in foo(a = b) +;; the grammar does not create a new structure for named arguments +(arguments + (identifier) @local.reference) + +(if + (identifier) @local.reference) + +(repeat + (identifier) @local.reference) + +(while + (identifier) @local.reference) + +(for + "in" + (identifier) @local.reference) + +(switch + (identifier) @local.reference) + +(brace_list + (identifier) @local.reference) + +(function_definition + (identifier) @local.reference) diff --git a/src/intelligence/language/ruby/mod.rs b/src/intelligence/language/ruby/mod.rs new file mode 100644 index 0000000..6ede7d8 --- /dev/null +++ b/src/intelligence/language/ruby/mod.rs @@ -0,0 +1,370 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static RUBY: TSLanguageConfig = TSLanguageConfig { + language_ids: &["Ruby"], + file_extensions: &["rb"], + grammar: tree_sitter_ruby::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + [(identifier) + (class_variable) + (instance_variable) + (constant) + (global_variable) + (hash_key_symbol)] @hoverable + "#, + ), + namespaces: &[ + // everything is an object + &["variable", "constant", "class", "method", "module"], + ], +}; + +#[cfg(test)] +mod tests { + use crate::intelligence::language::test_utils::*; + + // tests the following constructs + // + // - variable assignment + // - if-then + #[test] + fn basic_decl() { + test_scopes( + "Ruby", + r#" + favoriteNumber = 5 + if favoriteNumber == 5 + puts "My favorite number is 5!" + end + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + favoriteNumber { + kind: "variable", + context: "§favoriteNumber§ = 5", + referenced in (1): [ + `if §favoriteNumber§ == 5`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + } + "#]], + ) + } + + // tests the following constructs: + // + // - const decl. + // - class decl. + // - instance variable decl. + // - method decl. + // - method param decl. 
+ // - default method params + #[test] + fn const_and_class_decl() { + test_scopes( + "Ruby", + r#" + X, Y = 2, 3 + class Human + @age, @height = 0, 0 + def age(age=@age) + @age + end + end + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + X { + kind: "constant", + context: "§X§, Y = 2, 3", + }, + Y { + kind: "constant", + context: "X, §Y§ = 2, 3", + }, + Human { + kind: "class", + context: "class §Human§", + }, + ], + child scopes: [ + scope { + definitions: [ + @age { + kind: "variable", + context: "§@age§, @height = 0, 0", + referenced in (1): [ + `§@age§`, + ], + }, + @height { + kind: "variable", + context: "@age, §@height§ = 0, 0", + }, + age { + kind: "method", + context: "def §age§(age=@age)", + }, + ], + child scopes: [ + scope { + definitions: [ + age { + kind: "variable", + context: "def age(§age§=@age)", + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // tests the following constructs + // + // - lambda assignment + // - lambda param decl. + // - lambda body + // - method decl. + // - method param decl. + #[test] + fn methods_and_lambdas() { + test_scopes( + "Ruby", + r#" + l = -> (x) { x + 1 } + def update(l, v) + l.call v + end + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + l { + kind: "variable", + context: "§l§ = -> (x) { x + 1 }", + referenced in (1): [ + `§l§.call v`, + ], + }, + update { + kind: "method", + context: "def §update§(l, v)", + }, + ], + child scopes: [ + scope { + definitions: [ + x { + kind: "variable", + context: "l = -> (§x§) { x + 1 }", + referenced in (1): [ + `l = -> (x) { §x§ + 1 }`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [ + l { + kind: "variable", + context: "def update(§l§, v)", + referenced in (1): [ + `§l§.call v`, + ], + }, + v { + kind: "variable", + context: "def update(l, §v§)", + referenced in (1): [ + `l.call §v§`, + ], + }, + ], + child scopes: [], + }, + ], + } + "#]], + ) + } + + // tests the following constructs: + // + // - until + // - case-when-else + // - interpolation + // - binary ops + // - operator-assignment + #[test] + fn control_flow() { + test_scopes( + "Ruby", + r#" + counter = 1 + until counter > 10 + case + when (counter % 3 == 0) && (counter % 5 == 0) + both_3_and_5 = true + puts " #{counter} is divisible by both 3 and 5!" + when counter % 3 == 0 + only_3 = true + puts " #{counter} is divisible by 3!" + when counter % 5 == 0 + only_5 = true + puts " #{counter} is divisible by 5!" + else + neither = true + puts " #{counter} is not divisible by 3 or 5!" 
+ end + + counter +=1 + end + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + counter { + kind: "variable", + context: "§counter§ = 1", + referenced in (10): [ + `until §counter§ > 10`, + `when (§counter§ % 3 == 0) && (counter % 5 == 0)`, + `when (counter % 3 == 0) && (§counter§ % 5 == 0)`, + `puts " #{§counter§} is divisible by both 3 and 5!"`, + `when §counter§ % 3 == 0`, + `puts " #{§counter§} is divisible by 3!"`, + `when §counter§ % 5 == 0`, + `puts " #{§counter§} is divisible by 5!"`, + `puts " #{§counter§} is not divisible by 3 or 5!"`, + `§counter§ +=1`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + both_3_and_5 { + kind: "variable", + context: "§both_3_and_5§ = true", + }, + ], + child scopes: [], + }, + ], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + only_3 { + kind: "variable", + context: "§only_3§ = true", + }, + ], + child scopes: [], + }, + ], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + only_5 { + kind: "variable", + context: "§only_5§ = true", + }, + ], + child scopes: [], + }, + ], + }, + scope { + definitions: [ + neither { + kind: "variable", + context: "§neither§ = true", + }, + ], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + // tests global decls + #[test] + fn globals() { + test_scopes( + "Ruby", + r#" + def foo() + $var = 2 + end + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + foo { + kind: "method", + context: "def §foo§()", + }, + $var { + kind: "constant", + context: "§$var§ = 2", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + } + "#]], + ) + } +} diff --git a/src/intelligence/language/ruby/scopes.scm b/src/intelligence/language/ruby/scopes.scm new file mode 100644 index 0000000..725fefb --- /dev/null +++ b/src/intelligence/language/ruby/scopes.scm @@ -0,0 +1,311 @@ +;; scopes +[ + (block) + (do_block) + (rescue) + (when) + (unless) + (until) + (begin) + (case) + (case_match) + (in_clause) + (then) + (else) + (method) + (singleton_method) + (class) + (module) + (lambda) + ] @local.scope + +;; defs + +;; var = _ +(assignment + left: + [(identifier) + (class_variable) + (instance_variable)] @local.definition.variable) +;; Var = _ +(assignment + left: (constant) @local.definition.constant) +;; $var = _ +(assignment + left: (global_variable) @global.definition.constant) +;; x, y = +(left_assignment_list + [(identifier) + (class_variable) + (instance_variable)] @local.definition.variable) +;; x, y = +(left_assignment_list + (constant) @local.definition.constant) + +;; do block params +(block_parameters + (identifier) @local.definition.variable) + +;; lambda params +(lambda_parameters + (identifier) @local.definition.variable) + +;; Exception => variable +(exception_variable + (identifier) @local.definition.variable) + +;; method def +(method + (identifier) @hoist.definition.method) + +;; params +(method_parameters + (identifier) @local.definition.variable) + +;; def foo(&block) +(block_parameter + (identifier) @local.definition.variable) + +;; class def +(class + (constant) @hoist.definition.class) + +;; def foo(*list) +(splat_parameter + (identifier) @local.definition.variable) + +;; def foo(**hash) +(hash_splat_parameter + (identifier) @local.definition.variable) + +;; def foo(arg = 0) +(optional_parameter + (identifier) 
@local.definition.variable) + +;; module P +(module + (constant) @hoist.definition.module) + +;; alias new_method existing_methdo +(alias + name: (identifier) @local.definition.method) + +;; patterns +;; pat => bind +(as_pattern + name: (identifier) @local.definition.variable) +;; Integer, a, String +(array_pattern + (identifier) @local.definition.variable) +;; {user: u} +(hash_pattern + (keyword_pattern + value: (identifier) @local.definition.variable)) +;; user: (only key) +(hash_pattern + (keyword_pattern + key: (hash_key_symbol) @local.definition.variable + !value)) +;; a | b +(alternative_pattern + (identifier) @local.definition.variable) +;; a | b +(variable_reference_pattern + (identifier) @local.definition.variable) + + +;; refs + +;; a and b +(binary + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; a ? b : c +(conditional + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; a += b +;; +;; b is a ref +(operator_assignment + (identifier) @local.reference) + +;; a..b +(range + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; ++a +(unary + (identifier) @local.reference) + +;; [a, b, c] +(array + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; {key: v} +(pair + [(identifier) + (constant) + (instance_variable)] @local.reference) + + +;; a = b +;; +;; b is a ref +(assignment + right: + [(identifier) + (constant) + (instance_variable)] @local.reference) +(right_assignment_list + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; a.prop +;; a.method() +(call + receiver: + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; foo() +(call + method: (identifier) @local.reference + !receiver) + +;; method(a, b, c) +(argument_list + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; *arg +(splat_argument + (identifier) @local.reference) + +;; "#{var}" +(interpolation + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; arr[0] +(element_reference + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; if _ .. elif _ .. else .. end +(if + [(identifier) + (constant) + (instance_variable)] @local.reference) +(then + [(identifier) + (constant) + (instance_variable)] @local.reference) +(else + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; expr if condition +(if_modifier + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; while _ do _ end +(while + [(identifier) + (constant) + (instance_variable)] @local.reference) +(do + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; expr while condition +(while_modifier + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; case a when b end +(case + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; begin .. rescue .. else .. ensure +(begin + [(identifier) + (constant) + (instance_variable)] @local.reference) +(ensure + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; unless .. end +(unless + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; b unless a +(unless_modifier + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; until .. 
end +(until + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; b until a +(until_modifier + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; (a) +(parenthesized_statements + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; statements +(body_statement + [(identifier) + (constant) + (instance_variable)] @local.reference) + +(block_body + [(identifier) + (constant) + (instance_variable)] @local.reference) + +;; class _ < A +(superclass + (constant) @local.reference) + +;; alias new_method existing_methdo +(alias + alias: (identifier) @local.reference) + +;; A::B +(scope_resolution + scope: (constant) @local.reference) diff --git a/src/intelligence/language/rust/mod.rs b/src/intelligence/language/rust/mod.rs new file mode 100644 index 0000000..e8d30b2 --- /dev/null +++ b/src/intelligence/language/rust/mod.rs @@ -0,0 +1,1003 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static RUST: TSLanguageConfig = TSLanguageConfig { + language_ids: &["Rust"], + file_extensions: &["rs"], + grammar: tree_sitter_rust::language, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + [(identifier) + (shorthand_field_identifier) + (field_identifier) + (type_identifier)] @hoverable + "#, + ), + namespaces: &[&[ + // variables + "const", + "function", + "variable", + // types + "struct", + "enum", + "union", + "typedef", + "interface", + // fields + "field", + "enumerator", + // namespacing + "module", + // misc + "label", + "lifetime", + ]], +}; + +#[cfg(test)] +mod tests { + use crate::intelligence::language::test_utils::*; + + #[test] + fn declare_const_and_static() { + let src = r#" + const a: () = (); + static b: () = (); + "#; + + let (_, def_count, _, _) = counts(src, "Rust"); + + // a, b + assert_eq!(def_count, 2); + } + + #[test] + fn declare_let_statement() { + let src = r#" + fn main() { + let a = (); + let (b, c) = (); + let S { d, e } = (); + let S { field: f, g } = (); + let S { h, .. 
} = (); + let S { i, field: _ } = (); + } + "#; + let (_, def_count, _, _) = counts(src, "Rust"); + + // main, a, b, c, d, e, f, g, h, i + assert_eq!(def_count, 10); + } + + #[test] + fn declare_function_params() { + let src = r#" + fn f1(a: T) {} + fn f2(b: T, c: T) {} + fn f3((d, e): (T, U)) {} + fn f4(S {f, g}: S) {} + fn f5(S {h, ..}: S) {} + fn f6(S { field: i }: S) {} + "#; + let (_, def_count, _, _) = counts(src, "Rust"); + + // f1, f2, f3, f4, f5, f6, a, b, c, d, e, f, g, h, i + assert_eq!(def_count, 15); + } + + #[test] + fn declare_closure_params() { + let src = r#" + fn main() { + let _ = |x| {}; + let _ = |x, y| {}; + let _ = |x: ()| {}; + let _ = |(x, y): ()| {}; + } + "#; + let (_, def_count, _, _) = counts(src, "Rust"); + + // main, + // x, + // x, y, + // x, + // x, y + assert_eq!(def_count, 7); + } + + #[test] + fn declare_labels() { + let src = r#" + fn main() { + 'loop: loop {}; + 'loop: for _ in () {} + 'loop: while true {} + } + "#; + let (_, def_count, _, _) = counts(src, "Rust"); + + // main, 'loop x3 + assert_eq!(def_count, 4); + } + + #[test] + fn declare_types() { + let src = r#" + struct One { + two: T, + three: T, + } + + enum Four { + Five, + Six(T), + Seven { + eight: T + } + } + + union Nine {} + + type Ten = (); + "#; + let (_, def_count, _, _) = counts(src, "Rust"); + + assert_eq!(def_count, 10); + } + + #[test] + fn declare_namespaces() { + let src = r#" + mod one {} + pub mod two {} + mod three { + mod four {} + } + "#; + let (_, def_count, _, _) = counts(src, "Rust"); + + assert_eq!(def_count, 4); + } + + #[test] + fn declare_let_expr() { + let src = r#" + if let a = () {} + if let Some(a) = () {} + + while let a = () {} + while let Some(a) = () {} + "#; + let (_, def_count, _, _) = counts(src, "Rust"); + + assert_eq!(def_count, 4); + } + + #[test] + fn refer_unary_expr() { + let src = r#" + fn main() { + let a = 2; + !a; + -a; + *a; + } + "#; + let (_, _, ref_count, _) = counts(src, "Rust"); + + assert_eq!(ref_count, 3); + } + + #[test] + fn refer_binary_expr() { + let src = r#" + fn main() { + let a = 2; + let b = 3; + a + b; + a >> b; + } + "#; + let (_, _, ref_count, _) = counts(src, "Rust"); + + assert_eq!(ref_count, 4); + } + + #[test] + fn refer_control_flow() { + let src = r#" + fn main() { + let a = 2; + + // 1 + if a {} + + // 2 + if _ {} else if a {} + + // 3 + while a { + // 4 + break a; + } + + // 5 + a?; + + // 6 + return a; + + // 7 + a.await; + + // 8 + yield a; + } + "#; + let (_, _, ref_count, _) = counts(src, "Rust"); + + assert_eq!(ref_count, 8); + } + + #[test] + fn refer_assignment() { + let src = r#" + fn main() { + let mut a = 2; + a += 2; + a = 2; + a *= 2; + } + "#; + let (_, _, ref_count, _) = counts(src, "Rust"); + + assert_eq!(ref_count, 3); + } + + #[test] + fn refer_struct_expr() { + let src = r#" + fn main() { + let a = 2; + let b = 2; + S { a, b }; + S { ..a }; + S { field: a, b }; + } + "#; + let (_, _, ref_count, _) = counts(src, "Rust"); + + assert_eq!(ref_count, 5); + } + + #[test] + fn refer_dot() { + let src = r#" + fn main() { + let a = S {}; + + a.b; + a.foo(); + } + "#; + let (_, _, ref_count, _) = counts(src, "Rust"); + + assert_eq!(ref_count, 2); + } + + #[test] + fn refer_arguments() { + let src = r#" + fn main() { + let a = 2; + let b = 3; + foo(a, b); + } + "#; + let (_, _, ref_count, _) = counts(src, "Rust"); + + assert_eq!(ref_count, 2); + } + + #[test] + fn symbols() { + let src = r#" + fn one() { + let two = 1; + let (three, four) = (2, 3); + let T { field: five} = t; + let _ = |six| {}; + const seven: 
() = (); + static eight: () = (); + } + + struct Nine { + ten: (), + } + + union Eleven {} + enum Twelve { + Thirteen, + Fourteen(T) + } + "#; + assert_eq_defs( + src.as_bytes(), + "Rust", + vec![ + ("one", "function"), + ("two", "variable"), + ("three", "variable"), + ("four", "variable"), + ("five", "variable"), + ("six", "variable"), + ("seven", "const"), + ("eight", "const"), + ("Nine", "struct"), + ("ten", "field"), + ("Eleven", "union"), + ("Twelve", "enum"), + ("Thirteen", "enumerator"), + ("Fourteen", "enumerator"), + ], + ); + } + + #[test] + fn function_params() { + test_scopes( + "Rust", + r#" + fn foo(t: T, u: U) -> R {} + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + foo { + kind: "function", + context: "fn §foo§(t: T, u: U) -> R {}", + }, + ], + child scopes: [ + scope { + definitions: [ + t { + kind: "variable", + context: "fn foo(§t§: T, u: U) -> R {}", + }, + u { + kind: "variable", + context: "fn foo(t: T, §u§: U) -> R {}", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ); + } + + #[test] + fn use_statements() { + test_scopes( + "Rust", + r#" + mod intelligence; + + use bleep; + use super::test_utils; + use intelligence::language as lang; + use crate::text_range::{TextRange, Point}; + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + intelligence { + kind: "module", + context: "mod §intelligence§;", + referenced in (1): [ + `use §intelligence§::language as lang;`, + ], + }, + ], + imports: [ + bleep { + context: "use §bleep§;", + }, + test_utils { + context: "use super::§test_utils§;", + }, + lang { + context: "use intelligence::language as §lang§;", + }, + TextRange { + context: "use crate::text_range::{§TextRange§, Point};", + }, + Point { + context: "use crate::text_range::{TextRange, §Point§};", + }, + ], + child scopes: [], + } + "#]], + ) + } + + #[test] + fn lifetimes() { + test_scopes( + "Rust", + r#" + impl<'a, T> Trait for Struct<'a, T> { + fn foo<'b>(&'a self) -> &'b T { } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + 'a { + kind: "lifetime", + context: "impl<§'a§, T> Trait for Struct<'a, T> {", + referenced in (2): [ + `impl<'a, T> Trait for Struct<§'a§, T> {`, + `fn foo<'b>(&§'a§ self) -> &'b T { }`, + ], + }, + T { + kind: "typedef", + context: "impl<'a, §T§> Trait for Struct<'a, T> {", + referenced in (2): [ + `impl<'a, T> Trait for Struct<'a, §T§> {`, + `fn foo<'b>(&'a self) -> &'b §T§ { }`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + foo { + kind: "function", + context: "fn §foo§<'b>(&'a self) -> &'b T { }", + }, + ], + child scopes: [ + scope { + definitions: [ + 'b { + kind: "lifetime", + context: "fn foo<§'b§>(&'a self) -> &'b T { }", + referenced in (1): [ + `fn foo<'b>(&'a self) -> &§'b§ T { }`, + ], + }, + self { + kind: "variable", + context: "fn foo<'b>(&'a §self§) -> &'b T { }", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn generics_and_traits() { + test_scopes( + "Rust", + r#" + trait Foo {} + + fn foo<'a, 'b, T, U: Foo + 'a>(t: T, u: U) + where T: Foo + 'b, + { } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + Foo { + kind: "interface", + context: "trait §Foo§ {}", + referenced in (2): [ + `fn foo<'a, 'b, T, U: §Foo§ + 'a>(t: T, u: U)`, + `where T: §Foo§ + 'b,`, + ], + }, + foo { + kind: "function", + context: "fn §foo§<'a, 'b, T, U: Foo + 'a>(t: 
T, u: U)", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [ + 'a { + kind: "lifetime", + context: "fn foo<§'a§, 'b, T, U: Foo + 'a>(t: T, u: U)", + referenced in (1): [ + `fn foo<'a, 'b, T, U: Foo + §'a§>(t: T, u: U)`, + ], + }, + 'b { + kind: "lifetime", + context: "fn foo<'a, §'b§, T, U: Foo + 'a>(t: T, u: U)", + referenced in (1): [ + `where T: Foo + §'b§,`, + ], + }, + T { + kind: "typedef", + context: "fn foo<'a, 'b, §T§, U: Foo + 'a>(t: T, u: U)", + referenced in (3): [ + `fn foo<'a, 'b, T, U: Foo<§T§> + 'a>(t: T, u: U)`, + `fn foo<'a, 'b, T, U: Foo + 'a>(t: §T§, u: U)`, + `where §T§: Foo + 'b,`, + ], + }, + U { + kind: "typedef", + context: "fn foo<'a, 'b, T, §U§: Foo + 'a>(t: T, u: U)", + referenced in (1): [ + `fn foo<'a, 'b, T, U: Foo + 'a>(t: T, u: §U§)`, + ], + }, + t { + kind: "variable", + context: "fn foo<'a, 'b, T, U: Foo + 'a>(§t§: T, u: U)", + }, + u { + kind: "variable", + context: "fn foo<'a, 'b, T, U: Foo + 'a>(t: T, §u§: U)", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn type_constructors() { + test_scopes( + "Rust", + r#" + struct Highlight {} + + enum Direction { Incoming, Outgoing } + + fn foo() -> Highlight { + Highlight { } + } + + fn bar() -> Direction { + Direction::Incoming + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + Highlight { + kind: "struct", + context: "struct §Highlight§ {}", + referenced in (2): [ + `fn foo() -> §Highlight§ {`, + `§Highlight§ { }`, + ], + }, + Direction { + kind: "enum", + context: "enum §Direction§ { Incoming, Outgoing }", + referenced in (2): [ + `fn bar() -> §Direction§ {`, + `§Direction§::Incoming`, + ], + }, + foo { + kind: "function", + context: "fn §foo§() -> Highlight {", + }, + bar { + kind: "function", + context: "fn §bar§() -> Direction {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [ + Incoming { + kind: "enumerator", + context: "enum Direction { §Incoming§, Outgoing }", + }, + Outgoing { + kind: "enumerator", + context: "enum Direction { Incoming, §Outgoing§ }", + }, + ], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn macros() { + test_scopes( + "Rust", + r#" + fn main() { + let (a, b, c) = (); + // top-level tokens + assert_eq!(a, b + c); + + // nested tokens + println!("{}", if a { b } then { c }); + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "fn §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "let (§a§, b, c) = ();", + referenced in (2): [ + `assert_eq!(§a§, b + c);`, + `println!("{}", if §a§ { b } then { c });`, + ], + }, + b { + kind: "variable", + context: "let (a, §b§, c) = ();", + referenced in (2): [ + `assert_eq!(a, §b§ + c);`, + `println!("{}", if a { §b§ } then { c });`, + ], + }, + c { + kind: "variable", + context: "let (a, b, §c§) = ();", + referenced in (2): [ + `assert_eq!(a, b + §c§);`, + `println!("{}", if a { b } then { §c§ });`, + ], + }, + ], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ) + } + + // 
Self::method and self.method can be raised as references + #[test] + fn handle_self_type_and_var() { + test_scopes( + "Rust", + r#" + struct MyStruct {} + + impl MyStruct { + fn foo() { + Self::foo() + } + + fn bar(&self) { + self.bar() + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + MyStruct { + kind: "struct", + context: "struct §MyStruct§ {}", + referenced in (1): [ + `impl §MyStruct§ {`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + foo { + kind: "function", + context: "fn §foo§() {", + referenced in (1): [ + `Self::§foo§()`, + ], + }, + bar { + kind: "function", + context: "fn §bar§(&self) {", + referenced in (1): [ + `self.§bar§()`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [ + self { + kind: "variable", + context: "fn bar(&§self§) {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn let_else_1_65_support() { + test_scopes( + "Rust", + r#" + fn main() { + let a = 3; + if let b = a + && let c = b + && let d = c { + d + } else { + return; + } + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + main { + kind: "function", + context: "fn §main§() {", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + a { + kind: "variable", + context: "let §a§ = 3;", + referenced in (1): [ + `if let b = §a§`, + ], + }, + ], + child scopes: [ + scope { + definitions: [ + b { + kind: "variable", + context: "if let §b§ = a", + referenced in (1): [ + `&& let c = §b§`, + ], + }, + c { + kind: "variable", + context: "&& let §c§ = b", + referenced in (1): [ + `&& let d = §c§ {`, + ], + }, + d { + kind: "variable", + context: "&& let §d§ = c {", + referenced in (1): [ + `§d§`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn value_of_function_definition() { + let src = r#"fn main() { // 0 + let a = 2; // 1 + let b = 2; // 2 + let c = 2; // 3 + } // 4"#; + + let sg = build_graph("Rust", src.as_bytes()); + let main_function = sg.find_node_by_name(src.as_bytes(), b"main").unwrap(); + let function_node = &sg.graph[sg.value_of_definition(main_function).unwrap()]; + + assert_eq!(function_node.range().start.line, 0); + assert_eq!(function_node.range().end.line, 4); + } + + #[test] + fn value_of_function_with_generics() { + let src = r#"fn main(p: P, q: Q) { // 0 + let a = 2; // 1 + let b = 2; // 2 + let c = 2; // 3 + let d = 2; // 4 + } // 5"#; + + let sg = build_graph("Rust", src.as_bytes()); + let main_function = sg.find_node_by_name(src.as_bytes(), b"main").unwrap(); + let function_node = &sg.graph[sg.value_of_definition(main_function).unwrap()]; + + assert_eq!(function_node.range().start.line, 0); + assert_eq!(function_node.range().end.line, 5); + } + + #[test] + fn value_of_struct_definition() { + let src = r#"struct P { // 0 + s: Y, // 1 + c: H, // 2 + } // 3"#; + + let sg = build_graph("Rust", src.as_bytes()); + let struct_p = sg.find_node_by_name(src.as_bytes(), b"P").unwrap(); + let struct_node = &sg.graph[sg.value_of_definition(struct_p).unwrap()]; + + assert_eq!(struct_node.range().start.line, 0); + 
assert_eq!(struct_node.range().end.line, 3); + } + + #[test] + fn value_of_let_definition() { + let src = r#"fn main() { + let a = 2; + let b = 2; + }"#; + + let sg = build_graph("Rust", src.as_bytes()); + let let_def_a = sg.find_node_by_name(src.as_bytes(), b"a").unwrap(); + + // no range produced for variable definitions + assert!(sg.value_of_definition(let_def_a).is_none()); + } + + #[test] + fn value_of_let_def_closures() { + let src = r#"fn main() { // 0 + let a = |x| { // 1 + foo_bar(); // 2 + }; // 3 + } // 4"#; + + let sg = build_graph("Rust", src.as_bytes()); + let let_def_a = sg.find_node_by_name(src.as_bytes(), b"a").unwrap(); + let let_def_node = &sg.graph[sg.value_of_definition(let_def_a).unwrap()]; + + assert_eq!(let_def_node.range().start.line, 1); + assert_eq!(let_def_node.range().end.line, 3); + } +} diff --git a/src/intelligence/language/rust/scopes.scm b/src/intelligence/language/rust/scopes.scm new file mode 100644 index 0000000..8a5c385 --- /dev/null +++ b/src/intelligence/language/rust/scopes.scm @@ -0,0 +1,463 @@ +;; see tree-sitter-rust/src/grammar.json for an exhaustive list of productions + +;; scopes +(block) @local.scope ; { ... } +(function_item) @local.scope +(declaration_list) @local.scope ; mod { ... } + +;; impl items can define types and lifetimes: +;; +;; impl<'a, T> Trait for Struct { .. } +;; +;; in order to constrain those to the impl block, +;; we add a local scope here: +(impl_item) @local.scope +(struct_item) @local.scope +(enum_item) @local.scope +(union_item) @local.scope +(type_item) @local.scope +(trait_item) @local.scope + +;; let expressions create scopes +(if_expression + [(let_condition) + (let_chain)]) @local.scope + +;; each match arm can bind variables with +;; patterns, without creating a block scope; +;; +;; match _ { +;; (a, b) => a, +;; } +;; +;; The bindings for a, b are constrained to +;; the match arm. +(match_arm) @local.scope + +;; loop labels are defs that are available only +;; within the scope they create: +;; +;; 'outer: loop { +;; let x = 2; +;; }; +;; let y = 2; +;; +;; Produces a scope graph like so: +;; +;; { +;; defs: [ y ], +;; scopes: [ +;; { +;; defs: [ 'outer ], +;; scopes: [ +;; { +;; defs: [ x ] +;; } +;; ] +;; } +;; ] +;; } +;; +(loop_expression) @local.scope +(for_expression) @local.scope +(while_expression) @local.scope + + +;; defs + +;; let x = ...; +(let_declaration + pattern: (identifier) @local.definition.variable) + +;; if let x = ...; +;; while let x = ...; +(let_condition + . + (identifier) @local.definition.variable) + +;; let (a, b, ...) = ..; +;; if let (a, b, ...) = {} +;; while let (a, b, ...) = {} +;; match _ { (a, b) => { .. 
} } +(tuple_pattern (identifier) @local.definition.variable) + +;; Some(a) +(tuple_struct_pattern + type: (_) + (identifier) @local.definition.variable) + +;; let S { field: a } = ..; +(struct_pattern + (field_pattern + (identifier) @local.definition.variable)) + +;; let S { a, b } = ..; +(struct_pattern + (field_pattern + (shorthand_field_identifier) @local.definition.variable)) + +;; (mut x: T) +(mut_pattern (identifier) @local.definition.variable) + +;; (ref x: T) +(ref_pattern (identifier) @local.definition.variable) + +;; const x = ...; +(const_item (identifier) @local.definition.const) + +;; static x = ...; +(static_item (identifier) @local.definition.const) + +;; fn _(x: _) +(parameters + (parameter + pattern: (identifier) @local.definition.variable)) +;; fn _(self) +(parameters + (self_parameter + (self) @local.definition.variable)) + +;; type parameters +(type_parameters + (type_identifier) @local.definition.typedef) +(type_parameters + (lifetime) @local.definition.lifetime) +(constrained_type_parameter + left: (type_identifier) @local.definition.typedef) + +;; |x| { ... } +;; no type +(closure_parameters (identifier) @local.definition.variable) + +;; |x: T| { ... } +;; with type +(closure_parameters + (parameter + (identifier) @local.definition.variable)) + +;;fn x(..) +(function_item (identifier) @hoist.definition.function) + +;; 'outer: loop { .. } +(loop_expression + (loop_label) @local.definition.label) + +;; `for` exprs create two defs: a label (if any) and the +;; loop variable +(for_expression . (identifier) @local.definition.variable) +(for_expression (loop_label) @local.definition.label) + +;; 'label: while cond { .. } +(while_expression + (loop_label) @local.definition.label) + +;; type definitions +(struct_item (type_identifier) @hoist.definition.struct) +(enum_item (type_identifier) @hoist.definition.enum) +(union_item (type_identifier) @hoist.definition.union) +(type_item . (type_identifier) @hoist.definition.typedef) +(trait_item (type_identifier) @hoist.definition.interface) + +;; struct and union fields +(field_declaration_list + (field_declaration + (field_identifier) @local.definition.field)) + +;; enum variants +(enum_variant_list + (enum_variant + (identifier) @local.definition.enumerator)) + +;; mod x; +(mod_item (identifier) @local.definition.module) + +;; use statements + +;; use item; +(use_declaration + (identifier) @local.import) + +;; use path as item; +(use_as_clause + alias: (identifier) @local.import) + +;; use path::item; +(use_declaration + (scoped_identifier + name: (identifier) @local.import)) + +;; use module::{member1, member2, member3}; +(use_list + (identifier) @local.import) +(use_list + (scoped_identifier + name: (identifier) @local.import)) + + +;; refs + +;; !x +(unary_expression (identifier) @local.reference) + +;; &x +(reference_expression (identifier) @local.reference) + +;; (x) +(parenthesized_expression (identifier) @local.reference) + +;; x? +(try_expression (identifier) @local.reference) + +;; a = b +(assignment_expression (identifier) @local.reference) + +;; a op b +(binary_expression (identifier) @local.reference) + +;; a op= b +(compound_assignment_expr (identifier) @local.reference) + +;; a as b +(type_cast_expression (identifier) @local.reference) + +;; a() +(call_expression (identifier) @local.reference) + +;; Self::foo() +;; +;; `foo` can be resolved +(call_expression + (scoped_identifier + (identifier) @_self_type + (identifier) @local.reference) + (#match? 
@_self_type "Self")) + +;; self.foo() +;; +;; `foo` can be resolved +(call_expression + (field_expression + (self) + (field_identifier) @local.reference)) + +;; return a +(return_expression (identifier) @local.reference) + +;; break a +(break_expression (identifier) @local.reference) + +;; break 'label +(break_expression (loop_label) @local.reference) + +;; continue 'label; +(continue_expression (loop_label) @local.reference) + +;; yield x; +(yield_expression (identifier) @local.reference) + +;; await a +(await_expression (identifier) @local.reference) + +;; (a, b) +(tuple_expression (identifier) @local.reference) + +;; a[] +(index_expression (identifier) @local.reference) + +;; ident; +(expression_statement (identifier) @local.reference) + +;; a..b +(range_expression (identifier) @local.reference) + +;; [ident; N] +(array_expression (identifier) @local.reference) + +;; path::to::item +;; +;; `path` is a ref +(scoped_identifier + path: (identifier) @local.reference) + +;; rhs of let decls +(let_declaration + value: (identifier) @local.reference) + +;; type T = [T; N] +;; +;; N is a ident ref +(array_type + length: (identifier) @local.reference) + +;; S { _ } +(struct_expression + (type_identifier) @local.reference) + +;; S { a } +(struct_expression + (field_initializer_list + (shorthand_field_initializer + (identifier) @local.reference))) + +;; S { a: value } +(struct_expression + (field_initializer_list + (field_initializer + (identifier) @local.reference))) + +;; S { ..a } +(struct_expression + (field_initializer_list + (base_field_initializer + (identifier) @local.reference))) + +;; if a {} +(if_expression (identifier) @local.reference) + +;; for pattern in value {} +;; +;; `value` is a ref +(for_expression + value: (identifier) @local.reference) + +;; while a {} +(while_expression (identifier) @local.reference) + +;; if let _ = a {} +;; +;; the ident following the `=` is a ref +;; the ident preceding the `=` is a def +;; while let _ = a {} +(let_condition + "=" + (identifier) @local.reference) + + +;; match a +(match_expression (identifier) @local.reference) + +;; match _ { +;; pattern => a, +;; } +;; +;; this `a` is somehow not any expression form +(match_arm (identifier) @local.reference) + +;; a.b +;; +;; `b` is ignored +(field_expression + (identifier) @local.reference) + +;; { stmt; foo } +(block + (identifier) @local.reference) + +;; arguments to method calls or function calls +(arguments + (identifier) @local.reference) + +;; impl S { .. } +(impl_item (type_identifier) @local.reference) + +;; where T: ... 
+(where_predicate + left: (type_identifier) @local.reference) + +;; trait bounds +(trait_bounds + (type_identifier) @local.reference) +(trait_bounds + (lifetime) @local.reference) + +;; idents in macros +(token_tree + (identifier) @local.reference) + +;; types + +;; (T, U) +(tuple_type + (type_identifier) @local.reference) + +;; &T +(reference_type + (type_identifier) @local.reference) + +;; &'a T +(reference_type + (lifetime) @local.reference) + +;; &'a self +(self_parameter + (lifetime) @local.reference) + +;; *mut T +;; *const T +(pointer_type + (type_identifier) @local.reference) + +;; A<_> +(generic_type + (type_identifier) @local.reference) + +;; _ +(type_arguments + (type_identifier) @local.reference) +(type_arguments + (lifetime) @local.reference) + +;; T +;; +;; U is ignored +;; V is a ref +(type_binding + name: (_) + type: (type_identifier) @local.reference) + +;; [T] +(array_type + (type_identifier) @local.reference) + +;; type T = U; +;; +;; T is a def +;; U is a ref +(type_item + name: (_) + type: (type_identifier) @local.reference) + +(function_item + return_type: (type_identifier) @local.reference) + +;; type refs in params +;; +;; fn _(_: T) +(parameters + (parameter + type: (type_identifier) @local.reference)) + +;; dyn T +(dynamic_type + (type_identifier) @local.reference) + +;; ::call() +(bracketed_type + (type_identifier) @local.reference) + +;; T as Trait +(qualified_type + (type_identifier) @local.reference) + +;; module::T +;; +;; `module` is a def +;; `T` is a ref +(scoped_type_identifier + path: (identifier) @local.reference) + +;; struct _ { field: Type } +;; `Type` is a ref + (field_declaration + name: (_) + type: (type_identifier) @local.reference) diff --git a/src/intelligence/language/test_utils.rs b/src/intelligence/language/test_utils.rs new file mode 100644 index 0000000..e6a3c46 --- /dev/null +++ b/src/intelligence/language/test_utils.rs @@ -0,0 +1,62 @@ +pub use expect_test::expect; + +use std::collections::HashSet; + +use crate::intelligence::{scope_resolution::NodeKind, Language, TreeSitterFile}; + +use expect_test::Expect; + +#[rustfmt::skip] +pub fn counts(src: &str, lang_id: &str) -> (usize, usize, usize, usize) { + let tsf = TreeSitterFile::try_build(src.as_bytes(), lang_id).unwrap(); + let scope_graph = tsf.scope_graph().unwrap(); + let nodes = scope_graph.graph.node_weights(); + nodes.fold((0, 0, 0, 0), |(s, d, r, i), node| match node { + NodeKind::Scope(_) => (s + 1, d, r , i ), + NodeKind::Def(_) => (s, d + 1, r , i ), + NodeKind::Ref(_) => (s, d, r + 1, i ), + NodeKind::Import(_)=> (s, d, r , i + 1), + }) +} + +pub fn assert_eq_defs(src: &[u8], lang_id: &str, defs: Vec<(&str, &str)>) { + let language = match Language::from_id(lang_id) { + Language::Supported(config) => config, + _ => panic!("testing unsupported language"), + }; + let namespaces = language.namespaces; + + let tsf = TreeSitterFile::try_build(src, lang_id).unwrap(); + let scope_graph = tsf.scope_graph().unwrap(); + + let expected_defs: HashSet<_> = defs.into_iter().collect(); + let observed_defs: HashSet<(&str, &str)> = scope_graph + .graph + .node_weights() + .filter_map(|node| match node { + NodeKind::Def(def) if def.symbol_id.is_some() => { + let name = std::str::from_utf8(def.name(src)).unwrap(); + let symbol = def.symbol_id.map(|sym_id| sym_id.name(namespaces)).unwrap(); + Some((name, symbol)) + } + _ => None, + }) + .collect(); + + assert_eq!(expected_defs, observed_defs) +} + +pub fn test_scopes(lang_id: &str, src: &[u8], expected: Expect) { + let graph = 
build_graph(lang_id, src); + let language = match Language::from_id(lang_id) { + Language::Supported(config) => config, + _ => panic!("testing unsupported language"), + }; + let observed = graph.debug(src, language); + expected.assert_debug_eq(&observed) +} + +pub fn build_graph(lang_id: &str, src: &[u8]) -> crate::intelligence::ScopeGraph { + let tsf = TreeSitterFile::try_build(src, lang_id).unwrap(); + tsf.scope_graph().unwrap() +} diff --git a/src/intelligence/language/typescript/mod.rs b/src/intelligence/language/typescript/mod.rs new file mode 100644 index 0000000..00f5a88 --- /dev/null +++ b/src/intelligence/language/typescript/mod.rs @@ -0,0 +1,364 @@ +use crate::intelligence::{MemoizedQuery, TSLanguageConfig}; + +pub static TYPESCRIPT: TSLanguageConfig = TSLanguageConfig { + language_ids: &["TypeScript", "TSX"], + file_extensions: &["ts", "tsx"], + grammar: tree_sitter_typescript::language_tsx, + scope_query: MemoizedQuery::new(include_str!("./scopes.scm")), + hoverable_query: MemoizedQuery::new( + r#" + [(identifier) + (property_identifier) + (shorthand_property_identifier) + (shorthand_property_identifier_pattern) + (statement_identifier) + (type_identifier)] @hoverable + "#, + ), + namespaces: &[&[ + //variables + "constant", + "variable", + "property", + "parameter", + // functions + "function", + "method", + "generator", + // types + "alias", + "enum", + "enumerator", + "class", + "interface", + // misc. + "label", + ]], +}; + +#[cfg(test)] +mod test { + use crate::intelligence::language::test_utils::*; + + // tests the following constructs: + // - imports (inherited from js) + // - type aliases + // - type constructs (union types, nested types, function types) + // - generics + // - object property (should create an empty scope) + #[test] + fn simple() { + test_scopes( + "TypeScript", + r#" + import React, { createContext } from 'react'; + import { ExtendedItemType, ItemType } + from '../components/ContextMenu/ContextMenuItem/Item'; + + type SearchHistoryType = { + text: string; + type: ItemType | ExtendedItemType; + icon?: React.ReactElement; + }; + + type ContextType = { + inputValue: string; + setInputValue: (v: string) => void; + searchHistory: SearchHistoryType[]; + setSearchHistory: (s: SearchHistoryType[]) => void; + }; + + export const SearchContext = createContext({ + inputValue: '', + setInputValue: (value) => {}, + searchHistory: [], + setSearchHistory: (newHistory) => {}, + }); + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + SearchHistoryType { + kind: "alias", + context: "type §SearchHistoryType§ = {", + referenced in (2): [ + `searchHistory: §SearchHistoryType§[];`, + `setSearchHistory: (s: §SearchHistoryType§[]) => void;`, + ], + }, + ContextType { + kind: "alias", + context: "type §ContextType§ = {", + referenced in (1): [ + `export const SearchContext = createContext<§ContextType§>({`, + ], + }, + SearchContext { + kind: "constant", + context: "export const §SearchContext§ = createContext({", + }, + ], + imports: [ + React { + context: "import §React§, { createContext } from 'react';", + referenced in (1): [ + `icon?: §React§.ReactElement;`, + ], + }, + createContext { + context: "import React, { §createContext§ } from 'react';", + referenced in (1): [ + `export const SearchContext = §createContext§({`, + ], + }, + ExtendedItemType { + context: "import { §ExtendedItemType§, ItemType }", + referenced in (1): [ + `type: ItemType | §ExtendedItemType§;`, + ], + }, + ItemType { + context: "import { ExtendedItemType, §ItemType§ }", + referenced in 
(1): [ + `type: §ItemType§ | ExtendedItemType;`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [ + v { + kind: "parameter", + context: "setInputValue: (§v§: string) => void;", + }, + ], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [], + }, + scope { + definitions: [ + s { + kind: "parameter", + context: "setSearchHistory: (§s§: SearchHistoryType[]) => void;", + }, + ], + child scopes: [], + }, + scope { + definitions: [], + child scopes: [ + scope { + definitions: [ + value { + kind: "parameter", + context: "setInputValue: (§value§) => {},", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + scope { + definitions: [ + newHistory { + kind: "parameter", + context: "setSearchHistory: (§newHistory§) => {},", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ) + } + + #[test] + fn tsx() { + test_scopes( + "TSX", + br#" + import React from 'react'; + import ReactDOM from 'react-dom/client'; + import App from './App'; + import './index.css'; + + ReactDOM.createRoot(document.getElementById('root') as HTMLElement).render( + + + , + ); + "#, + expect![[r#" + scope { + definitions: [], + imports: [ + React { + context: "import §React§ from 'react';", + referenced in (2): [ + `<§React§.StrictMode>`, + `,`, + ], + }, + ReactDOM { + context: "import §ReactDOM§ from 'react-dom/client';", + referenced in (1): [ + `§ReactDOM§.createRoot(document.getElementById('root') as HTMLElement).render(`, + ], + }, + App { + context: "import §App§ from './App';", + referenced in (1): [ + `<§App§ />`, + ], + }, + ], + child scopes: [], + } + "#]], + ) + } + + // https://github.com/BloopAI/bloop/issues/213 + // + // type parameters and function parameters should belong to a scope + // that is smaller that the function definition itself. 
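+    // (the type parameters are captured by the scope query as plain
+    // `@local.definition`s with no namespaced symbol, which is why `T` and
+    // `U` show up with kind "none" in the expected output below)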
+ #[test] + fn function_and_type_params() { + test_scopes( + "TypeScript", + r#" + function foo(t: T, u: U) {} + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + foo { + kind: "function", + context: "function §foo§(t: T, u: U) {}", + }, + ], + child scopes: [ + scope { + definitions: [ + T { + kind: "none", + context: "function foo<§T§, U>(t: T, u: U) {}", + referenced in (1): [ + `function foo(t: §T§, u: U) {}`, + ], + }, + U { + kind: "none", + context: "function foo(t: T, u: U) {}", + referenced in (1): [ + `function foo(t: T, u: §U§) {}`, + ], + }, + t { + kind: "parameter", + context: "function foo(§t§: T, u: U) {}", + }, + u { + kind: "parameter", + context: "function foo(t: T, §u§: U) {}", + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + } + "#]], + ); + } + + #[test] + fn optional_param_regression() { + test_scopes( + "TypeScript", + r#" + function foo(a?: string, b: string) { + return (a, b) + } + "# + .as_bytes(), + expect![[r#" + scope { + definitions: [ + foo { + kind: "function", + context: "function §foo§(a?: string, b: string) {", + }, + ], + child scopes: [ + scope { + definitions: [ + a { + kind: "parameter", + context: "function foo(§a§?: string, b: string) {", + referenced in (1): [ + `return (§a§, b)`, + ], + }, + b { + kind: "parameter", + context: "function foo(a?: string, §b§: string) {", + referenced in (1): [ + `return (a, §b§)`, + ], + }, + ], + child scopes: [ + scope { + definitions: [], + child scopes: [ + scope { + definitions: [], + child scopes: [], + }, + ], + }, + ], + }, + ], + } + "#]], + ); + } +} diff --git a/src/intelligence/language/typescript/scopes.scm b/src/intelligence/language/typescript/scopes.scm new file mode 100644 index 0000000..0af5475 --- /dev/null +++ b/src/intelligence/language/typescript/scopes.scm @@ -0,0 +1,431 @@ +;; typescript, javascript, and tsx inherit the same common +;; grammar, and add onto it. this file contains additional +;; queries for typescript types and ADTs, it also revises +;; certain queries (classes, function parameters). + +;; scopes + +[ + (statement_block) + (class_body) + (arrow_function) + (object) + ;; nameless functions create scopes, just like arrow functions + (function !name) + (function_declaration) + (generator_function_declaration) + (for_statement) + (for_in_statement) + (switch_case) + (catch_clause) + ;; assignments are permitted inside sequence exprs: + ;; + ;; const a = 2; + ;; throw f = 1, f, a; + ;; + ;; should produce: + ;; + ;; { + ;; defs: [ a ], + ;; scopes [{ + ;; defs: [ f ], + ;; refs: [ f, a ] + ;; }], + ;; } + (sequence_expression) + + ;; type signatures in properties may contain parameter + ;; definitions, which can never have references. this + ;; scope "seals" off this definitions. + ;; + ;; type S = { + ;; getter: (f: string) => string; + ;; } + ;; + ;; should produce one top-level definition: `S`. without + ;; sealing the property signature, it also produces `f` + ;; as a top-level definition. 
+ (property_signature) +] @local.scope + + + +;; defs + +;; tree-sitter-javascript has 5 "declaration" kinds: +;; +;; - class +;; - function +;; - generator function +;; - lexical +;; - variable + +;; function x() +(function_declaration + (identifier) @hoist.definition.function) + +(generator_function_declaration + (identifier) @hoist.definition.generator) + +;; function params +(formal_parameters + (required_parameter + (identifier) @local.definition.parameter)) +(formal_parameters + (optional_parameter + (identifier) @local.definition.parameter)) + +;; patterns + +;; f(a, ...b) +(rest_pattern + (identifier) @local.definition.variable) + +;; f(a, y = f) +;; +;; the lhs is a def, the rhs is a ref +(assignment_pattern + (identifier) @local.definition.variable + (identifier) @local.reference) + +;; for ([a, b] in thing) +;; +;; `a` & `b` are defs +(array_pattern + (identifier) @local.definition.variable) + +;; let {a, b} = obj; +(object_pattern + (shorthand_property_identifier_pattern) @local.definition.variable) + +;; var x = _ +(variable_declaration + (variable_declarator . (identifier) @local.definition.variable)) + +;; const x = _ +(lexical_declaration + "const" + (variable_declarator . (identifier) @local.definition.constant)) + +;; let x = _ +(lexical_declaration + "let" + (variable_declarator . (identifier) @local.definition.variable)) + +;; a = b +(assignment_expression + . + (identifier) @local.definition.variable) + +;; method def +;; +;; TODO: support getters and setters here, blocked on: +;; https://github.com/tree-sitter/tree-sitter/issues/1461 +(method_definition + (property_identifier) @local.definition.method) + +;; class +(class_declaration + (type_identifier) @local.definition.class) + +;; arrow func +(arrow_function + (identifier) @local.definition.variable) + + +;; imports + +;; import defaultMember from "module"; +(import_statement + (import_clause (identifier) @local.import)) + +;; import { member } from "module"; +;; import { member as alias } from "module"; +(import_statement + (import_clause + (named_imports + [(import_specifier !alias (identifier) @local.import) + (import_specifier alias: (identifier) @local.import)]))) + +;; for (item in list) +;; +;; `item` is a def +(for_in_statement + left: (identifier) @local.definition.variable) + +;; labels +(labeled_statement + (statement_identifier) @local.definition.label) + +;; type T +(type_alias_declaration + name: + (type_identifier) @local.definition.alias) + +;; type parameters in generic +;; functions or interfaces +(type_parameters + (type_parameter + (type_identifier) @local.definition)) + +;; enum T +(enum_declaration + (identifier) @local.definition.enum) + +;; enumerators +;; +;; enum Direction { +;; L // property_identifier +;; D = "Down" // enum_assignment +;; } +(enum_body + (property_identifier) @local.definition.enumerator) +(enum_body + (enum_assignment + (property_identifier) @local.definition.enumerator)) + +;; abstract class T +(abstract_class_declaration + (type_identifier) @local.definition.class) + +;; class _ { +;; t: T +;; } +(public_field_definition + (property_identifier) @local.definition.property) + +;; class { +;; abstract f(T): U; +;; } +(abstract_method_signature + (property_identifier) @local.definition.property) + +;; interface T +(interface_declaration + (type_identifier) @local.definition.interface) + +;; catch clauses +(catch_clause + (identifier) @local.definition.variable) + + +;; refs + +;; someVar; +(expression_statement (identifier) @local.reference) + +;; { "a": value } +(object 
+ (pair + (identifier) @local.reference)) + +;; y = {a, b} +(object + (shorthand_property_identifier) @local.reference) + + +;; [ a, b, c ] +(array + (identifier) @local.reference) + +;; new Object() +(new_expression + (identifier) @local.reference) + +;; return x; +(return_statement + (identifier) @local.reference) + +;; yield t; +(yield_expression + (identifier) @local.reference) + +;; call expression +(call_expression + (identifier) @local.reference) + +;; call arguments +(arguments + (identifier) @local.reference) + +;; type arguments +(type_arguments + (type_identifier) @local.reference) + +;; index expression +(subscript_expression + (identifier) @local.reference) + +;; member expression: a.b +(member_expression + (identifier) @local.reference) + +;; nested identifier: +;; +;; `React` is a ref +;; `StrictMode` is ignored +(nested_identifier + . + (identifier) @local.reference) + +;; await ident; +(await_expression + (identifier) @local.reference) + +;; a + b +(binary_expression + (identifier) @local.reference) + +;; -x +(unary_expression + (identifier) @local.reference) + +;; x++ +(update_expression + (identifier) @local.reference) + +;; a += b +(augmented_assignment_expression + (identifier) @local.reference) + +;; (a) +(parenthesized_expression + (identifier) @local.reference) + +;; tuples +(sequence_expression + (identifier) @local.reference) + +;; c? a : b +(ternary_expression + (identifier) @local.reference) + +;; {...object} +(spread_element + (identifier) @local.reference) + +;; exports are refs +;; +;; export { name, name }; +;; export { name as alias }; +(export_statement + (export_clause + (export_specifier name: (identifier) @local.reference))) + +;; export default ident; +(export_statement + (identifier) @local.reference) + +;; for (item in list) +;; +;; `list` is a def +(for_in_statement + right: (identifier) @local.reference) + +;; break f; +(break_statement (statement_identifier) @local.reference) + +;; continue f; +(continue_statement (statement_identifier) @local.reference) + +;; jsx +;; (jsx_expression +;; (identifier) @local.reference) +;; +;; (jsx_opening_element +;; (identifier) @local.reference) +;; +;; (jsx_closing_element +;; (identifier) @local.reference) +;; +;; (jsx_self_closing_element +;; (identifier) @local.reference) + + +;; type refs + +;; type _ = T +(type_alias_declaration + value: + (type_identifier) @local.reference) + +;; (T) +(parenthesized_type + (type_identifier) @local.reference) + +;; T[] +(array_type + (type_identifier) @local.reference) + +;; A extends B ? C : D +(conditional_type + (type_identifier) @local.reference) + +;; ?T +(flow_maybe_type + (type_identifier) @local.reference) + +;; T<_> +(generic_type + (type_identifier) @local.reference) + +;; T & U +(intersection_type + (type_identifier) @local.reference) + +;; T | U +(union_type + (type_identifier) @local.reference) + +;; (T, U) => V +(function_type + (type_identifier) @local.reference) + +;; keyof T +(index_type_query + (type_identifier) @local.reference) + +;; val as T +(as_expression + (identifier) @local.reference + (type_identifier) @local.reference) + +;; let t: T = foo(); +;; {t: T, u: U} +(type_annotation + (type_identifier) @local.reference) + +;; [T, U] +(tuple_type + (type_identifier) @local.reference) + +;; T[U] +(lookup_type + (type_identifier) @local.reference) + +;; T.U +;; +;; `T` is ref +;; `U` is ignored +(nested_type_identifier + . 
+ (identifier) @local.reference) + +;; t is T +(type_predicate_annotation + (type_predicate + (identifier) @local.reference + (type_identifier) @local.reference)) + +;; jsx +(jsx_expression + (identifier) @local.reference) + +(jsx_opening_element + (identifier) @local.reference) + +(jsx_closing_element + (identifier) @local.reference) + +(jsx_self_closing_element + (identifier) @local.reference) diff --git a/src/intelligence/namespace.rs b/src/intelligence/namespace.rs new file mode 100644 index 0000000..0250af6 --- /dev/null +++ b/src/intelligence/namespace.rs @@ -0,0 +1,48 @@ +use serde::{Deserialize, Serialize}; + +/// An opaque identifier for every symbol in a language +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct SymbolId { + pub namespace_idx: usize, + pub symbol_idx: usize, +} + +impl SymbolId { + pub fn name(&self, namespaces: NameSpaces) -> &'static str { + namespaces[self.namespace_idx][self.symbol_idx] + } +} + +/// A grouping of symbol kinds that allow references among them. +/// A variable can refer only to other variables, and not types, for example. +pub type NameSpace = &'static [&'static str]; + +/// A collection of namespaces +pub type NameSpaces = &'static [NameSpace]; + +/// Helper trait +pub trait NameSpaceMethods { + fn all_symbols(self) -> Vec<&'static str>; + + fn symbol_id_of(&self, symbol: &str) -> Option; +} + +impl NameSpaceMethods for NameSpaces { + fn all_symbols(self) -> Vec<&'static str> { + self.iter().flat_map(|ns| ns.iter().cloned()).collect() + } + + fn symbol_id_of(&self, symbol: &str) -> Option { + self.iter() + .enumerate() + .find_map(|(namespace_idx, namespace)| { + namespace + .iter() + .position(|s| s == &symbol) + .map(|symbol_idx| SymbolId { + namespace_idx, + symbol_idx, + }) + }) + } +} diff --git a/src/intelligence/scope_resolution.rs b/src/intelligence/scope_resolution.rs new file mode 100644 index 0000000..7b92331 --- /dev/null +++ b/src/intelligence/scope_resolution.rs @@ -0,0 +1,1073 @@ +#[cfg(test)] +mod debug; +mod def; +mod import; +mod reference; +mod scope; + +pub use def::LocalDef; +pub use import::LocalImport; +pub use reference::Reference; +pub use scope::{LocalScope, ScopeStack}; + +use super::{NameSpaceMethods, TSLanguageConfig, ALL_LANGUAGES}; +use crate::{symbol::Symbol, text_range::TextRange}; + +use std::{collections::HashMap, str::FromStr}; + +use petgraph::{graph::Graph, visit::EdgeRef, Direction}; +use serde::{Deserialize, Serialize}; +use tracing::warn; +use tree_sitter::{Node, Query, QueryCursor}; + +pub type NodeIndex = petgraph::graph::NodeIndex; + +/// The algorithm used to resolve scopes. +/// +/// The resolution method may be parametrized on language. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[non_exhaustive] +pub enum ResolutionMethod { + /// `Generic` refers to a basic lexical scoping algorithm. + Generic, +} + +impl ResolutionMethod { + /// Build a lexical scope-graph with a scope query and a tree-sitter tree. The `src` + /// parameter is required by tree-sitter to resolve certain kinds of query predicates + /// such as #match? and #eq?. 
+ pub fn build_scope( + &self, + query: &Query, + root_node: Node<'_>, + src: &[u8], + language: &TSLanguageConfig, + ) -> ScopeGraph { + match self { + ResolutionMethod::Generic => scope_res_generic(query, root_node, src, language), + } + } +} + +/// The type of a node in the ScopeGraph +#[derive(Serialize, Deserialize, Debug, Clone)] +pub enum NodeKind { + /// A scope node + Scope(LocalScope), + + /// A definition node + Def(LocalDef), + + /// An import node + Import(LocalImport), + + /// A reference node + Ref(Reference), +} + +impl NodeKind { + /// Construct a scope node from a range + pub fn scope(range: TextRange) -> Self { + Self::Scope(LocalScope::new(range)) + } + + /// Produce the range spanned by this node + pub fn range(&self) -> TextRange { + match self { + Self::Scope(l) => l.range, + Self::Def(d) => d.range, + Self::Ref(r) => r.range, + Self::Import(i) => i.range, + } + } +} + +/// Describes the relation between two nodes in the ScopeGraph +#[derive(Serialize, Deserialize, PartialEq, Eq, Copy, Clone, Debug)] +pub enum EdgeKind { + /// The edge weight from a nested scope to its parent scope + ScopeToScope, + + /// The edge weight from a definition to its definition scope + DefToScope, + + /// The edge weight from an import to its definition scope + ImportToScope, + + /// The edge weight from a reference to its definition + RefToDef, + + /// The edge weight from a reference to its import + RefToImport, +} + +/// A graph representation of scopes and names in a single syntax tree +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ScopeGraph { + /// The raw graph + pub graph: Graph, + + // Graphs do not have the concept of a `root`, but lexical scopes follow the syntax + // tree, and as a result, have a "root" node. The root_idx points to a scope node that + // encompasses the entire file: the global scope. 
+ root_idx: NodeIndex, + + /// An index into ALL_LANGUAGES which corresponds to the language for this graph + lang_id: usize, +} + +impl ScopeGraph { + pub fn new(range: TextRange, lang_id: usize) -> Self { + let mut graph = Graph::new(); + let root_idx = graph.add_node(NodeKind::scope(range)); + Self { + graph, + root_idx, + lang_id, + } + } + + pub fn get_node(&self, node_idx: NodeIndex) -> Option<&NodeKind> { + self.graph.node_weight(node_idx) + } + + /// Insert a local scope into the scope-graph + pub fn insert_local_scope(&mut self, new: LocalScope) { + if let Some(parent_scope) = self.scope_by_range(new.range, self.root_idx) { + let new_scope = NodeKind::Scope(new); + let new_idx = self.graph.add_node(new_scope); + self.graph + .add_edge(new_idx, parent_scope, EdgeKind::ScopeToScope); + } + } + + /// Insert a def into the scope-graph + pub fn insert_local_def(&mut self, new: LocalDef) { + if let Some(defining_scope) = self.scope_by_range(new.range, self.root_idx) { + let new_def = NodeKind::Def(new); + let new_idx = self.graph.add_node(new_def); + self.graph + .add_edge(new_idx, defining_scope, EdgeKind::DefToScope); + } + } + + /// Insert a def into the scope-graph, at the parent scope of the defining scope + pub fn insert_hoisted_def(&mut self, new: LocalDef) { + if let Some(defining_scope) = self.scope_by_range(new.range, self.root_idx) { + let new_def = NodeKind::Def(new); + let new_idx = self.graph.add_node(new_def); + + // if the parent scope exists, insert this def there, if not, + // insert into the defining scope + let target_scope = self.parent_scope(defining_scope).unwrap_or(defining_scope); + + self.graph + .add_edge(new_idx, target_scope, EdgeKind::DefToScope); + } + } + + /// Insert a def into the scope-graph, at the root scope + pub fn insert_global_def(&mut self, new: LocalDef) { + let new_def = NodeKind::Def(new); + let new_idx = self.graph.add_node(new_def); + self.graph + .add_edge(new_idx, self.root_idx, EdgeKind::DefToScope); + } + + /// Insert an import into the scope-graph + pub fn insert_local_import(&mut self, new: LocalImport) { + if let Some(defining_scope) = self.scope_by_range(new.range, self.root_idx) { + let new_imp = NodeKind::Import(new); + let new_idx = self.graph.add_node(new_imp); + self.graph + .add_edge(new_idx, defining_scope, EdgeKind::ImportToScope); + } + } + + /// Insert a ref into the scope-graph + pub fn insert_ref(&mut self, new: Reference, src: &[u8]) { + let mut possible_defs = vec![]; + let mut possible_imports = vec![]; + if let Some(local_scope_idx) = self.scope_by_range(new.range, self.root_idx) { + // traverse the scopes from the current-scope to the root-scope + for scope in self.scope_stack(local_scope_idx) { + // find candidate definitions in each scope + for local_def in self + .graph + .edges_directed(scope, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::DefToScope) + .map(|edge| edge.source()) + { + if let NodeKind::Def(def) = &self.graph[local_def] { + if new.name(src) == def.name(src) { + match (&def.symbol_id, &new.symbol_id) { + // both contain symbols, but they don't belong to the same namepspace + (Some(d), Some(r)) if d.namespace_idx != r.namespace_idx => {} + + // in all other cases, form an edge from the ref to def. 
+ // an empty symbol belongs to all namespaces: + // * (None, None) + // * (None, Some(_)) + // * (Some(_), None) + // * (Some(_), Some(_)) if def.namespace == ref.namespace + _ => { + possible_defs.push(local_def); + } + }; + } + } + } + + // find candidate imports in each scope + for local_import in self + .graph + .edges_directed(scope, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::ImportToScope) + .map(|edge| edge.source()) + { + if let NodeKind::Import(import) = &self.graph[local_import] { + if new.name(src) == import.name(src) { + possible_imports.push(local_import); + } + } + } + } + } + + if !possible_defs.is_empty() || !possible_imports.is_empty() { + let new_ref = NodeKind::Ref(new); + let ref_idx = self.graph.add_node(new_ref); + for def_idx in possible_defs { + self.graph.add_edge(ref_idx, def_idx, EdgeKind::RefToDef); + } + for imp_idx in possible_imports { + self.graph.add_edge(ref_idx, imp_idx, EdgeKind::RefToImport); + } + } + } + + fn scope_stack(&self, start: NodeIndex) -> ScopeStack<'_> { + ScopeStack { + scope_graph: self, + start: Some(start), + } + } + + // The smallest scope that encompasses `range`. Start at `start` and narrow down if possible. + fn scope_by_range(&self, range: TextRange, start: NodeIndex) -> Option { + let target_range = self.graph[start].range(); + if target_range.contains(&range) { + let child_scopes = self + .graph + .edges_directed(start, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::ScopeToScope) + .map(|edge| edge.source()) + .collect::>(); + for child_scope in child_scopes { + if let Some(t) = self.scope_by_range(range, child_scope) { + return Some(t); + } + } + return Some(start); + } + None + } + + // Produce the parent scope of a given scope + fn parent_scope(&self, start: NodeIndex) -> Option { + if matches!(self.graph[start], NodeKind::Scope(_)) { + return self + .graph + .edges_directed(start, Direction::Outgoing) + .filter(|edge| *edge.weight() == EdgeKind::ScopeToScope) + .map(|edge| edge.target()) + .next(); + } + None + } + + /// Produce a list of interesting ranges: ranges of defs and refs + pub fn hoverable_ranges(&self) -> Box + '_> { + let iterator = + self.graph + .node_indices() + .filter_map(|node_idx| match &self.graph[node_idx] { + NodeKind::Scope(_) => None, + NodeKind::Def(d) => Some(d.range), + NodeKind::Ref(r) => Some(r.range), + NodeKind::Import(i) => Some(i.range), + }); + Box::new(iterator) + } + + /// Produce possible definitions for a reference + pub fn definitions( + &self, + reference_node: NodeIndex, + ) -> Box + '_> { + let iterator = self + .graph + .edges_directed(reference_node, Direction::Outgoing) + .filter(|edge| *edge.weight() == EdgeKind::RefToDef) + .map(|edge| edge.target()); + Box::new(iterator) + } + + /// Produce possible imports for a reference + pub fn imports(&self, reference_node: NodeIndex) -> Box + '_> { + let iterator = self + .graph + .edges_directed(reference_node, Direction::Outgoing) + .filter(|edge| *edge.weight() == EdgeKind::RefToImport) + .map(|edge| edge.target()); + Box::new(iterator) + } + + /// Produce possible references for a definition/import node + pub fn references( + &self, + definition_node: NodeIndex, + ) -> Box + '_> { + let iterator = self + .graph + .edges_directed(definition_node, Direction::Incoming) + .filter(|edge| { + *edge.weight() == EdgeKind::RefToDef || *edge.weight() == EdgeKind::RefToImport + }) + .map(|edge| edge.source()); + Box::new(iterator) + } + + pub fn node_by_range(&self, start_byte: usize, end_byte: usize) 
-> Option<NodeIndex> {
+        self.graph
+            .node_indices()
+            .filter(|&idx| self.is_definition(idx) || self.is_reference(idx) || self.is_import(idx))
+            .find(|&idx| {
+                let node = self.graph[idx].range();
+                start_byte >= node.start.byte && end_byte <= node.end.byte
+            })
+    }
+
+    /// The "value" of a definition is loosely characterized as
+    ///
+    /// - the body of a function block
+    /// - the body of a class
+    /// - the parameter list defining generic types
+    /// - the RHS of a value
+    ///
+    /// The heuristic used here is
+    /// - the smallest scope-node that encompasses the definition_node
+    /// - or the largest scope-node on the same line as the definition_node
+    pub fn value_of_definition(&self, def_idx: NodeIndex) -> Option<NodeIndex> {
+        let smallest_scope_node = self
+            .scope_by_range(self.graph[def_idx].range(), self.root_idx)
+            .filter(|&idx| {
+                self.graph[idx].range().start.line == self.graph[def_idx].range().start.line
+            });
+        let largest_adjacent_node = self
+            .graph
+            .node_indices()
+            .filter(|&idx| match self.graph[idx] {
+                NodeKind::Scope(scope) => {
+                    scope.range.start.line == self.graph[def_idx].range().start.line
+                }
+                _ => false,
+            })
+            .max_by_key(|idx| self.graph[*idx].range().size());
+
+        smallest_scope_node.or(largest_adjacent_node)
+    }
+
+    pub fn node_by_position(&self, line: usize, column: usize) -> Option<NodeIndex> {
+        self.graph
+            .node_indices()
+            .filter(|&idx| self.is_definition(idx) || self.is_reference(idx))
+            .find(|&idx| {
+                let node = self.graph[idx].range();
+                node.start.line == line
+                    && node.end.line == line
+                    && node.start.column <= column
+                    && node.end.column >= column
+            })
+    }
+
+    pub fn symbols(&self) -> Vec<Symbol> {
+        let namespaces = ALL_LANGUAGES[self.lang_id].namespaces;
+        self.graph
+            .node_weights()
+            .filter_map(|weight| match weight {
+                NodeKind::Def(LocalDef {
+                    range,
+                    symbol_id: Some(symbol_id),
+                    ..
+ }) => Some(Symbol { + kind: symbol_id.name(namespaces).to_owned(), // FIXME: this should use SymbolId::name + range: *range, + }), + _ => None, + }) + .collect() + } + + // produce a stringified name of a def/ref's symbol + pub fn symbol_name_of(&self, idx: NodeIndex) -> Option<&'static str> { + let namespaces = ALL_LANGUAGES[self.lang_id].namespaces; + match &self.graph[idx] { + NodeKind::Def(d) => d.symbol_id.map(|s| s.name(namespaces)), + NodeKind::Ref(r) => r.symbol_id.map(|s| s.name(namespaces)), + _ => None, + } + } + + // is the given ref/def a direct child of the root scope + pub fn is_top_level(&self, idx: NodeIndex) -> bool { + self.graph.contains_edge(idx, self.root_idx) + } + + #[cfg(test)] + pub fn debug(&self, src: &[u8], language: &'static TSLanguageConfig) -> debug::ScopeDebug { + let graph = &self.graph; + let start = self.root_idx; + debug::ScopeDebug::new(graph, start, src, language) + } + + #[cfg(test)] + pub fn find_node_by_name(&self, src: &[u8], name: &[u8]) -> Option { + self.graph.node_indices().find(|idx| { + matches!( + &self.graph[*idx], + NodeKind::Def(d) if d.name(src) == name) + }) + } + + pub fn is_definition(&self, node_idx: NodeIndex) -> bool { + matches!(self.graph[node_idx], NodeKind::Def(_)) + } + + pub fn is_reference(&self, node_idx: NodeIndex) -> bool { + matches!(self.graph[node_idx], NodeKind::Ref(_)) + } + + pub fn is_scope(&self, node_idx: NodeIndex) -> bool { + matches!(self.graph[node_idx], NodeKind::Scope(_)) + } + + pub fn is_import(&self, node_idx: NodeIndex) -> bool { + matches!(self.graph[node_idx], NodeKind::Import(_)) + } +} + +fn scope_res_generic( + query: &Query, + root_node: Node<'_>, + src: &[u8], + language: &TSLanguageConfig, +) -> ScopeGraph { + let namespaces = language.namespaces; + + enum Scoping { + Global, + Hoisted, + Local, + } + + // extract supported capture groups + struct LocalDefCapture<'a> { + index: u32, + symbol: Option<&'a str>, + scoping: Scoping, + } + + struct LocalRefCapture<'a> { + index: u32, + symbol: Option<&'a str>, + } + + impl FromStr for Scoping { + type Err = String; + fn from_str(s: &str) -> Result { + match s { + "hoist" => Ok(Self::Hoisted), + "global" => Ok(Self::Global), + "local" => Ok(Self::Local), + s => Err(s.to_owned()), + } + } + } + + // every capture of the form: + // - local.definition. + // - hoist.definition. + // is a local_def + let mut local_def_captures = Vec::>::new(); + + // every capture of the form local.import is a local_import + let mut local_import_capture_index = None; + + // every capture of the form local.reference. 
is a local_ref + let mut local_ref_captures = Vec::>::new(); + + // every capture of the form local.scope is a local_scope + let mut local_scope_capture_index = None; + + // determine indices of every capture group in the query file + for (i, name) in query.capture_names().iter().enumerate() { + let i = i as u32; + let parts: Vec<_> = name.split('.').collect(); + + match parts.as_slice() { + [scoping, "definition", sym] => { + let index = i; + let symbol = Some(sym.to_owned()); + let scoping = Scoping::from_str(scoping).expect("invalid scope keyword"); + + let l = LocalDefCapture { + index, + symbol, + scoping, + }; + local_def_captures.push(l) + } + [scoping, "definition"] => { + let index = i; + let symbol = None; + let scoping = Scoping::from_str(scoping).expect("invalid scope keyword"); + + let l = LocalDefCapture { + index, + symbol, + scoping, + }; + local_def_captures.push(l) + } + ["local", "reference", sym] => { + let index = i; + let symbol = Some(sym.to_owned()); + + let l = LocalRefCapture { index, symbol }; + local_ref_captures.push(l); + } + ["local", "reference"] => { + let index = i; + let symbol = None; + + let l = LocalRefCapture { index, symbol }; + local_ref_captures.push(l); + } + ["local", "scope"] => local_scope_capture_index = Some(i), + ["local", "import"] => local_import_capture_index = Some(i), + _ if !name.starts_with('_') => warn!(?name, "unrecognized query capture"), + _ => (), // allow captures that start with underscore to fly under the radar + } + } + + // run scope-query upon the syntax-tree + let mut cursor = QueryCursor::new(); + let captures = cursor.captures(query, root_node, src); + + let lang_id = ALL_LANGUAGES + .iter() + .position(|l| l.language_ids == language.language_ids) + .unwrap(); + let mut scope_graph = ScopeGraph::new(root_node.range().into(), lang_id); + + let capture_map = captures.fold( + HashMap::<_, Vec<_>>::new(), + |mut map, (match_, capture_idx)| { + let capture = match_.captures[capture_idx]; + let range: TextRange = capture.node.range().into(); + map.entry(capture.index).or_default().push(range); + map + }, + ); + + // insert scopes first + if let Some(ranges) = local_scope_capture_index.and_then(|idx| capture_map.get(&idx)) { + for range in ranges { + let scope = LocalScope::new(*range); + scope_graph.insert_local_scope(scope); + } + } + + // followed by imports + if let Some(ranges) = local_import_capture_index.and_then(|idx| capture_map.get(&idx)) { + for range in ranges { + let import = LocalImport::new(*range); + scope_graph.insert_local_import(import); + } + } + + // followed by defs + for LocalDefCapture { + index, + symbol, + scoping, + } in local_def_captures + { + if let Some(ranges) = capture_map.get(&index) { + for range in ranges { + // if the symbol is present, is it one of the supported symbols for this language? + let symbol_id = symbol.and_then(|s| namespaces.symbol_id_of(s)); + let local_def = LocalDef::new(*range, symbol_id); + + match scoping { + Scoping::Hoisted => scope_graph.insert_hoisted_def(local_def), + Scoping::Global => scope_graph.insert_global_def(local_def), + Scoping::Local => scope_graph.insert_local_def(local_def), + }; + } + } + } + + // and then refs + for LocalRefCapture { index, symbol } in local_ref_captures { + if let Some(ranges) = capture_map.get(&index) { + for range in ranges { + // if the symbol is present, is it one of the supported symbols for this language? 
+ let symbol_id = symbol.and_then(|s| namespaces.symbol_id_of(s)); + let ref_ = Reference::new(*range, symbol_id); + + scope_graph.insert_ref(ref_, src); + } + } + } + + scope_graph +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + intelligence::SymbolId, + text_range::{Point, TextRange}, + }; + use expect_test::expect; + + const DUMMY_LANG_ID: usize = 0; + + // test-utility to build byte-only text-ranges + // + // assumes one byte per line + fn r(start: usize, end: usize) -> TextRange { + TextRange { + start: Point { + byte: start, + line: start, + column: 0, + }, + end: Point { + byte: end, + line: end, + column: 0, + }, + } + } + + // test-utility to create a local scope + fn scope(start: usize, end: usize) -> LocalScope { + LocalScope { + range: r(start, end), + } + } + + // test-utility to create a local def + fn definition(start: usize, end: usize) -> LocalDef { + LocalDef { + range: r(start, end), + symbol_id: None, + } + } + + // test-utility to create a reference + fn reference(start: usize, end: usize) -> Reference { + Reference { + range: r(start, end), + symbol_id: None, + } + } + + // test-utility to build a stringified edge-list from a graph + fn test_edges(graph: &Graph, expected: expect_test::Expect) { + let edge_list = graph + .edge_references() + .map(|edge| { + let source = graph[edge.source()].range(); + let target = graph[edge.target()].range(); + let weight = edge.weight(); + format!( + "{:02}..{:02} --{weight:?}-> {:02}..{:02}\n", + source.start.byte, source.end.byte, target.start.byte, target.end.byte, + ) + }) + .collect::(); + + expected.assert_eq(&edge_list) + } + + #[test] + fn insert_scopes() { + let mut s = ScopeGraph::new(r(0, 20), DUMMY_LANG_ID); + + let a = scope(0, 10); + let c = scope(0, 5); + let d = scope(6, 10); + + let b = scope(11, 20); + let e = scope(11, 15); + let f = scope(16, 20); + + for scope in [a, b, c, d, e, f] { + s.insert_local_scope(scope); + } + + // should build: + // + // root + // `- a + // `- c + // `- d + // `- b + // `- e + // `- f + // + // |n| = 7 + // |e| = 6 + + assert_eq!(s.graph.node_count(), 7); + assert_eq!(s.graph.edge_count(), 6); + + // a -> root + // b -> root + // c -> a + // d -> a + // e -> b + // f -> b + test_edges( + &s.graph, + expect![[r#" + 00..10 --ScopeToScope-> 00..20 + 11..20 --ScopeToScope-> 00..20 + 00..05 --ScopeToScope-> 00..10 + 06..10 --ScopeToScope-> 00..10 + 11..15 --ScopeToScope-> 11..20 + 16..20 --ScopeToScope-> 11..20 + "#]], + ); + } + + #[test] + fn insert_defs() { + let mut s = ScopeGraph::new(r(0, 20), DUMMY_LANG_ID); + + // modeling the following code: + // + // fn main() { + // let a = 2; + // let b = 3; + // } + + let main = scope(0, 10); + let a = definition(1, 2); + let b = definition(4, 5); + + s.insert_local_scope(main); + s.insert_local_def(a); + s.insert_local_def(b); + + // should build: + // + // root + // `- main + // `- a + // `- b + + test_edges( + &s.graph, + expect![[r#" + 00..10 --ScopeToScope-> 00..20 + 01..02 --DefToScope-> 00..10 + 04..05 --DefToScope-> 00..10 + "#]], + ); + } + + #[test] + fn insert_hoisted_defs() { + let mut s = ScopeGraph::new(r(0, 20), DUMMY_LANG_ID); + + let main = scope(0, 10); + let a = definition(1, 2); + let b = definition(4, 5); + + s.insert_local_scope(main); + s.insert_local_def(a); + // should hoist `b` from `main` to `root` + s.insert_hoisted_def(b); + + // should build: + // + // root + // `- b + // `- main + // `- a + + // root has 2 incoming edges: + // main -> root + // b -> root + assert_eq!( + s.graph + 
.edges_directed(s.root_idx, Direction::Incoming) + .count(), + 2 + ); + + test_edges( + &s.graph, + expect![[r#" + 00..10 --ScopeToScope-> 00..20 + 01..02 --DefToScope-> 00..10 + 04..05 --DefToScope-> 00..20 + "#]], + ); + } + + #[test] + fn insert_hoisted_no_parent() { + let mut s = ScopeGraph::new(r(0, 20), DUMMY_LANG_ID); + + let a = definition(1, 2); + + s.insert_hoisted_def(a); + + // should build: + // + // root + // `- a + // + // `a` cannot be hoisted beyond `root` + + test_edges( + &s.graph, + expect![[r#" + 01..02 --DefToScope-> 00..20 + "#]], + ); + } + + #[test] + fn insert_ref() { + let mut s = ScopeGraph::new(r(0, 20), DUMMY_LANG_ID); + + let foo = definition(0, 3); + let foo_ref = reference(5, 8); + + let src = r"foo\nfoo".as_bytes(); + + s.insert_local_def(foo); + s.insert_ref(foo_ref, src); + + // should build + // + // root + // `- foo <- foo_ref + + test_edges( + &s.graph, + expect![[r#" + 00..03 --DefToScope-> 00..20 + 05..08 --RefToDef-> 00..03 + "#]], + ) + } + + #[test] + fn insert_ref_namespaced() { + let mut s = ScopeGraph::new(r(0, 50), DUMMY_LANG_ID); + + // we assume the following namespaces: + // - 0: [ 0: function, 1: method, 2: getter ] + // - 1: [ 0: var 1: const, 2: static ] + // + // defs from namespace 0 should be unreachable from + // refs from namespace 1 and vice-versa + + // create two defs: + // - fn foo + // - var foo + // + // every function call is annotated with the `function` symbol + // every variable ref is annotated with the `var` symbol + // every const ref is annotated with the `const` symbol + let src = r#"fn foo() {}; +var foo; +foo(); +foo + 1; +[0; foo]"# + .as_bytes(); + + // function ∈ {namespace=0, symbol=0} + let foo_func_def = { + let mut d = definition(3, 6); + d.symbol_id = Some(SymbolId { + namespace_idx: 0, + symbol_idx: 0, + }); + d + }; + + // var ∈ {namespace=1, symbol=0} + let foo_var_def = { + let mut d = definition(17, 20); + d.symbol_id = Some(SymbolId { + namespace_idx: 1, + symbol_idx: 0, + }); + d + }; + + // function ∈ {namespace=0, symbol=0} + let foo_func_ref = { + let mut r = reference(22, 25); + r.symbol_id = Some(SymbolId { + namespace_idx: 0, + symbol_idx: 0, + }); + r + }; + + // var ∈ {namespace=1, symbol=0} + let foo_var_ref = { + let mut r = reference(29, 32); + r.symbol_id = Some(SymbolId { + namespace_idx: 1, + symbol_idx: 0, + }); + r + }; + + // const ∈ {namespace=1, symbol=1} + let foo_const_ref = { + let mut r = reference(42, 45); + r.symbol_id = Some(SymbolId { + namespace_idx: 1, + symbol_idx: 1, + }); + r + }; + + s.insert_local_def(foo_func_def); + s.insert_local_def(foo_var_def); + s.insert_ref(foo_func_ref, src); + s.insert_ref(foo_var_ref, src); + s.insert_ref(foo_const_ref, src); + + // should build + // + // root + // `- foo_func <- foo_func_ref + // `- foo_var <- foo_var_ref, foo_const_ref + + test_edges( + &s.graph, + expect![[r#" + 03..06 --DefToScope-> 00..50 + 17..20 --DefToScope-> 00..50 + 22..25 --RefToDef-> 03..06 + 29..32 --RefToDef-> 17..20 + 42..45 --RefToDef-> 17..20 + "#]], + ) + } + + #[test] + fn insert_ref_no_namespace() { + let mut s = ScopeGraph::new(r(0, 50), DUMMY_LANG_ID); + + // modeling the following code: + // + // fn foo() {} + // var foo; + // + // foo + 1 + // + // `foo` should refer to both, `fn foo` and `var foo`, + // the lack of namespacing should raise both defs as + // possible defs. 
+ // + // once again, we assume the following namespaces: + // - 0: [ 0: function, 1: method, 2: getter ] + // - 1: [ 0: var 1: const, 2: static ] + + // function ∈ {namespace=0, symbol=0} + let foo_func_def = { + let mut d = definition(3, 6); + d.symbol_id = Some(SymbolId { + namespace_idx: 0, + symbol_idx: 0, + }); + d + }; + + // var ∈ {namespace=1, symbol=0} + let foo_var_def = { + let mut d = definition(17, 20); + d.symbol_id = Some(SymbolId { + namespace_idx: 1, + symbol_idx: 0, + }); + d + }; + + let foo_ambiguous_ref = reference(23, 26); + + let src = r#"fn foo() {}; +var foo; + +foo + 1"# + .as_bytes(); + + s.insert_local_def(foo_func_def); + s.insert_local_def(foo_var_def); + s.insert_ref(foo_ambiguous_ref, src); + + // should build; + // + // root + // `- foo_func_def <- foo_ambiguous_ref + // `- foo_var_def <- foo_ambiguous_ref + + test_edges( + &s.graph, + expect![[r#" + 03..06 --DefToScope-> 00..50 + 17..20 --DefToScope-> 00..50 + 23..26 --RefToDef-> 17..20 + 23..26 --RefToDef-> 03..06 + "#]], + ) + } + + #[test] + fn hoverable_ranges() { + let mut s = ScopeGraph::new(r(0, 50), DUMMY_LANG_ID); + + // modeling the following code: + // + // let t = 2; + // t + 1; + // + // contains 1 def, 1 ref + + let src = "let t = 2;\nt + 1;".as_bytes(); + + let t_def = definition(4, 5); + let t_ref = reference(11, 12); + + s.insert_local_def(t_def); + s.insert_ref(t_ref, src); + + let hoverable_ranges = s.hoverable_ranges().collect::>(); + assert_eq!(hoverable_ranges, vec![r(4, 5), r(11, 12)]) + } +} diff --git a/src/intelligence/scope_resolution/debug.rs b/src/intelligence/scope_resolution/debug.rs new file mode 100644 index 0000000..437af07 --- /dev/null +++ b/src/intelligence/scope_resolution/debug.rs @@ -0,0 +1,266 @@ +use std::fmt; + +use super::{EdgeKind, LocalDef, NodeKind}; +use crate::{intelligence::TSLanguageConfig, text_range::TextRange}; + +use petgraph::{ + graph::{Graph, NodeIndex}, + visit::EdgeRef, + Direction, +}; + +pub struct ScopeDebug { + range: TextRange, + defs: Vec, + imports: Vec, + scopes: Vec, + language: &'static TSLanguageConfig, +} + +struct DefDebug { + name: String, + range: TextRange, + context: String, + refs: Vec, + symbol: String, +} + +struct RefDebug { + context: String, +} + +struct ImportDebug { + name: String, + range: TextRange, + context: String, + refs: Vec, +} + +impl DefDebug { + fn new( + range: TextRange, + name: String, + refs: Vec, + symbol: String, + src: &[u8], + ) -> Self { + Self { + name, + range, + context: context(range, src), + refs: refs + .into_iter() + .map(|r| context(r, src)) + .map(|context| RefDebug { context }) + .collect(), + symbol, + } + } +} + +impl ImportDebug { + fn new(range: TextRange, name: String, refs: Vec, src: &[u8]) -> Self { + Self { + name, + range, + context: context(range, src), + refs: refs + .into_iter() + .map(|r| context(r, src)) + .map(|context| RefDebug { context }) + .collect(), + } + } +} + +impl ScopeDebug { + fn empty(range: TextRange, language: &'static TSLanguageConfig) -> Self { + Self { + range, + defs: Vec::new(), + imports: Vec::new(), + scopes: Vec::new(), + language, + } + } + + fn build(&mut self, graph: &Graph, start: NodeIndex, src: &[u8]) { + let mut defs = graph + .edges_directed(start, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::DefToScope) + .map(|edge| { + let def_node = edge.source(); + + // range of this def + let range = graph[def_node].range(); + + // text source of this def + let text = std::str::from_utf8(&src[range.start.byte..range.end.byte]) + 
.unwrap() + .to_owned(); + + // all references of this def, sorted by range + let mut refs = graph + .edges_directed(def_node, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::RefToDef) + .map(|edge| graph[edge.source()].range()) + .collect::>(); + + refs.sort(); + + // symbol, if any + let symbol = match &graph[def_node] { + NodeKind::Def(LocalDef { + symbol_id: Some(symbol_id), + .. + }) => symbol_id.name(self.language.namespaces).to_string(), + _ => "none".to_string(), + }; + + DefDebug::new(range, text, refs, symbol, src) + }) + .collect::>(); + + let mut imports = graph + .edges_directed(start, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::ImportToScope) + .map(|edge| { + let imp_node = edge.source(); + + // range of this import + let range = graph[imp_node].range(); + + // text source of this import + let text = std::str::from_utf8(&src[range.start.byte..range.end.byte]) + .unwrap() + .to_owned(); + + // all references of this import, sorted by range + let mut refs = graph + .edges_directed(imp_node, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::RefToImport) + .map(|edge| graph[edge.source()].range()) + .collect::>(); + + refs.sort(); + + ImportDebug::new(range, text, refs, src) + }) + .collect::>(); + + let mut scopes = graph + .edges_directed(start, Direction::Incoming) + .filter(|edge| *edge.weight() == EdgeKind::ScopeToScope) + .map(|edge| { + let source_scope = edge.source(); + let mut scope_debug = ScopeDebug::empty(graph[source_scope].range(), self.language); + scope_debug.build(graph, source_scope, src); + scope_debug + }) + .collect::>(); + + // sort defs by their ranges + defs.sort_by(|a, b| a.range.cmp(&b.range)); + // sort imports by their ranges + imports.sort_by(|a, b| a.range.cmp(&b.range)); + // sort scopes by their ranges + scopes.sort_by(|a, b| a.range.cmp(&b.range)); + + self.defs = defs; + self.imports = imports; + self.scopes = scopes; + } + + pub fn new( + graph: &Graph, + start: NodeIndex, + src: &[u8], + lang_config: &'static TSLanguageConfig, + ) -> Self { + let mut scope_debug = Self::empty(graph[start].range(), lang_config); + scope_debug.build(graph, start, src); + scope_debug + } +} + +impl fmt::Debug for ScopeDebug { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.imports.is_empty() { + f.debug_struct("scope") + .field("definitions", &self.defs) + .field("child scopes", &self.scopes) + .finish() + } else { + f.debug_struct("scope") + .field("definitions", &self.defs) + .field("imports", &self.imports) + .field("child scopes", &self.scopes) + .finish() + } + } +} + +impl fmt::Debug for DefDebug { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut s = f.debug_struct(&self.name); + let d = s + .field("kind", &self.symbol) + .field("context", &self.context); + + if self.refs.is_empty() { + d + } else { + d.field(&format!("referenced in ({})", self.refs.len()), &self.refs) + } + .finish() + } +} + +impl fmt::Debug for ImportDebug { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut s = f.debug_struct(&self.name); + let d = s.field("context", &self.context); + + if self.refs.is_empty() { + d + } else { + d.field(&format!("referenced in ({})", self.refs.len()), &self.refs) + } + .finish() + } +} + +impl fmt::Debug for RefDebug { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "`{}`", self.context) + } +} + +fn context(range: TextRange, src: &[u8]) -> String { + // first new line before start + let context_start = src + 
.iter() + .enumerate() + .take(range.start.byte) + .rev() + .find_map(|(idx, &c)| (c == b'\n').then_some(idx)) + .unwrap_or(range.start.byte - 1) + .saturating_add(1); + + // first new line after end + let context_end: usize = src + .iter() + .enumerate() + .skip(range.end.byte) + .find_map(|(idx, &c)| (c == b'\n').then_some(idx)) + .unwrap_or(range.end.byte + 1) + .saturating_sub(1); + + let from_utf8 = |bytes| std::str::from_utf8(bytes).unwrap(); + format!( + "{}§{}§{}", + from_utf8(&src[context_start..range.start.byte]).trim_start(), + from_utf8(&src[range.start.byte..range.end.byte]), + from_utf8(&src[range.end.byte..=context_end]).trim_end() + ) +} diff --git a/src/intelligence/scope_resolution/def.rs b/src/intelligence/scope_resolution/def.rs new file mode 100644 index 0000000..7992c92 --- /dev/null +++ b/src/intelligence/scope_resolution/def.rs @@ -0,0 +1,20 @@ +use crate::{intelligence::namespace::SymbolId, text_range::TextRange}; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct LocalDef { + pub range: TextRange, + pub symbol_id: Option, +} + +impl LocalDef { + /// Initialize a new definition + pub fn new(range: TextRange, symbol_id: Option) -> Self { + Self { range, symbol_id } + } + + pub fn name<'a>(&self, buffer: &'a [u8]) -> &'a [u8] { + &buffer[self.range.start.byte..self.range.end.byte] + } +} diff --git a/src/intelligence/scope_resolution/import.rs b/src/intelligence/scope_resolution/import.rs new file mode 100644 index 0000000..02b8c67 --- /dev/null +++ b/src/intelligence/scope_resolution/import.rs @@ -0,0 +1,19 @@ +use crate::text_range::TextRange; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct LocalImport { + pub range: TextRange, +} + +impl LocalImport { + /// Initialize a new import + pub fn new(range: TextRange) -> Self { + Self { range } + } + + pub fn name<'a>(&self, buffer: &'a [u8]) -> &'a [u8] { + &buffer[self.range.start.byte..self.range.end.byte] + } +} diff --git a/src/intelligence/scope_resolution/reference.rs b/src/intelligence/scope_resolution/reference.rs new file mode 100644 index 0000000..f68d936 --- /dev/null +++ b/src/intelligence/scope_resolution/reference.rs @@ -0,0 +1,20 @@ +use crate::{intelligence::namespace::SymbolId, text_range::TextRange}; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Reference { + pub range: TextRange, + pub symbol_id: Option, +} + +impl Reference { + /// Initialize a new reference + pub fn new(range: TextRange, symbol_id: Option) -> Self { + Self { range, symbol_id } + } + + pub fn name<'a>(&self, buffer: &'a [u8]) -> &'a [u8] { + &buffer[self.range.start.byte..self.range.end.byte] + } +} diff --git a/src/intelligence/scope_resolution/scope.rs b/src/intelligence/scope_resolution/scope.rs new file mode 100644 index 0000000..daa5ab0 --- /dev/null +++ b/src/intelligence/scope_resolution/scope.rs @@ -0,0 +1,40 @@ +use super::{EdgeKind, ScopeGraph}; +use crate::text_range::TextRange; + +use petgraph::{graph::NodeIndex, visit::EdgeRef, Direction}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct LocalScope { + pub range: TextRange, +} + +impl LocalScope { + pub fn new(range: TextRange) -> Self { + Self { range } + } +} + +pub struct ScopeStack<'a> { + pub scope_graph: &'a ScopeGraph, + pub start: Option>, +} + +impl<'a> Iterator for ScopeStack<'a> { + type Item = 
NodeIndex; + fn next(&mut self) -> Option { + if let Some(start) = self.start { + let parent = self + .scope_graph + .graph + .edges_directed(start, Direction::Outgoing) + .find(|edge| *edge.weight() == EdgeKind::ScopeToScope) + .map(|edge| edge.target()); + let original = start; + self.start = parent; + Some(original) + } else { + None + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..dc48e74 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,205 @@ +pub mod file; +pub mod indexes; +pub mod intelligence; +pub mod repository; +pub mod sync_handle; +pub mod symbol; +pub mod text_range; +pub mod search; +pub mod schema; +pub mod snippet; +pub mod content_document; + +use std::path::Path; + +pub use file::File; +pub use indexes::{Indexes, Indexable}; +pub use repository::Repository; +use search::Searcher; +pub use sync_handle::SyncHandle; + +use pyo3::prelude::*; +use serde_json::json; + + +/// Formats the sum of two numbers as string. +#[pyfunction] +fn go_to(root_path_str: &str, index_path_str: &str, relative_path: &str, line: usize, start_index: usize, end_index: usize) -> PyResult { + let root_path = Path::new(root_path_str); + + if !root_path.exists() { + return Err(pyo3::exceptions::PyRuntimeError::new_err("Internal error: Root path does not exist")); + } + + let index_path = Path::new(index_path_str); + + if !index_path.exists() { + return Err(pyo3::exceptions::PyRuntimeError::new_err("Internal error: Index path does not exist")); + } + + let buffer_size_per_thread = 15_000_000; + let num_threads = 4; + + let rt = tokio::runtime::Runtime::new().map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Internal error: Failed to create Tokio runtime: {}", e)) + })?; + + rt.block_on(async { + let indexes = Indexes::new(&index_path, buffer_size_per_thread, num_threads).await.map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to create indexes: {}", e)) + })?; + + indexes.index(root_path).await.map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to index repository: {}", e)) + })?; + + let searcher = Searcher::new(&index_path).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to create searcher: {}", e)) + })?; + + let result = searcher.token_info(relative_path, line, start_index, end_index).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Error retrieving token info: {}", e)) + })?; + + Ok(search::Searcher::format_token_info(result)) + }) +} + +#[pyfunction] +fn text_search(root_path_str: &str, index_path_str: &str, query: &str, case_sensitive: bool) -> PyResult { + let root_path = Path::new(root_path_str); + + if !root_path.exists() { + return Err(pyo3::exceptions::PyRuntimeError::new_err("Internal error: Root path does not exist")); + } + + let index_path = Path::new(index_path_str); + + if !index_path.exists() { + return Err(pyo3::exceptions::PyRuntimeError::new_err("Internal error: Index path does not exist")); + } + + let buffer_size_per_thread = 15_000_000; + let num_threads = 4; + + let rt = tokio::runtime::Runtime::new().map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Internal error: Failed to create Tokio runtime: {}", e)) + })?; + + rt.block_on(async { + let indexes = Indexes::new(&index_path, buffer_size_per_thread, num_threads).await.map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to create indexes: {}", e)) + })?; + + indexes.index(root_path).await.map_err(|e| { + 
pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to index repository: {}", e)) + })?; + + let searcher = Searcher::new(&index_path).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to create searcher: {}", e)) + })?; + + let result = searcher.text_search(query, case_sensitive).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Error performing text search: {}", e)) + })?; + + Ok(search::Searcher::format_search_results(result)) + }) + // Ok("dsf"); +} + +#[pyfunction] +fn fuzzy_search(root_path_str: &str, index_path_str: &str, query: &str, max_distance: u8) -> PyResult { + let root_path = Path::new(root_path_str); + + if !root_path.exists() { + return Err(pyo3::exceptions::PyRuntimeError::new_err("Internal error: Root path does not exist")); + } + + let index_path = Path::new(index_path_str); + + if !index_path.exists() { + return Err(pyo3::exceptions::PyRuntimeError::new_err("Internal error: Index path does not exist")); + } + + let buffer_size_per_thread = 15_000_000; + let num_threads = 4; + + let rt = tokio::runtime::Runtime::new().map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Internal error: Failed to create Tokio runtime: {}", e)) + })?; + + rt.block_on(async { + let indexes = Indexes::new(&index_path, buffer_size_per_thread, num_threads).await.map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to create indexes: {}", e)) + })?; + + indexes.index(root_path).await.map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to index repository: {}", e)) + })?; + + let searcher = Searcher::new(&index_path).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to create searcher: {}", e)) + })?; + + let result = searcher.fuzzy_search(query, max_distance).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Error performing fuzzy search: {}", e)) + })?; + + Ok(search::Searcher::format_fuzzy_search_results(result)) + }) +} + + +#[pyfunction] +fn get_hoverable_ranges(root_path_str: &str, index_path_str: &str, relative_path: &str) -> PyResult { + let root_path = Path::new(root_path_str); + + if !root_path.exists() { + return Err(pyo3::exceptions::PyRuntimeError::new_err("Internal error: Root path does not exist")); + } + + let index_path = Path::new(index_path_str); + + if !index_path.exists() { + return Err(pyo3::exceptions::PyRuntimeError::new_err("Internal error: Index path does not exist")); + } + + let buffer_size_per_thread = 15_000_000; + let num_threads = 4; + + let rt = tokio::runtime::Runtime::new().map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Internal error: Failed to create Tokio runtime: {}", e)) + })?; + + rt.block_on(async { + let indexes = Indexes::new(&index_path, buffer_size_per_thread, num_threads).await.map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to create indexes: {}", e)) + })?; + + indexes.index(root_path).await.map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to index repository: {}", e)) + })?; + + let searcher = Searcher::new(&index_path).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to create searcher: {}", e)) + })?; + + let ranges = searcher.get_hoverable_ranges(relative_path).map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!("Error retrieving hoverable ranges: {}", e)) + })?; + + let formatted_ranges = search::Searcher::format_hoverable_ranges(ranges); + + Ok(json!(formatted_ranges).to_string()) + }) +} + 
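+// Usage sketch (illustrative only, not part of this crate's code): once the extension
+// module registered below has been built and installed into a Python environment,
+// the exported functions can be called from Python roughly as follows. The paths and
+// token position used here are hypothetical placeholders:
+//
+//     import code_nav_devon
+//
+//     # full-text search over the repository rooted at the first path,
+//     # using the index stored at the second path
+//     print(code_nav_devon.text_search("/path/to/repo", "/path/to/index", "ScopeGraph", False))
+//
+//     # definitions/references for the token on line 33, character columns 6..11 of a file
+//     print(code_nav_devon.go_to("/path/to/repo", "/path/to/index",
+//                                "/path/to/repo/src/lib.rs", 33, 6, 11))
+//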
+#[pymodule] +fn code_nav_devon(m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(go_to, m)?)?; + m.add_function(wrap_pyfunction!(text_search, m)?)?; + m.add_function(wrap_pyfunction!(fuzzy_search, m)?)?; + m.add_function(wrap_pyfunction!(get_hoverable_ranges, m)?)?; + Ok(()) +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..67bcb8c --- /dev/null +++ b/src/main.rs @@ -0,0 +1,31 @@ +use anyhow::Result; + + +#[tokio::main] +async fn main() -> Result<()> { + // let root_path = Path::new("/Users/arnav/Desktop/devon/Devon"); + // // println!("{}", root_path.display()); + // let index_path = Path::new("/Users/arnav/Desktop/devon/Devon/index"); + + // let buffer_size_per_thread = 15_000_000; + // let num_threads = 4; + + // let indexes = Indexes::new(&index_path, buffer_size_per_thread, num_threads).await?; + // indexes.index(root_path).await?; + + // // // // // Create a searcher and perform a search + // let searcher = Searcher::new(&index_path)?; + // let result = searcher.token_info("/Users/arnav/Desktop/devon/Devon/devon_agent/agents/default/agent.py", 33, 6, 11); + // match result { + // Ok(token_info) => println!("{}", pyo3_example::search::Searcher::format_token_info(token_info)), + // Err(e) => println!("Error retrieving token info: {}", e), + // } + + // // let result = searcher.text_search("indexes")?; + // // println!("{}", retreival::search::Searcher::format_search_results(result)); + + // // println!("-"); + // // // Print out the results + + Ok(()) +} diff --git a/src/repository.rs b/src/repository.rs new file mode 100644 index 0000000..40f6dd3 --- /dev/null +++ b/src/repository.rs @@ -0,0 +1,14 @@ +use anyhow::{Context, Result}; +use std::path::{Path, PathBuf}; + +#[derive(Debug)] +pub struct Repository { + pub disk_path: PathBuf, +} + +impl Repository { + pub fn from_path(path: &Path) -> Result { + let disk_path = path.canonicalize().context("failed to canonicalize path")?; + Ok(Self { disk_path }) + } +} \ No newline at end of file diff --git a/src/schema.rs b/src/schema.rs new file mode 100644 index 0000000..6e728b5 --- /dev/null +++ b/src/schema.rs @@ -0,0 +1,14 @@ +use tantivy::schema::{Schema, TEXT, STRING, STORED, FAST, BytesOptions, SchemaBuilder}; + +pub fn build_schema() -> Schema { + let mut schema_builder = SchemaBuilder::default(); + schema_builder.add_text_field("path", STRING | FAST | STORED); + schema_builder.add_text_field("content", TEXT | STORED); + schema_builder.add_text_field("content_insensitive", TEXT | STORED); + schema_builder.add_bytes_field("symbol_locations", STORED); + schema_builder.add_bytes_field("line_end_indices", BytesOptions::default().set_stored()); + schema_builder.add_text_field("symbols", TEXT | STORED); + schema_builder.add_text_field("lang", STRING | FAST | STORED); + schema_builder.add_text_field("hash", STRING | FAST | STORED); + schema_builder.build() +} diff --git a/src/search.rs b/src/search.rs new file mode 100644 index 0000000..b413199 --- /dev/null +++ b/src/search.rs @@ -0,0 +1,498 @@ +use std::collections::HashMap; +use std::path::Path; +use tantivy::query::{FuzzyTermQuery, TermQuery, QueryParser}; +use tantivy::schema::Field; +use tantivy::{Index, IndexReader, collector::TopDocs, Term}; +use anyhow::Result; +use serde::{Deserialize, Serialize}; + +use crate::content_document::ContentDocument; +use crate::intelligence::code_navigation::{CodeNavigationContext, FileSymbols, OccurrenceKind, Token}; +use crate::intelligence::TSLanguage; +use 
crate::schema::build_schema; +use crate::symbol::SymbolLocations; +use crate::text_range::TextRange; + +#[derive(Debug, Serialize, Deserialize)] +pub struct SearchResult { + pub path: String, + pub line_number: usize, + pub column: usize, + pub context: String, +} + +pub struct Searcher { + index: Index, + reader: IndexReader, + path_field: Field, + content_field: Field, + content_insensitive_field: Field, // Added field + line_end_indices_field: Field, + lang_field: Field, // Added lang field + symbol_locations_field: Field, +} + +impl Searcher { + pub fn new(index_path: &Path) -> Result { + let index = Index::open_in_dir(index_path)?; + let reader = index.reader()?; + let schema = build_schema(); + let path_field = schema.get_field("path").unwrap(); + let content_field = schema.get_field("content").unwrap(); + let content_insensitive_field = schema.get_field("content_insensitive").unwrap(); // Added field + let line_end_indices_field = schema.get_field("line_end_indices").unwrap(); + let lang_field = schema.get_field("lang").unwrap(); + let symbol_locations_field = schema.get_field("symbol_locations").unwrap(); + + Ok(Self { + index, + reader, + path_field, + content_field, + content_insensitive_field, + line_end_indices_field, + lang_field, + symbol_locations_field, + }) + } + + pub fn text_search(&self, query_str: &str, case_sensitive: bool) -> Result> { + let searcher = self.reader.searcher(); + + // Choose the appropriate field and query parser based on case sensitivity + let (field, query_str) = if case_sensitive { + (self.content_field, query_str.to_string()) + } else { + (self.content_insensitive_field, query_str.to_lowercase()) + }; + + let query_parser = QueryParser::for_index(&self.index, vec![field]); + let query = query_parser.parse_query(&query_str)?; + let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?; + + let mut results = Vec::new(); + for (_score, doc_address) in top_docs { + let retrieved_doc = searcher.doc(doc_address)?; + + let path = match retrieved_doc.get_first(self.path_field) { + Some(path_field) => path_field.as_text().unwrap().to_string(), + None => { + println!("Debug: Path field is missing"); + continue; + } + }; + + let content = match retrieved_doc.get_first(field) { + Some(field) => field.as_text().unwrap().to_string(), + None => { + println!("Debug: Content field is missing"); + continue; + } + }; + + let new_content = match retrieved_doc.get_first(self.content_field) { + Some(content_field) => content_field.as_text().unwrap().to_string(), + None => { + println!("Debug: Content field is missing"); + continue; + } + }; + + let line_end_indices_field = retrieved_doc.get_first(self.line_end_indices_field); + + let line_end_indices: Vec = match line_end_indices_field { + Some(field) => { + match field.as_bytes() { + Some(bytes) => { + bytes.chunks_exact(4).map(|c| { + u32::from_le_bytes([c[0], c[1], c[2], c[3]]) + }).collect() + } + None => { + println!("Debug: Failed to get bytes"); + continue; + } + } + } + None => { + println!("Debug: Line end indices field is missing"); + continue; + } + }; + + for (mut line_number, window) in line_end_indices.windows(2).enumerate() { + if let [start, end] = *window { + let line = &content[start as usize..end as usize]; + + if line.contains(&query_str) { + line_number += 2; + let column = line.find(&query_str).unwrap(); + let context_start = if line_number >= 3 { line_number - 3 } else { 0 }; + let context_end = usize::min(line_number + 3, line_end_indices.len() - 1); + let context: String = 
line_end_indices[context_start..=context_end] + .windows(2) + .map(|w| { + let start = w[0] as usize; + let end = w[1] as usize; + &new_content[start..end] + }) + .collect::>() + .join("\n"); + + results.push(SearchResult { + path: path.clone(), + line_number, + column, + context, + }); + } + } + } + } + + Ok(results) + } + + pub fn fuzzy_search(&self, query_str: &str, max_distance: u8) -> Result> { + let searcher = self.reader.searcher(); + + let query = FuzzyTermQuery::new( + Term::from_field_text(self.content_field, query_str), + max_distance, // max edit distance for fuzzy search + true, + ); + + let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?; + + let mut results = Vec::new(); + for (_score, doc_address) in top_docs { + let retrieved_doc = searcher.doc(doc_address)?; + + let path = match retrieved_doc.get_first(self.path_field) { + Some(path_field) => path_field.as_text().unwrap().to_string(), + None => { + println!("Debug: Path field is missing"); + continue; + } + }; + + let content = match retrieved_doc.get_first(self.content_field) { + Some(content_field) => content_field.as_text().unwrap().to_string(), + None => { + println!("Debug: Content field is missing"); + continue; + } + }; + + let line_end_indices_field = retrieved_doc.get_first(self.line_end_indices_field); + + let line_end_indices: Vec = match line_end_indices_field { + Some(field) => { + match field.as_bytes() { + Some(bytes) => { + bytes.chunks_exact(4).map(|c| { + u32::from_le_bytes([c[0], c[1], c[2], c[3]]) + }).collect() + } + None => { + println!("Debug: Failed to get bytes"); + continue; + } + } + } + None => { + println!("Debug: Line end indices field is missing"); + continue; + } + }; + + for (mut line_number, window) in line_end_indices.windows(2).enumerate() { + if let [start, end] = *window { + let line = &content[start as usize..end as usize]; + + if line.contains(query_str) { + line_number += 2; + let column = line.find(query_str).unwrap(); + let context_start = line_number - 2; + let context_end = usize::min(line_number - 1, line_end_indices.len() - 1); + let context: String = line_end_indices[context_start..=context_end] + .windows(2) + .map(|w| { + let start = w[0] as usize; + let end = w[1] as usize; + &content[start..end] + }) + .collect::>() + .join("\n"); + + results.push(SearchResult { + path: path.clone(), + line_number, + column, + context, + }); + } + } + } + } + + Ok(results) + } + + pub fn format_fuzzy_search_results(results: Vec) -> String { + if results.is_empty() { + return "No results found".to_string(); + } + + let mut formatted_results = String::new(); + for result in results { + formatted_results.push_str(&format!( + "File: {}, Line: {}, Column: {}, \nContent:\n{}\n\n", + result.path, result.line_number, result.column, result.context + )); + } + formatted_results + } + + + pub fn format_search_results(results: Vec) -> String { + if results.is_empty() { + return "No results found".to_string(); + } + + let mut formatted_results = String::new(); + for result in results { + formatted_results.push_str(&format!( + "File: {}, Line: {}, Column: {}, \nContent:\n{}\n\n", + result.path, result.line_number, result.column, result.context + )); + } + formatted_results + } + + pub fn load_all_documents(&self, lang: &str) -> Result> { + let searcher = self.reader.searcher(); + + let mut documents = Vec::new(); + for segment_reader in searcher.segment_readers() { + let store_reader = segment_reader.get_store_reader(0)?; + let alive_bitset = segment_reader.alive_bitset(); + + for doc in 
store_reader.iter(alive_bitset) { + let doc = doc?; + let lang_field_value = doc.get_first(self.lang_field) + .and_then(|f| f.as_text()) + .unwrap_or("").to_lowercase(); + + // println!("{:?} {:?}", lang_field_value, lang); + + if lang_field_value == lang { + let content = doc.get_first(self.content_field) + .and_then(|f| f.as_text()) + .unwrap_or("") + .to_string(); + + let relative_path = doc.get_first(self.path_field) + .and_then(|f| f.as_text()) + .unwrap_or("") + .to_string(); + + let line_end_indices: Vec = doc.get_first(self.line_end_indices_field) + .and_then(|f| f.as_bytes()) + .unwrap_or(&[]) + .chunks_exact(4) + .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]])) + .collect(); + + let symbol_locations: SymbolLocations = doc.get_first(self.symbol_locations_field) + .and_then(|f| f.as_bytes()) + .and_then(|b| bincode::deserialize(b).ok()) + .unwrap_or_default(); + + // println!("{:?}", symbol_locations); + + documents.push(ContentDocument { + content, + lang: Some(lang.to_string()), + relative_path, + line_end_indices, + symbol_locations, + }); + } + } + } + + Ok(documents) + } + + + pub fn line_word_to_byte_range(&self, content: &str, line_end_indices: &[u32], line_number: usize, word_start_index: usize, word_end_index: usize) -> Result<(usize, usize)> { + if line_number == 0 || line_number > line_end_indices.len() { + return Err(anyhow::anyhow!("Invalid line number")); + } + + // Calculate the start and end byte indices for the line + let start_of_line = if line_number == 1 { + 0 + } else { + line_end_indices[line_number - 2] as usize + 1 + }; + + let end_of_line = line_end_indices[line_number - 1] as usize; + + // Extract the line as a &str + let line = &content[start_of_line..end_of_line]; + + // println!("{}", line); + + // Validate word start and end indices + if word_start_index >= word_end_index || word_end_index > line.chars().count() { + return Err(anyhow::anyhow!("Invalid word indices")); + } + + // Find the byte index for the start of the word + let word_start_byte_index = line.chars().take(word_start_index).map(|c| c.len_utf8()).sum::(); + + // Find the byte index for the end of the word + let word_end_byte_index = line.chars().take(word_end_index).map(|c| c.len_utf8()).sum::(); + + let start_byte = start_of_line + word_start_byte_index; + let end_byte = start_of_line + word_end_byte_index; + + println!("{:?}", &content[start_byte..end_byte]); + + Ok((start_byte, end_byte)) + } + + fn detect_language(path: &Path) -> &'static str { + let extension = path.extension().and_then(std::ffi::OsStr::to_str).unwrap_or(""); + TSLanguage::from_extension(extension).unwrap_or("plaintext") + } + + pub fn token_info(&self, relative_path: &str, line: usize, start_index: usize, end_index: usize) -> Result> { + let lang = Self::detect_language(Path::new(relative_path)).to_lowercase(); + + // println!("{}", lang); + + let all_docs = self.load_all_documents(&lang)?; + + // Find the source document based on the provided relative path + let source_document_idx = all_docs.iter().position(|doc| doc.relative_path == relative_path) + .ok_or(anyhow::anyhow!("Source document not found"))?; + + let doc = all_docs.get(source_document_idx).unwrap(); + + // Convert line number and indices to byte range + let (start_byte, end_byte) = Self::line_word_to_byte_range(self, &doc.content, &doc.line_end_indices, line, start_index, end_index)?; + + let token = Token { + relative_path, + start_byte, + end_byte, + }; + + let context = CodeNavigationContext { + token, + all_docs: &all_docs, + 
+            source_document_idx,
+            snipper: None,
+        };
+
+        let mut data = context.token_info();
+
+        // Adjust line numbers by 1
+        for file_symbols in &mut data {
+            for occurrence in &mut file_symbols.data {
+                occurrence.range.start.line += 1;
+                occurrence.range.end.line += 1;
+            }
+        }
+
+        Ok(data)
+    }
+
+    // New function to format token info results
+    pub fn format_token_info(token_info_results: Vec<FileSymbols>) -> String {
+        if token_info_results.is_empty() {
+            return "No results found".to_string();
+        }
+
+        let mut formatted_results = String::new();
+        for file_symbols in token_info_results {
+            for occurrence in file_symbols.data {
+                formatted_results.push_str(&format!(
+                    "Kind: {}, File: {}, Line: {}, Column: {}\nContent:\n{}\n\n",
+                    if let OccurrenceKind::Reference = occurrence.kind {"Reference"} else {"Definition"},
+                    file_symbols.file,
+                    occurrence.range.start.line,
+                    occurrence.range.start.column,
+                    occurrence.snippet.data,
+                ));
+            }
+        }
+        formatted_results
+    }
+
+    pub fn get_hoverable_ranges(&self, relative_path: &str) -> Result<Vec<TextRange>> {
+        let lang = Self::detect_language(Path::new(relative_path)).to_lowercase();
+        let all_docs = self.load_all_documents(&lang)?;
+
+        // Find the document based on the provided relative path
+        let doc = all_docs.iter().find(|doc| doc.relative_path == relative_path)
+            .ok_or(anyhow::anyhow!("Document not found"))?;
+
+        doc.hoverable_ranges().ok_or(anyhow::anyhow!("Hoverable ranges not found"))
+    }
+
+    pub fn format_hoverable_ranges(ranges: Vec<TextRange>) -> Vec<HashMap<String, u32>> {
+        let mut formatted_ranges = Vec::new();
+        for range in ranges {
+            let mut range_map = HashMap::new();
+            range_map.insert("start_line".to_string(), range.start.line as u32);
+            range_map.insert("start_column".to_string(), range.start.column as u32);
+            range_map.insert("end_line".to_string(), range.end.line as u32);
+            range_map.insert("end_column".to_string(), range.end.column as u32);
+            formatted_ranges.push(range_map);
+        }
+        formatted_ranges
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::Indexes;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn test_searcher_with_test_files() -> Result<()> {
+        let root_path = Path::new("./test_files");
+        let index_path = Path::new("./test_files/index");
+
+        // Clean up the index directory if it exists
+        if index_path.exists() {
+            std::fs::remove_dir_all(index_path)?;
+        }
+
+        // Create indexes
+        let buffer_size_per_thread = 60_000_000;
+        let num_threads = 4;
+
+        let indexes = Indexes::new(index_path, buffer_size_per_thread, num_threads).await?;
+        indexes.index(root_path).await?;
+
+        // Create a searcher and perform a search
+        let searcher = Searcher::new(index_path)?;
+        let result = searcher.text_search("indexes", true)?;
+
+        // Print out the results (or you can write assertions here)
+        for res in result {
+            println!(
+                "File: {}, Line: {}, Column: {}, Context: {}",
+                res.path, res.line_number, res.column, res.context
+            );
+        }
+
+        Ok(())
+    }
+}
\ No newline at end of file
diff --git a/src/snippet.rs b/src/snippet.rs
new file mode 100644
index 0000000..9abfe25
--- /dev/null
+++ b/src/snippet.rs
@@ -0,0 +1,606 @@
+use anyhow::Result;
+use regex::{Regex, RegexBuilder};
+use serde::Serialize;
+use smallvec::{smallvec, SmallVec};
+
+use crate::{content_document::ContentDocument, symbol::Symbol};
+use std::ops::Range;
+
+#[derive(Serialize, Debug, PartialEq, Eq)]
+pub struct SnippedFile {
+    pub relative_path: String,
+    pub lang: Option<String>,
+    pub snippets: Vec<Snippet>,
+}
+
+#[derive(Serialize, Debug, PartialEq, Eq)]
+pub struct Snippet {
+    pub data: String,
+    pub highlights: Vec<Range<usize>>,
+    pub symbols: Vec<Symbol>,
+    pub line_range: Range<usize>,
+}
+
+/// A marker indicating a subset of some source text, with a list of highlighted ranges.
+///
+/// This doesn't store the actual text data itself, just the position information for simplified
+/// merging.
+#[derive(Serialize, Debug, PartialEq, Eq)]
+pub struct Location {
+    /// The subset's byte range in the original input string.
+    pub byte_range: Range<usize>,
+
+    /// The subset's line range in the original input string.
+    pub line_range: Range<usize>,
+
+    /// A set of byte ranges denoting highlighted text indices, on the subset string.
+    pub highlights: SmallVec<[Range<usize>; 2]>,
+}
+
+impl Location {
+    // This is not a real error type, it communicates that the argument was not consumed.
+    #[allow(clippy::result_large_err)]
+    fn join(&mut self, rhs: Self) -> Result<(), Self> {
+        // Override empty snippets.
+        if self.highlights.is_empty() {
+            *self = rhs;
+            return Ok(());
+        }
+
+        // Fail if the locations don't overlap.
+        if self.line_range.end < rhs.line_range.start {
+            return Err(rhs);
+        }
+
+        let offset = rhs.byte_range.start - self.byte_range.start;
+        self.line_range.end = rhs.line_range.end;
+        self.byte_range.end = rhs.byte_range.end;
+        self.highlights
+            .extend(rhs.highlights.into_iter().map(|mut h| {
+                h.start += offset;
+                h.end += offset;
+                h
+            }));
+
+        Ok(())
+    }
+
+    /// Reify this `Location` into a `Snippet`, given the source string and symbols list.
+    pub fn reify(self, s: &str, symbols: &[Symbol]) -> Snippet {
+        Snippet {
+            data: s[self.byte_range.clone()].to_owned(),
+            line_range: self.line_range.clone(),
+            highlights: self.highlights.into_vec(),
+            symbols: symbols
+                .iter()
+                .filter(|s| {
+                    s.range.start.line >= self.line_range.start
+                        && s.range.end.line <= self.line_range.end
+                })
+                .cloned()
+                .map(|mut sym| {
+                    sym.range.start.byte -= self.byte_range.start;
+                    sym.range.end.byte -= self.byte_range.start;
+                    sym
+                })
+                .collect(),
+        }
+    }
+
+    pub fn line_count(&self) -> usize {
+        self.line_range.end - self.line_range.start
+    }
+}
+
+impl SnippedFile {
+    pub fn merge(mut self, rhs: Self) -> Self {
+        self.snippets.extend(rhs.snippets);
+        Self {
+            snippets: self.snippets,
+            ..rhs
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+pub struct Snipper {
+    pub context_before: usize,
+    pub context_after: usize,
+    pub find_symbols: bool,
+    pub case_sensitive: bool,
+}
+
+impl Default for Snipper {
+    fn default() -> Self {
+        Self {
+            context_before: 0,
+            context_after: 0,
+            find_symbols: false,
+            case_sensitive: true,
+        }
+    }
+}
+
+impl Snipper {
+    pub fn context(mut self, before: usize, after: usize) -> Self {
+        self.context_before = before;
+        self.context_after = after;
+        self
+    }
+
+    pub fn find_symbols(mut self, find_symbols: bool) -> Self {
+        self.find_symbols = find_symbols;
+        self
+    }
+
+    pub fn case_sensitive(mut self, case_sensitive: bool) -> Self {
+        self.case_sensitive = case_sensitive;
+        self
+    }
+
+    pub fn all_for_doc(
+        &self,
+        regex: &str,
+        doc: &ContentDocument,
+    ) -> Result<Option<SnippedFile>> {
+        let query = RegexBuilder::new(regex)
+            .multi_line(true)
+            .case_insensitive(!self.case_sensitive)
+            .build()?;
+
+        let snippets = if self.find_symbols {
+            // a symbol search should perform an intersection of
+            // search results with the symbol list present in a document.
+ // + let mut symbols = doc.symbol_locations.list(); + let symbol_ranges = symbols + .iter() + .map(|sym| sym.range.into()) + .collect::>>(); + + // limit highlights to only symbols + // + // for a search query of `symbol:n` on this text: + // + // const cool_beans = beans(); + // + // only the `n` from `cool_beans` should be highlighted, if + // `cool_beans` is the only symbol in the document: + // + // const cool_beans = beans(); + // ^-- expected + // + // const cool_beans = beans(); + // ^ ^ ^-- incorrect + // + let highlights = query + .find_iter(&doc.content) + .map(|m| m.range()) + .filter(|hl_range| { + symbol_ranges.iter().any(|sym_range| { + hl_range.start >= sym_range.start && hl_range.end <= sym_range.end + }) + }) + .collect::>>(); + + // limit symbols to only those in our highlight list + // + // for a search query of `symbol:loud` on this text: + // + // const (loud, clear) = audio(); + // + // the symbols returned should be just `loud`, even though `clear` + // is also a symbol present in the same snippet. + symbols.retain(|sym_range| { + highlights.iter().any(|hl_range| { + hl_range.start >= sym_range.range.start.byte + && hl_range.end <= sym_range.range.end.byte + }) + }); + + self.expand_many(highlights.into_iter(), &doc.content, &doc.line_end_indices) + .map(|loc| loc.reify(&doc.content, &symbols)) + .collect::>() + } else { + let highlights = query.find_iter(&doc.content).map(|m| m.range()); + self.expand_many(highlights.into_iter(), &doc.content, &doc.line_end_indices) + .map(|loc| loc.reify(&doc.content, &[])) + .collect::>() + }; + + Ok(if snippets.is_empty() { + None + } else { + Some(SnippedFile { + relative_path: doc.relative_path.clone(), + lang: doc.lang.clone(), + snippets, + }) + }) + } + + fn expand_many<'a>( + &'a self, + mut highlights: impl Iterator> + 'a, + text: &'a str, + line_ends: &'a [u32], + ) -> impl Iterator + 'a { + // We store the "next" location here, in case we run into an early split down below due to 2 + // locations not joining together. + let mut next = None; + std::iter::from_fn(move || { + let mut loc = next.take().unwrap_or(Location { + byte_range: 0..0, + line_range: 0..0, + highlights: SmallVec::new(), + }); + + for highlight in &mut highlights { + let next_loc = self.expand(highlight, text, line_ends); + if let Err(next_loc) = loc.join(next_loc) { + next = Some(next_loc); + break; + } + } + + if !loc.highlights.is_empty() { + Some(loc) + } else { + None + } + }) + } + + pub fn expand<'a>( + &'a self, + highlight: Range, + text: &'a str, + line_ends: &'a [u32], + ) -> Location { + let start = text[..highlight.start] + .rmatch_indices('\n') + .nth(self.context_before) + .map(|(i, _)| i + 1) + .unwrap_or(0); + + let end = text[highlight.end..] + .match_indices('\n') + .nth(self.context_after) + .map(|(i, _)| i + highlight.end) + .unwrap_or(text.len()); + + let line_end = line_ends + .iter() + .position(|i| end <= *i as usize) + .unwrap_or(line_ends.len()); + + let line_start = line_ends + .iter() + .rev() + .position(|i| (*i as usize) < start) + .map(|i| line_ends.len() - i) + .unwrap_or(0); + + Location { + byte_range: start..end, + line_range: line_start..line_end, + highlights: smallvec![(highlight.start - start)..(highlight.end - start)], + } + } +} + +#[derive(Serialize)] +pub struct HighlightedString { + pub text: String, + + /// Index ranges that are highlighted as matched. + pub highlights: SmallVec<[Range; 2]>, +} + +impl HighlightedString { + /// Create a new highlighted string with no highlights. 
+ pub fn new>(text: T) -> Self { + Self { + text: text.into(), + highlights: Default::default(), + } + } + + /// Apply a regex to this string, recording the match ranges, if any. + pub fn apply_regex(&mut self, regex: &Regex) { + self.highlights + .extend(regex.find_iter(&self.text).map(|m| m.range())); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + use regex::Regex; + + impl Snippet { + fn line_count(&self) -> usize { + self.line_range.end - self.line_range.start + } + } + + /// Test helper to ensure a string is newline-terminated, and also return an array of newline + /// indices. + fn with_line_ends(s: &str) -> (&str, Vec) { + assert!(s.ends_with('\n')); + let line_ends = s + .match_indices('\n') + .map(|(i, _)| i as u32) + .collect::>(); + (s, line_ends) + } + + #[test] + fn simple_snip() { + let (text, line_ends) = with_line_ends("foobar\n"); + let highlight = 0..3; + + let snipper = Snipper::default(); + + assert_eq!( + snipper.expand(highlight, text, &line_ends).reify(text, &[]), + Snippet { + data: "foobar".into(), + line_range: 0..0, + highlights: vec![0..3], + symbols: vec![], + } + ); + } + + #[test] + fn empty_lines() { + let (text, line_ends) = with_line_ends("\n\nfoo\nbar\nquux\n\n\n\n\n"); + let highlight = 6..9; + let snipper = Snipper::default().context(1, 1); + + assert_eq!( + snipper.expand(highlight, text, &line_ends).reify(text, &[]), + Snippet { + data: "foo\nbar\nquux".into(), + line_range: 2..4, + highlights: vec![4..7], + symbols: vec![], + } + ); + } + + #[test] + fn crlf_line_ends() { + let (text, line_ends) = with_line_ends("foo\r\nbar\r\nquux\n"); + let highlight = 5..8; + let snipper = Snipper::default().context(1, 1); + + assert_eq!( + snipper.expand(highlight, text, &line_ends).reify(text, &[]), + Snippet { + data: "foo\r\nbar\r\nquux".into(), + line_range: 0..2, + highlights: vec![5..8], + symbols: vec![], + } + ); + } + + #[test] + fn mixed_line_ends() { + let (text, line_ends) = with_line_ends("foo\nbar\r\nquux\n"); + let highlight = 4..7; + let snipper = Snipper::default().context(1, 1); + + assert_eq!( + snipper.expand(highlight, text, &line_ends).reify(text, &[]), + Snippet { + data: "foo\nbar\r\nquux".to_owned(), + line_range: 0..2, + highlights: vec![4..7], + symbols: vec![], + } + ); + } + + #[test] + fn context_before() { + let (text, line_ends) = with_line_ends("a\nfoo\nbar\nquux\nz\n"); + let highlight = 6..9; + let snipper = Snipper::default().context(1, 0); + + assert_eq!( + snipper.expand(highlight, text, &line_ends).reify(text, &[]), + Snippet { + data: "foo\nbar".to_owned(), + line_range: 1..2, + highlights: vec![4..7], + symbols: vec![], + } + ); + } + + /// Check that `context_before` being larger than the available line count works. 
+ #[test] + fn context_before_underflow() { + let (text, line_ends) = with_line_ends("bar\nquux\nz\n"); + let highlight = 0..3; + let snipper = Snipper::default().context(1, 0); + + assert_eq!( + snipper.expand(highlight, text, &line_ends).reify(text, &[]), + Snippet { + data: "bar".to_owned(), + line_range: 0..0, + highlights: vec![0..3], + symbols: vec![], + } + ); + } + + #[test] + fn context_after() { + let (text, line_ends) = with_line_ends("a\nfoo\nbar\nquux\nz\n"); + let highlight = 6..9; + let snipper = Snipper::default().context(0, 1); + + assert_eq!( + snipper.expand(highlight, text, &line_ends).reify(text, &[]), + Snippet { + data: "bar\nquux".to_owned(), + line_range: 2..3, + highlights: vec![0..3], + symbols: vec![], + } + ); + } + + /// Check that `context_after` being larger than the available line count works. + #[test] + fn context_after_overflow() { + let (text, line_ends) = with_line_ends("a\nfoo\nbar\n"); + let highlight = 6..9; + let snipper = Snipper::default().context(0, 1); + + assert_eq!( + snipper.expand(highlight, text, &line_ends).reify(text, &[]), + Snippet { + data: "bar\n".to_owned(), + line_range: 2..3, + highlights: vec![0..3], + symbols: vec![], + } + ); + } + + #[test] + fn merge_into_one() { + let text = &[ + r#"pub const SLICE_FROM_RAW_PARTS: [&str; 4] = ["core", "slice", "raw", "from_raw_parts"];"#, + r#"pub const SLICE_FROM_RAW_PARTS_MUT: [&str; 4] = ["core", "slice", "raw", "from_raw_parts_mut"];"#, + r#"pub const SLICE_GET: [&str; 4] = ["core", "slice", "", "get"];"#, + r#"pub const SLICE_INTO_VEC: [&str; 4] = ["alloc", "slice", "", "into_vec"];"#, + r#"pub const SLICE_INTO: [&str; 4] = ["core", "slice", "", "iter"];"#, + r#"pub const SLICE_ITER: [&str; 4] = ["core", "slice", "iter", "Iter"];"#, + "" + ] + .join("\n"); + + let (text, line_ends) = with_line_ends(text); + let regex = Regex::new("SLICE").unwrap(); + let highlights = regex.find_iter(text).map(|m| m.range()); + + let snipper = Snipper::default().context(1, 1); + let observed = snipper + .expand_many(highlights, text, &line_ends) + .collect::>(); + + assert_eq!(observed.len(), 1); + assert_eq!(observed[0].line_count(), 6); + } + + #[test] + fn merge_into_two() { + let text = &[ + r#"pub const SLICE_FROM_RAW_PARTS: [&str; 4] = ["core", "slice", "raw", "from_raw_parts"];"#, + r#"pub const SLICE_FROM_RAW_PARTS_MUT: [&str; 4] = ["core", "slice", "raw", "from_raw_parts_mut"];"#, + r#"pub const GET: [&str; 4] = ["core", "slice", "", "get"];"#, + r#"pub const VEC: [&str; 4] = ["alloc", "slice", "", "into_vec"];"#, + r#"pub const INTO: [&str; 4] = ["core", "slice", "", "iter"];"#, + r#"pub const SLICE_ITER: [&str; 4] = ["core", "slice", "iter", "Iter"];"#, + "", + ] + .join("\n"); + + let (text, line_ends) = with_line_ends(text); + let regex = Regex::new("SLICE").unwrap(); + let highlights = regex.find_iter(text).map(|m| m.range()); + + let observed = Snipper::default() + .context(1, 1) + .expand_many(highlights, text, &line_ends) + .map(|l| l.reify(text, &[])) + .collect::>(); + + assert_eq!(observed.len(), 2); + assert_eq!( + observed[0].data, + [r#"pub const SLICE_FROM_RAW_PARTS: [&str; 4] = ["core", "slice", "raw", "from_raw_parts"];"#, + r#"pub const SLICE_FROM_RAW_PARTS_MUT: [&str; 4] = ["core", "slice", "raw", "from_raw_parts_mut"];"#, + r#"pub const GET: [&str; 4] = ["core", "slice", "", "get"];"#].join("\n") + ); + assert_eq!(observed[0].line_count(), 2); + assert_eq!( + observed[1].data, + [ + r#"pub const INTO: [&str; 4] = ["core", "slice", "", "iter"];"#, + r#"pub const SLICE_ITER: 
[&str; 4] = ["core", "slice", "iter", "Iter"];"#, + "" + ] + .join("\n") + ); + assert_eq!(observed[1].line_count(), 2); + } + + #[test] + fn multiline() { + let (text, line_ends) = with_line_ends( + "pub const SLICE_FROM_RAW_PARTS\n\ + pub const SLICE_FROM_RAW_PARTS_MUT\n\ + pub const VEC\n\ + pub const INTO\n", + ); + + let regex = Regex::new("SLICE").unwrap(); + let highlights = regex.find_iter(text).map(|m| m.range()); + + let observed = Snipper::default() + .context(1, 1) + .expand_many(highlights, text, &line_ends) + .map(|l| l.reify(text, &[])) + .collect::>(); + + assert_eq!(observed.len(), 1); + assert_eq!( + observed[0].data, + "pub const SLICE_FROM_RAW_PARTS\npub const SLICE_FROM_RAW_PARTS_MUT\npub const VEC", + ); + assert_eq!(observed[0].line_count(), 2); + } + + #[test] + fn non_ascii() { + let (text, line_ends) = with_line_ends("pub ようこそ SLICE_FROM_RAW_PARTS\npub ようこそ SLICE_FROM_RAW_ようこそ_MUT\npub const VECようこそ\npub ようこそ INTO\n"); + let regex = Regex::new("SLICE").unwrap(); + let highlights = regex.find_iter(text).map(|m| m.range()); + + let observed = Snipper::default() + .context(1, 1) + .expand_many(highlights, text, &line_ends) + .map(|l| l.reify(text, &[])) + .collect::>(); + + assert_eq!(observed.len(), 1); + assert_eq!(observed[0].line_count(), 2); + assert_eq!( + observed[0].data, + "pub ようこそ SLICE_FROM_RAW_PARTS\npub ようこそ SLICE_FROM_RAW_ようこそ_MUT\npub const VECようこそ" + ) + } + + + + #[test] + fn test_highlighted_string() { + let mut s = HighlightedString::new("foo bar quux"); + + s.apply_regex(&Regex::new("foo").unwrap()); + s.apply_regex(&Regex::new("b.r.").unwrap()); + s.apply_regex(&Regex::new("ux$").unwrap()); + + assert_eq!(s.text, "foo bar quux"); + assert_eq!(s.highlights.to_vec(), &[0..3, 4..8, 10..12]); + } +} diff --git a/src/symbol.rs b/src/symbol.rs new file mode 100644 index 0000000..1a7902b --- /dev/null +++ b/src/symbol.rs @@ -0,0 +1,37 @@ +use crate::{intelligence::ScopeGraph, text_range::TextRange}; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Symbol { + pub kind: String, + pub range: TextRange, +} + +/// Collection of symbol locations for *single* file +#[derive(Default, Debug, Clone, Deserialize, Serialize)] +#[non_exhaustive] +pub enum SymbolLocations { + /// tree-sitter powered symbol-locations (and more!) 
+ TreeSitter(ScopeGraph), + + /// no symbol-locations for this file + #[default] + Empty, +} + +impl SymbolLocations { + pub fn list(&self) -> Vec { + match self { + Self::TreeSitter(graph) => graph.symbols(), + Self::Empty => Vec::new(), + } + } + + pub fn scope_graph(&self) -> Option<&ScopeGraph> { + match self { + Self::TreeSitter(graph) => Some(graph), + Self::Empty => None, + } + } +} diff --git a/src/sync_handle.rs b/src/sync_handle.rs new file mode 100644 index 0000000..2bca581 --- /dev/null +++ b/src/sync_handle.rs @@ -0,0 +1,2 @@ +#[derive(Default)] +pub struct SyncHandle; diff --git a/src/text_range.rs b/src/text_range.rs new file mode 100644 index 0000000..5b1ec67 --- /dev/null +++ b/src/text_range.rs @@ -0,0 +1,121 @@ +use std::cmp::{Ord, Ordering}; + +use serde::{Deserialize, Serialize}; + +/// A singular position in a text document +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct Point { + /// The byte index + pub byte: usize, + + /// 0-indexed line number + pub line: usize, + + /// Position within the line + pub column: usize, +} + +impl PartialOrd for Point { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Point { + fn cmp(&self, other: &Self) -> Ordering { + self.byte.cmp(&other.byte) + } +} + +impl Point { + pub fn new(byte: usize, line: usize, column: usize) -> Self { + Self { byte, line, column } + } + + pub fn from_byte(byte: usize, line_end_indices: &[u32]) -> Self { + let line = line_end_indices + .iter() + .position(|&line_end_byte| (line_end_byte as usize) > byte) + .unwrap_or(0); + + let column = line + .checked_sub(1) + .and_then(|idx| line_end_indices.get(idx)) + .map(|&prev_line_end| byte.saturating_sub(prev_line_end as usize)) + .unwrap_or(byte); + + Self::new(byte, line, column) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct TextRange { + pub start: Point, + pub end: Point, +} + +impl PartialOrd for TextRange { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for TextRange { + fn cmp(&self, other: &Self) -> Ordering { + let compare_start_byte = self.start.byte.cmp(&other.start.byte); + let compare_size = self.size().cmp(&other.size()); + + compare_start_byte.then(compare_size) + } +} + +impl TextRange { + pub fn new(start: Point, end: Point) -> Self { + assert!(start <= end); + Self { start, end } + } + + pub fn contains(&self, other: &TextRange) -> bool { + // (self.start ... [other.start ... other.end] ... self.end) + self.start <= other.start && other.end <= self.end + } + + #[allow(unused)] + pub fn contains_strict(&self, other: TextRange) -> bool { + // (self.start ... (other.start ... other.end) ... 
self.end)
+        self.start < other.start && other.end <= self.end
+    }
+
+    pub fn size(&self) -> usize {
+        self.end.byte.saturating_sub(self.start.byte)
+    }
+
+    pub fn from_byte_range(range: std::ops::Range<usize>, line_end_indices: &[u32]) -> Self {
+        let start = Point::from_byte(range.start, line_end_indices);
+        let end = Point::from_byte(range.end, line_end_indices);
+        Self::new(start, end)
+    }
+}
+
+impl From<tree_sitter::Range> for TextRange {
+    fn from(r: tree_sitter::Range) -> Self {
+        Self {
+            start: Point {
+                byte: r.start_byte,
+                line: r.start_point.row,
+                column: r.start_point.column,
+            },
+            end: Point {
+                byte: r.end_byte,
+                line: r.end_point.row,
+                column: r.end_point.column,
+            },
+        }
+    }
+}
+
+impl From<TextRange> for std::ops::Range<usize> {
+    fn from(r: TextRange) -> std::ops::Range<usize> {
+        r.start.byte..r.end.byte
+    }
+}
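+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // A minimal usage sketch for `Point::from_byte` / `TextRange::from_byte_range`, assuming
+    // `line_end_indices` holds the byte offset of every '\n' in the source text (the same
+    // convention the snippet tests use). Note that `Point::from_byte` measures the column
+    // relative to the preceding '\n' byte, so the first character of a line gets column 1.
+    #[test]
+    fn byte_range_to_points_sketch() {
+        // "foo\nbar\n": newlines sit at bytes 3 and 7.
+        let line_end_indices = [3u32, 7];
+
+        // Bytes 4..6 cover "ba" on the second line (0-indexed line 1).
+        let range = TextRange::from_byte_range(4..6, &line_end_indices);
+
+        assert_eq!(range.start, Point::new(4, 1, 1));
+        assert_eq!(range.end, Point::new(6, 1, 3));
+        assert_eq!(range.size(), 2);
+
+        // The reverse conversion recovers the original byte range.
+        let byte_range: std::ops::Range<usize> = range.into();
+        assert_eq!(byte_range, 4..6);
+    }
+}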