From 41c51946931001a4ae9a88e63f9db730a7a75bb9 Mon Sep 17 00:00:00 2001 From: "Tamir K." <121674805+Tamir-K@users.noreply.github.com> Date: Sun, 6 Oct 2024 09:46:16 +0300 Subject: [PATCH 01/21] Fix/corrupted file architecture key error (#2444) * Add try except clause --- CHANGELOG.md | 2 ++ capa/features/extractors/pefile.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9860292f..97f0bfc20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ ### Bug Fixes +- extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K + ### capa Explorer Web ### capa Explorer IDA Pro plugin diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py index 1ae56378c..1dd478adf 100644 --- a/capa/features/extractors/pefile.py +++ b/capa/features/extractors/pefile.py @@ -130,7 +130,13 @@ def extract_file_arch(pe, **kwargs): elif pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_AMD64"]: yield Arch(ARCH_AMD64), NO_ADDRESS else: - logger.warning("unsupported architecture: %s", pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine]) + try: + logger.warning( + "unsupported architecture: %s", + pefile.MACHINE_TYPE[pe.FILE_HEADER.Machine], + ) + except KeyError: + logger.warning("unknown architecture: %s", pe.FILE_HEADER.Machine) def extract_file_features(pe, buf): From c7bb8b8e6787a55970f703458e322bdc330b5956 Mon Sep 17 00:00:00 2001 From: Moritz Date: Mon, 7 Oct 2024 11:46:37 +0200 Subject: [PATCH 02/21] Update Node checkout Actions (#2446) * Update setup Node Actions --- .github/workflows/web-deploy.yml | 2 +- .github/workflows/web-tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/web-deploy.yml b/.github/workflows/web-deploy.yml index fc103c933..518230f2e 100644 --- a/.github/workflows/web-deploy.yml +++ b/.github/workflows/web-deploy.yml @@ -43,7 +43,7 @@ jobs: fetch-depth: 1 show-progress: true - name: Set up 
Node - uses: actions/setup-node@v4 + uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 with: node-version: 20 cache: 'npm' diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml index 2977656df..8a8a63312 100644 --- a/.github/workflows/web-tests.yml +++ b/.github/workflows/web-tests.yml @@ -19,7 +19,7 @@ jobs: show-progress: true - name: Set up Node - uses: actions/setup-node@v3 + uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 with: node-version: 20 cache: 'npm' From a4fdb0a3efd3fa35267593b20396d05623b01b2f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 14:07:10 +0000 Subject: [PATCH 03/21] build(deps): bump rich from 13.8.0 to 13.9.2 Bumps [rich](https://github.com/Textualize/rich) from 13.8.0 to 13.9.2. - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v13.8.0...v13.9.2) --- updated-dependencies: - dependency-name: rich dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index eda886405..7d7f10cca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,7 +36,7 @@ pyelftools==0.31 pygments==2.18.0 python-flirt==0.8.10 pyyaml==6.0.2 -rich==13.8.0 +rich==13.9.2 ruamel-yaml==0.18.6 ruamel-yaml-clib==0.2.8 setuptools==75.1.0 From 22368fbe6f4d79479bfff6614f10ad3cc6e16848 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 9 Oct 2024 12:13:11 +0000 Subject: [PATCH 04/21] rename bin_search function --- capa/features/extractors/ida/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index a40ca3fda..fc22bc38d 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -41,7 +41,7 @@ def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]: return while True: - ea, _ = ida_bytes.bin_search3(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD) + ea, _ = ida_bytes.bin_search(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD) if ea == idaapi.BADADDR: break start = ea + 1 From f2c329b7683c5beba4e5dbf1585eb7aca8ec11ea Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 9 Oct 2024 12:15:38 +0000 Subject: [PATCH 05/21] rename ida to idapro module for IDA 9.0 --- CHANGELOG.md | 1 + capa/features/extractors/ida/idalib.py | 12 ++++++++---- capa/loader.py | 4 ++-- pyproject.toml | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97f0bfc20..f8368d30a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ ### Bug Fixes - extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K +- IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz ### capa Explorer Web diff --git a/capa/features/extractors/ida/idalib.py 
b/capa/features/extractors/ida/idalib.py index df1e3172e..f0627971a 100644 --- a/capa/features/extractors/ida/idalib.py +++ b/capa/features/extractors/ida/idalib.py @@ -18,7 +18,7 @@ def is_idalib_installed() -> bool: try: - return importlib.util.find_spec("ida") is not None + return importlib.util.find_spec("idapro") is not None except ModuleNotFoundError: return False @@ -44,6 +44,7 @@ def get_idalib_user_config_path() -> Optional[Path]: def find_idalib() -> Optional[Path]: config_path = get_idalib_user_config_path() if not config_path: + logger.error("IDA Pro user configuration does not exist, please make sure you've installed idalib properly.") return None config = json.loads(config_path.read_text(encoding="utf-8")) @@ -51,6 +52,9 @@ def find_idalib() -> Optional[Path]: try: ida_install_dir = Path(config["Paths"]["ida-install-dir"]) except KeyError: + logger.error( + "IDA Pro user configuration does not contain location of IDA Pro installation, please make sure you've installed idalib properly." 
+ ) return None if not ida_install_dir.exists(): @@ -73,7 +77,7 @@ def find_idalib() -> Optional[Path]: if not idalib_path.exists(): return None - if not (idalib_path / "ida" / "__init__.py").is_file(): + if not (idalib_path / "idapro" / "__init__.py").is_file(): return None return idalib_path @@ -96,7 +100,7 @@ def has_idalib() -> bool: def load_idalib() -> bool: try: - import ida + import idapro return True except ImportError: @@ -106,7 +110,7 @@ def load_idalib() -> bool: sys.path.append(idalib_path.absolute().as_posix()) try: - import ida # noqa: F401 unused import + import idapro # noqa: F401 unused import return True except ImportError: diff --git a/capa/loader.py b/capa/loader.py index c4c8c1afa..f481d7b8d 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -323,7 +323,7 @@ def get_extractor( if not idalib.load_idalib(): raise RuntimeError("failed to load IDA idalib module.") - import ida + import idapro import ida_auto import capa.features.extractors.ida.extractor @@ -333,7 +333,7 @@ def get_extractor( # so as not to screw up structured output. 
with capa.helpers.stdout_redirector(io.BytesIO()): with console.status("analyzing program...", spinner="dots"): - if ida.open_database(str(input_path), run_auto_analysis=True): + if idapro.open_database(str(input_path), run_auto_analysis=True): raise RuntimeError("failed to analyze input file") logger.debug("idalib: waiting for analysis...") diff --git a/pyproject.toml b/pyproject.toml index d3a5481a3..3416c3a9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -177,7 +177,7 @@ known_first_party = [ "binaryninja", "flirt", "ghidra", - "ida", + "idapro", "ida_ida", "ida_auto", "ida_bytes", From ca7073ce873059d90e15170a61c4143f6c6db72a Mon Sep 17 00:00:00 2001 From: Soufiane Fariss Date: Wed, 9 Oct 2024 17:20:50 +0200 Subject: [PATCH 06/21] ci: add web releases workflow --- .github/workflows/web-release.yml | 67 +++++++++++++++++++++++++++++++ .github/workflows/web-tests.yml | 13 +++--- web/explorer/.gitignore | 3 ++ 3 files changed, 77 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/web-release.yml diff --git a/.github/workflows/web-release.yml b/.github/workflows/web-release.yml new file mode 100644 index 000000000..a0156657f --- /dev/null +++ b/.github/workflows/web-release.yml @@ -0,0 +1,67 @@ +name: create capa Explorer Web release + +on: + workflow_dispatch: + inputs: + version: + description: 'Version number for the release (x.x.x)' + required: true + type: string + +jobs: + run-tests: + uses: ./.github/workflows/web-tests.yml + + build-and-release: + needs: run-tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set release name + run: echo "RELEASE_NAME=capa-explorer-web-${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV + + - name: Check if release already exists + run: | + if ls web/explorer/releases/capa-explorer-web-${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then + echo "A release with version ${{ github.event.inputs.version }} already exists" + exit 1 + fi + + - name: Set 
up Node.js + uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + with: + node-version: 20 + cache: 'npm' + cache-dependency-path: 'web/explorer/package-lock.json' + + - name: Install dependencies + run: npm ci + working-directory: web/explorer + + - name: Build offline bundle + run: npm run build:bundle + working-directory: web/explorer + + - name: Compress bundle + run: zip -r ${{ env.RELEASE_NAME }}.zip capa-explorer-web + working-directory: web/explorer + + - name: Create releases directory + run: mkdir -vp web/explorer/releases + + - name: Move archive release to releases folder + run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases + + - name: Commit and push release + run: | + git config --local user.email "capa-dev@mandiant.com" + git config --local user.name "Capa Bot" + git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip + git commit -m "explorer web: add release ${{ env.RELEASE_NAME }}" + git push + + - name: Remove older releases + # keep only the latest 3 releases + run: ls -t capa-explorer-web-*.zip | tail -n +4 | xargs -r rm -- + working-directory: web/explorer/releases diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml index 8a8a63312..22016a3a5 100644 --- a/.github/workflows/web-tests.yml +++ b/.github/workflows/web-tests.yml @@ -1,10 +1,11 @@ -name: Capa Explorer Web tests +name: capa Explorer Web tests on: pull_request: branches: [ master ] paths: - 'web/explorer/**' + workflow_call: # this allows the workflow to be called by other workflows jobs: test: @@ -23,20 +24,20 @@ jobs: with: node-version: 20 cache: 'npm' - cache-dependency-path: './web/explorer/package-lock.json' + cache-dependency-path: 'web/explorer/package-lock.json' - name: Install dependencies run: npm ci - working-directory: ./web/explorer + working-directory: web/explorer - name: Lint run: npm run lint - working-directory: ./web/explorer + working-directory: web/explorer - name: Format run: npm run 
format:check - working-directory: ./web/explorer + working-directory: web/explorer - name: Run unit tests run: npm run test - working-directory: ./web/explorer + working-directory: web/explorer diff --git a/web/explorer/.gitignore b/web/explorer/.gitignore index 064b5fed8..c734baff7 100644 --- a/web/explorer/.gitignore +++ b/web/explorer/.gitignore @@ -7,6 +7,9 @@ yarn-error.log* pnpm-debug.log* lerna-debug.log* +# capa Explorer Web Releases +releases/ + # Dependencies, build results, and other generated files node_modules .DS_Store From 7be6fe6ae1d71405730c5d12c47d14df4c6eb4af Mon Sep 17 00:00:00 2001 From: Soufiane Fariss Date: Wed, 9 Oct 2024 18:15:56 +0200 Subject: [PATCH 07/21] changelog: add web releases workflow --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97f0bfc20..b8e32a9a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -178,6 +178,7 @@ Special thanks to our repeat and new contributors: - CI: update Binary Ninja version to 4.1 and use Python 3.9 to test it #2211 @xusheng6 - CI: update tests.yml workflow to exclude web and documentation files #2263 @s-ff - CI: update build.yml workflow to exclude web and documentation files #2270 @s-ff +- CI: add web releases workflow #2455 @s-ff ### Raw diffs From 6447319cc7f6a61bc3cc928b6f980a5b7c3069c2 Mon Sep 17 00:00:00 2001 From: Fariss Date: Thu, 10 Oct 2024 13:40:04 +0200 Subject: [PATCH 08/21] explorer web: wrap long function calls (#2447) Co-authored-by: Moritz --- web/explorer/src/components/RuleMatchesTable.vue | 2 +- .../src/components/columns/RuleColumn.vue | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/web/explorer/src/components/RuleMatchesTable.vue b/web/explorer/src/components/RuleMatchesTable.vue index fe891901f..3340ef875 100644 --- a/web/explorer/src/components/RuleMatchesTable.vue +++ b/web/explorer/src/components/RuleMatchesTable.vue @@ -160,7 +160,7 @@ - + diff --git 
a/web/explorer/src/components/columns/RuleColumn.vue b/web/explorer/src/components/columns/RuleColumn.vue index 2a23a2749..7afc18e3b 100644 --- a/web/explorer/src/components/columns/RuleColumn.vue +++ b/web/explorer/src/components/columns/RuleColumn.vue @@ -55,7 +55,12 @@ @@ -83,3 +88,11 @@ const getTooltipContent = (data) => { return null; }; + + From 688afab087600e4204ef156a0279e7c0f5c33f98 Mon Sep 17 00:00:00 2001 From: Moritz Date: Fri, 11 Oct 2024 12:34:18 +0200 Subject: [PATCH 09/21] add v7.4.0 info --- web/public/index.html | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/web/public/index.html b/web/public/index.html index 57bbed05e..f77ab434f 100644 --- a/web/public/index.html +++ b/web/public/index.html @@ -215,6 +215,12 @@

Rule Updates

Tool Updates

+

v7.4.0 (2024-10-04)

+

+ The v7.4.0 capa release fixes a bug when processing VMRay analysis archives and enhances API extraction for all dynamic backends. For better terminal rendering capa now solely relies on the rich library.
+ The standalone capa executable can now automatically detect installations of relevant third party applications and use their backends (notably, idalib and Binary Ninja). For the extra standalone Linux build we've upgraded from Python 3.11 to 3.12. +

+

v7.3.0 (2024-09-20)

The capa v7.3.0 release comes with the following three major enhancements: From bc91171c654d8d3e5f886a3e67496984a438a116 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Fri, 11 Oct 2024 15:08:01 +0000 Subject: [PATCH 10/21] fix bug preventing save of capa results --- CHANGELOG.md | 2 ++ capa/ida/plugin/form.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8368d30a..ab5d9ca24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ ### capa Explorer IDA Pro plugin +- fix bug preventing saving of capa results via Save button @mr-tz + ### Development ### Raw diffs diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 0aee6cea2..028ce2078 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -1309,10 +1309,17 @@ def save_program_analysis(self): s = self.resdoc_cache.model_dump_json().encode("utf-8") - path = Path(self.ask_user_capa_json_file()) - if not path.exists(): + path = self.ask_user_capa_json_file() + if not path: + # dialog canceled + return + + path = Path(path) + if not path.parent.exists(): + logger.warning("Failed to save file: parent directory '%s' does not exist.", path.parent) return + logger.info("Saving capa results to %s.", path) write_file(path, s) def save_function_analysis(self): From 84c9da09e0cbf985789a5abd21b5f973176aa3ff Mon Sep 17 00:00:00 2001 From: mr-tz Date: Mon, 14 Oct 2024 05:27:38 +0000 Subject: [PATCH 11/21] fix save base address --- CHANGELOG.md | 2 ++ capa/ghidra/helpers.py | 2 +- capa/ida/helpers.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab5d9ca24..54fcd9e1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,12 +14,14 @@ - extractor: fix exception when PE extractor encounters unknown architecture #2440 @Tamir-K - IDA Pro: rename ida to idapro module for plugin and idalib in IDA 9.0 #2453 @mr-tz +- ghidra: fix saving of base address @mr-tz ### capa Explorer Web ### capa 
Explorer IDA Pro plugin - fix bug preventing saving of capa results via Save button @mr-tz +- fix saving of base address @mr-tz ### Development diff --git a/capa/ghidra/helpers.py b/capa/ghidra/helpers.py index b32c534a3..44af2f017 100644 --- a/capa/ghidra/helpers.py +++ b/capa/ghidra/helpers.py @@ -150,7 +150,7 @@ def collect_metadata(rules: List[Path]): os=os, extractor="ghidra", rules=tuple(r.resolve().absolute().as_posix() for r in rules), - base_address=capa.features.freeze.Address.from_capa(currentProgram().getImageBase().getOffset()), # type: ignore [name-defined] # noqa: F821 + base_address=capa.features.freeze.Address.from_capa(AbsoluteVirtualAddress(currentProgram().getImageBase().getOffset())), # type: ignore [name-defined] # noqa: F821 layout=rdoc.StaticLayout( functions=(), ), diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index 91f29f05e..066e8605d 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -204,7 +204,7 @@ def collect_metadata(rules: List[Path]): os=os, extractor="ida", rules=tuple(r.resolve().absolute().as_posix() for r in rules), - base_address=capa.features.freeze.Address.from_capa(idaapi.get_imagebase()), + base_address=capa.features.freeze.Address.from_capa(AbsoluteVirtualAddress(idaapi.get_imagebase())), layout=rdoc.StaticLayout( functions=(), # this is updated after capabilities have been collected. 
From 54badc323d49f6f8c0d05d01f25f51d79a539b89 Mon Sep 17 00:00:00 2001 From: Soufiane Fariss Date: Mon, 14 Oct 2024 11:59:17 +0200 Subject: [PATCH 12/21] ci: add CHANGELOG.md for web releases --- .github/workflows/web-release.yml | 38 ++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/.github/workflows/web-release.yml b/.github/workflows/web-release.yml index a0156657f..4dedff923 100644 --- a/.github/workflows/web-release.yml +++ b/.github/workflows/web-release.yml @@ -1,5 +1,4 @@ -name: create capa Explorer Web release - +name: create web release on: workflow_dispatch: inputs: @@ -19,12 +18,12 @@ jobs: - uses: actions/checkout@v4 - name: Set release name - run: echo "RELEASE_NAME=capa-explorer-web-${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV + run: echo "RELEASE_NAME=capa-explorer-web-v${{ github.event.inputs.version }}-${GITHUB_SHA::7}" >> $GITHUB_ENV - name: Check if release already exists run: | - if ls web/explorer/releases/capa-explorer-web-${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then - echo "A release with version ${{ github.event.inputs.version }} already exists" + if ls web/explorer/releases/capa-explorer-web-v${{ github.event.inputs.version }}-* 1> /dev/null 2>&1; then + echo "::error:: A release with version ${{ github.event.inputs.version }} already exists" exit 1 fi @@ -50,18 +49,31 @@ jobs: - name: Create releases directory run: mkdir -vp web/explorer/releases - - name: Move archive release to releases folder + - name: Move release to releases folder run: mv web/explorer/${{ env.RELEASE_NAME }}.zip web/explorer/releases - - name: Commit and push release + - name: Compute release SHA256 hash run: | - git config --local user.email "capa-dev@mandiant.com" - git config --local user.name "Capa Bot" - git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip - git commit -m "explorer web: add release ${{ env.RELEASE_NAME }}" - git push + echo "RELEASE_SHA256=$(sha256sum 
web/explorer/releases/${{ env.RELEASE_NAME }}.zip | awk '{print $1}')" >> $GITHUB_ENV + + - name: Update CHANGELOG.md + run: | + echo "## ${{ env.RELEASE_NAME }}" >> web/explorer/releases/CHANGELOG.md + echo "- Release Date: $(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> web/explorer/releases/CHANGELOG.md + echo "- SHA256: ${{ env.RELEASE_SHA256 }}" >> web/explorer/releases/CHANGELOG.md + echo "" >> web/explorer/releases/CHANGELOG.md + cat web/explorer/releases/CHANGELOG.md - name: Remove older releases # keep only the latest 3 releases - run: ls -t capa-explorer-web-*.zip | tail -n +4 | xargs -r rm -- + run: ls -t capa-explorer-web-v*.zip | tail -n +4 | xargs -r rm -- working-directory: web/explorer/releases + + - name: Commit and push release + run: | + git config --local user.email "capa-dev@mandiant.com" + git config --local user.name "Capa Bot" + git add -f web/explorer/releases/${{ env.RELEASE_NAME }}.zip web/explorer/releases/CHANGELOG.md + git add -u web/explorer/releases/ + git commit -m ":robot: explorer web: add release ${{ env.RELEASE_NAME }}" + git push From 338107cf9ea98cca3c2c0d0debd9af8caec291c6 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 15 Oct 2024 15:04:23 +0000 Subject: [PATCH 13/21] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 64b174e50..60fcf9874 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 64b174e50253cbd506df40e7728531b801636a56 +Subproject commit 60fcf9874198b4c1f1527b4f8609020d3be64f32 From 70610cd1c5df7a5d126f144c173a4e8b7fd13d3e Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Wed, 16 Oct 2024 16:11:44 +0000 Subject: [PATCH 14/21] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 60fcf9874..01e45dfcf 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 60fcf9874198b4c1f1527b4f8609020d3be64f32 +Subproject commit 01e45dfcf9ac6125ad8cb1345b10af0e7a1d2dca From 
d74225b5e07f620ac1c0a94f8e8d1a0e0a02c5a9 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Fri, 18 Oct 2024 19:09:29 +0000 Subject: [PATCH 15/21] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 01e45dfcf..9917a93ee 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 01e45dfcf9ac6125ad8cb1345b10af0e7a1d2dca +Subproject commit 9917a93ee145f03333992fc2de2db783648bba32 From cebf8e7274092ec2720c8395c55b67f7b25cf64a Mon Sep 17 00:00:00 2001 From: mr-tz Date: Mon, 21 Oct 2024 14:57:40 +0000 Subject: [PATCH 16/21] update minimum Python to 3.10 --- .devcontainer/Dockerfile | 2 +- .devcontainer/devcontainer.json | 2 +- .github/workflows/build.yml | 14 +++++--------- .github/workflows/tests.yml | 26 +++++++++++++------------- CHANGELOG.md | 2 ++ capa/ghidra/README.md | 2 +- capa/ghidra/capa_explorer.py | 8 ++------ capa/ghidra/capa_ghidra.py | 8 ++------ capa/helpers.py | 11 ----------- capa/ida/plugin/README.md | 5 +++-- capa/main.py | 20 ++++++-------------- requirements.txt | 2 +- 12 files changed, 37 insertions(+), 65 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 33398f53b..f7979e7f7 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,6 +1,6 @@ # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile -# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster +# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3-bullseye, 3.10-bullseye, 3-buster, 3.10-buster, etc. 
ARG VARIANT="3.10-bullseye" FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index c8444ed39..cbecb5603 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -6,7 +6,7 @@ "dockerfile": "Dockerfile", "context": "..", "args": { - // Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6 + // Update 'VARIANT' to pick a Python version: 3, 3.10, etc. // Append -bullseye or -buster to pin to an OS version. // Use -bullseye variants on local on arm64/Apple Silicon. "VARIANT": "3.10", diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0084f0993..f434ef0bc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,26 +21,25 @@ jobs: # set to false for debugging fail-fast: true matrix: - # using Python 3.8 to support running across multiple operating systems including Windows 7 include: - os: ubuntu-20.04 # use old linux so that the shared library versioning is more portable artifact_name: capa asset_name: linux - python_version: 3.8 + python_version: '3.10' - os: ubuntu-20.04 artifact_name: capa asset_name: linux-py312 - python_version: 3.12 + python_version: "3.12" - os: windows-2019 artifact_name: capa.exe asset_name: windows - python_version: 3.8 + python_version: '3.10' - os: macos-12 # use older macOS for assumed better portability artifact_name: capa asset_name: macos - python_version: 3.8 + python_version: '3.10' steps: - name: Checkout capa uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -86,9 +85,6 @@ jobs: - os: ubuntu-22.04 artifact_name: capa asset_name: linux - - os: ubuntu-22.04 - artifact_name: capa - asset_name: linux-py312 - os: windows-2022 artifact_name: capa.exe asset_name: windows @@ -107,7 +103,7 @@ jobs: # upload zipped binaries to Release page if: github.event_name == 'release' name: zip and upload ${{ matrix.asset_name }} - runs-on: 
ubuntu-20.04 + runs-on: ubuntu-22.04 needs: [build] strategy: matrix: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9aa826ef0..92f3e35ca 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -26,7 +26,7 @@ env: jobs: changelog_format: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout capa uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -37,15 +37,15 @@ jobs: if [ $number != 1 ]; then exit 1; fi code_style: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout capa uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 # use latest available python to take advantage of best performance - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: "3.11" + python-version: "3.12" - name: Install dependencies run: | pip install -r requirements.txt @@ -64,16 +64,16 @@ jobs: run: pre-commit run deptry --hook-stage manual rule_linter: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout capa with submodules uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: submodules: recursive - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: "3.11" + python-version: "3.12" - name: Install capa run: | pip install -r requirements.txt @@ -90,15 +90,15 @@ jobs: matrix: os: [ubuntu-20.04, windows-2019, macos-12] # across all operating systems - python-version: ["3.8", "3.11"] + python-version: ["3.10", "3.11"] include: # on Ubuntu run these as well - os: ubuntu-20.04 - python-version: "3.8" + python-version: "3.10" - os: ubuntu-20.04 - python-version: "3.9" + python-version: "3.11" - os: ubuntu-20.04 - python-version: "3.10" + python-version: "3.12" steps: - name: Checkout capa with submodules uses: 
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -168,12 +168,12 @@ jobs: ghidra-tests: name: Ghidra tests for ${{ matrix.python-version }} - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 needs: [tests] strategy: fail-fast: false matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.10", "3.11"] java-version: ["17"] ghidra-version: ["11.0.1"] public-version: ["PUBLIC_20240130"] # for ghidra releases diff --git a/CHANGELOG.md b/CHANGELOG.md index 54fcd9e1f..14841b60d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ ### Breaking Changes +- remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz + ### New Rules (0) - diff --git a/capa/ghidra/README.md b/capa/ghidra/README.md index 30a5695b7..b6596c577 100644 --- a/capa/ghidra/README.md +++ b/capa/ghidra/README.md @@ -55,7 +55,7 @@ You can also execute [capa_ghidra.py](https://raw.githubusercontent.com/mandiant | capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases | | Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases | | Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases | -| Python | `>= 3.8.0` | https://www.python.org/downloads | +| Python | `>= 3.10.0` | https://www.python.org/downloads | ## Installation diff --git a/capa/ghidra/capa_explorer.py b/capa/ghidra/capa_explorer.py index 4628b6752..efbfd0e3e 100644 --- a/capa/ghidra/capa_explorer.py +++ b/capa/ghidra/capa_explorer.py @@ -368,14 +368,10 @@ def main(): if __name__ == "__main__": - if sys.version_info < (3, 8): + if sys.version_info < (3, 10): from capa.exceptions import UnsupportedRuntimeError - raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+") - elif sys.version_info < (3, 10): - from warnings import warn - - warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2) + raise UnsupportedRuntimeError("This version of capa can only be used with Python 
3.10+") exit_code = main() if exit_code != 0: popup("capa explorer encountered errors during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821 diff --git a/capa/ghidra/capa_ghidra.py b/capa/ghidra/capa_ghidra.py index 817924930..db43ecfac 100644 --- a/capa/ghidra/capa_ghidra.py +++ b/capa/ghidra/capa_ghidra.py @@ -160,12 +160,8 @@ def main(): if __name__ == "__main__": - if sys.version_info < (3, 8): + if sys.version_info < (3, 10): from capa.exceptions import UnsupportedRuntimeError - raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+") - elif sys.version_info < (3, 10): - from warnings import warn - - warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2) + raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+") sys.exit(main()) diff --git a/capa/helpers.py b/capa/helpers.py index 4505647c4..70f1358c3 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -331,17 +331,6 @@ def log_unsupported_arch_error(): logger.error("-" * 80) -def log_unsupported_runtime_error(): - logger.error("-" * 80) - logger.error(" Unsupported runtime or Python interpreter.") - logger.error(" ") - logger.error(" capa supports running under Python 3.8 and higher.") - logger.error(" ") - logger.error(" If you're seeing this message on the command line,") - logger.error(" please ensure you're running a supported Python version.") - logger.error("-" * 80) - - def is_running_standalone() -> bool: """ are we running from a PyInstaller'd executable? diff --git a/capa/ida/plugin/README.md b/capa/ida/plugin/README.md index 0af39a4ad..e904b7adf 100644 --- a/capa/ida/plugin/README.md +++ b/capa/ida/plugin/README.md @@ -96,7 +96,7 @@ can update using the `Settings` button. ### Requirements -capa explorer supports Python versions >= 3.8.x and IDA Pro versions >= 7.4. 
The following IDA Pro versions have been tested: +capa explorer supports Python versions >= 3.10 and IDA Pro versions >= 7.4. The following IDA Pro versions have been tested: * IDA 7.4 * IDA 7.5 @@ -105,8 +105,9 @@ capa explorer supports Python versions >= 3.8.x and IDA Pro versions >= 7.4. The * IDA 8.0 * IDA 8.1 * IDA 8.2 +* IDA 9.0 -capa explorer is however limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.8.x). +capa explorer is however limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.10). If you encounter issues with your specific setup, please open a new [Issue](https://github.com/mandiant/capa/issues). diff --git a/capa/main.py b/capa/main.py index 60c5d638a..ca1434d50 100644 --- a/capa/main.py +++ b/capa/main.py @@ -185,15 +185,11 @@ def get_default_signatures() -> List[Path]: return ret -def simple_message_exception_handler(exctype, value: BaseException, traceback: TracebackType): +def simple_message_exception_handler( + exctype: type[BaseException], value: BaseException, traceback: TracebackType | None +): """ prints friendly message on unexpected exceptions to regular users (debug mode shows regular stack trace) - - args: - # TODO(aaronatp): Once capa drops support for Python 3.8, move the exctype type annotation to - # the function parameters and remove the "# type: ignore[assignment]" from the relevant place - # in the main function, see (https://github.com/mandiant/capa/issues/1896) - exctype (type[BaseException]): exception class """ if exctype is KeyboardInterrupt: @@ -455,7 +451,7 @@ def handle_common_args(args): raise RuntimeError("unexpected --color value: " + args.color) if not args.debug: - sys.excepthook = simple_message_exception_handler # type: ignore[assignment] + sys.excepthook = simple_message_exception_handler if hasattr(args, "input_file"): args.input_file = Path(args.input_file) @@ -901,12 +897,8 @@ def 
apply_extractor_filters(extractor: FeatureExtractor, extractor_filters: Filt def main(argv: Optional[List[str]] = None): - if sys.version_info < (3, 8): - raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+") - elif sys.version_info < (3, 10): - from warnings import warn - - warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2) + if sys.version_info < (3, 10): + raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+") if argv is None: argv = sys.argv[1:] diff --git a/requirements.txt b/requirements.txt index 7d7f10cca..7e6354282 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ intervaltree==3.1.0 markdown-it-py==3.0.0 mdurl==0.1.2 msgpack==1.0.8 -networkx==3.1 +networkx==3.4.2 pefile==2024.8.26 pip==24.2 protobuf==5.28.2 From 2987eeb0acc03e187bdd90bb1644377774a2045a Mon Sep 17 00:00:00 2001 From: mr-tz Date: Tue, 22 Oct 2024 07:40:26 +0000 Subject: [PATCH 17/21] update type annotations tmp --- capa/capabilities/common.py | 4 +- capa/capabilities/dynamic.py | 12 +-- capa/capabilities/static.py | 14 +-- capa/engine.py | 14 +-- capa/features/com/__init__.py | 3 +- capa/features/com/classes.py | 3 +- capa/features/com/interfaces.py | 5 +- capa/features/common.py | 15 ++- capa/features/extractors/base_extractor.py | 46 ++++---- .../extractors/binexport2/__init__.py | 54 +++++----- .../extractors/binexport2/arch/arm/insn.py | 12 +-- .../binexport2/arch/intel/helpers.py | 4 +- .../extractors/binexport2/arch/intel/insn.py | 14 +-- .../extractors/binexport2/basicblock.py | 8 +- .../extractors/binexport2/extractor.py | 20 ++-- capa/features/extractors/binexport2/file.py | 14 +-- .../extractors/binexport2/function.py | 12 +-- .../features/extractors/binexport2/helpers.py | 50 ++++----- capa/features/extractors/binexport2/insn.py | 24 ++--- capa/features/extractors/binja/basicblock.py | 12 +-- capa/features/extractors/binja/extractor.py | 10 
+- capa/features/extractors/binja/file.py | 20 ++-- capa/features/extractors/binja/function.py | 4 +- capa/features/extractors/binja/global_.py | 6 +- capa/features/extractors/binja/helpers.py | 6 +- capa/features/extractors/binja/insn.py | 34 +++--- capa/features/extractors/cape/call.py | 6 +- capa/features/extractors/cape/extractor.py | 14 +-- capa/features/extractors/cape/file.py | 24 ++--- capa/features/extractors/cape/global_.py | 10 +- capa/features/extractors/cape/helpers.py | 4 +- capa/features/extractors/cape/models.py | 96 ++++++++--------- capa/features/extractors/cape/process.py | 8 +- capa/features/extractors/common.py | 10 +- capa/features/extractors/dnfile/extractor.py | 20 ++-- capa/features/extractors/dnfile/file.py | 18 ++-- capa/features/extractors/dnfile/function.py | 12 +-- capa/features/extractors/dnfile/helpers.py | 16 +-- capa/features/extractors/dnfile/insn.py | 16 +-- capa/features/extractors/dnfile/types.py | 8 +- capa/features/extractors/dotnetfile.py | 26 ++--- capa/features/extractors/drakvuf/call.py | 6 +- capa/features/extractors/drakvuf/extractor.py | 16 +-- capa/features/extractors/drakvuf/file.py | 8 +- capa/features/extractors/drakvuf/global_.py | 10 +- capa/features/extractors/drakvuf/helpers.py | 5 +- capa/features/extractors/drakvuf/models.py | 22 ++-- capa/features/extractors/drakvuf/process.py | 8 +- capa/features/extractors/drakvuf/thread.py | 4 +- capa/features/extractors/elf.py | 10 +- capa/features/extractors/elffile.py | 6 +- capa/features/extractors/ghidra/basicblock.py | 10 +- capa/features/extractors/ghidra/extractor.py | 8 +- capa/features/extractors/ghidra/file.py | 22 ++-- capa/features/extractors/ghidra/function.py | 4 +- capa/features/extractors/ghidra/global_.py | 6 +- capa/features/extractors/ghidra/helpers.py | 22 ++-- capa/features/extractors/ghidra/insn.py | 38 +++---- capa/features/extractors/helpers.py | 4 +- capa/features/extractors/ida/basicblock.py | 8 +- capa/features/extractors/ida/extractor.py | 8 
+- capa/features/extractors/ida/file.py | 20 ++-- capa/features/extractors/ida/function.py | 4 +- capa/features/extractors/ida/global_.py | 6 +- capa/features/extractors/ida/helpers.py | 12 +-- capa/features/extractors/ida/insn.py | 36 +++---- capa/features/extractors/null.py | 36 +++---- capa/features/extractors/pefile.py | 8 +- capa/features/extractors/viv/basicblock.py | 12 +-- capa/features/extractors/viv/extractor.py | 12 +-- capa/features/extractors/viv/file.py | 20 ++-- capa/features/extractors/viv/function.py | 14 +-- capa/features/extractors/viv/global_.py | 4 +- .../features/extractors/viv/indirect_calls.py | 12 +-- capa/features/extractors/viv/insn.py | 44 ++++---- capa/features/extractors/vmray/__init__.py | 16 +-- capa/features/extractors/vmray/call.py | 8 +- capa/features/extractors/vmray/extractor.py | 16 +-- capa/features/extractors/vmray/file.py | 22 ++-- capa/features/extractors/vmray/global_.py | 10 +- capa/features/extractors/vmray/models.py | 56 +++++----- capa/features/freeze/__init__.py | 52 +++++---- capa/ghidra/capa_explorer.py | 8 +- capa/ghidra/helpers.py | 3 +- capa/helpers.py | 8 +- capa/ida/helpers.py | 4 +- capa/ida/plugin/cache.py | 18 ++-- capa/ida/plugin/form.py | 4 +- capa/ida/plugin/item.py | 6 +- capa/ida/plugin/model.py | 10 +- capa/ida/plugin/view.py | 4 +- capa/loader.py | 38 +++---- capa/main.py | 22 ++-- capa/perf.py | 3 +- capa/render/proto/__init__.py | 6 +- capa/render/result_document.py | 59 +++++------ capa/render/utils.py | 4 +- capa/render/vverbose.py | 4 +- capa/rules/__init__.py | 100 +++++++++--------- capa/rules/cache.py | 8 +- pyproject.toml | 6 +- scripts/capa-as-library.py | 8 +- scripts/capa2sarif.py | 4 +- scripts/compare-backends.py | 8 +- scripts/detect_duplicate_features.py | 3 +- scripts/inspect-binexport2.py | 12 +-- scripts/lint.py | 19 ++-- scripts/setup-linter-dependencies.py | 13 ++- scripts/show-capabilities-by-function.py | 3 +- scripts/show-features.py | 3 +- scripts/show-unused-features.py | 22 
++-- tests/fixtures.py | 3 +- tests/test_binexport_accessors.py | 4 +- tests/test_freeze_dynamic.py | 3 +- tests/test_freeze_static.py | 3 +- web/rules/scripts/build_root.py | 3 +- 116 files changed, 874 insertions(+), 905 deletions(-) diff --git a/capa/capabilities/common.py b/capa/capabilities/common.py index a73f40afe..e9b6f253d 100644 --- a/capa/capabilities/common.py +++ b/capa/capabilities/common.py @@ -9,7 +9,7 @@ import logging import itertools import collections -from typing import Any, Tuple +from typing import Any from capa.rules import Scope, RuleSet from capa.engine import FeatureSet, MatchResults @@ -64,7 +64,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon def find_capabilities( ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs -) -> Tuple[MatchResults, Any]: +) -> tuple[MatchResults, Any]: from capa.capabilities.static import find_static_capabilities from capa.capabilities.dynamic import find_dynamic_capabilities diff --git a/capa/capabilities/dynamic.py b/capa/capabilities/dynamic.py index 2a433be4e..5eced84d3 100644 --- a/capa/capabilities/dynamic.py +++ b/capa/capabilities/dynamic.py @@ -9,7 +9,7 @@ import logging import itertools import collections -from typing import Any, List, Tuple +from typing import Any import capa.perf import capa.features.freeze as frz @@ -24,7 +24,7 @@ def find_call_capabilities( ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle -) -> Tuple[FeatureSet, MatchResults]: +) -> tuple[FeatureSet, MatchResults]: """ find matches for the given rules for the given call. @@ -51,7 +51,7 @@ def find_call_capabilities( def find_thread_capabilities( ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle -) -> Tuple[FeatureSet, MatchResults, MatchResults]: +) -> tuple[FeatureSet, MatchResults, MatchResults]: """ find matches for the given rules within the given thread. 
@@ -89,7 +89,7 @@ def find_thread_capabilities( def find_process_capabilities( ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle -) -> Tuple[MatchResults, MatchResults, MatchResults, int]: +) -> tuple[MatchResults, MatchResults, MatchResults, int]: """ find matches for the given rules within the given process. @@ -127,7 +127,7 @@ def find_process_capabilities( def find_dynamic_capabilities( ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None -) -> Tuple[MatchResults, Any]: +) -> tuple[MatchResults, Any]: all_process_matches: MatchResults = collections.defaultdict(list) all_thread_matches: MatchResults = collections.defaultdict(list) all_call_matches: MatchResults = collections.defaultdict(list) @@ -135,7 +135,7 @@ def find_dynamic_capabilities( feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=()) assert isinstance(extractor, DynamicFeatureExtractor) - processes: List[ProcessHandle] = list(extractor.get_processes()) + processes: list[ProcessHandle] = list(extractor.get_processes()) n_processes: int = len(processes) with capa.helpers.CapaProgressBar( diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py index aeb710ae3..df8cd7e78 100644 --- a/capa/capabilities/static.py +++ b/capa/capabilities/static.py @@ -10,7 +10,7 @@ import logging import itertools import collections -from typing import Any, List, Tuple +from typing import Any import capa.perf import capa.helpers @@ -26,7 +26,7 @@ def find_instruction_capabilities( ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle -) -> Tuple[FeatureSet, MatchResults]: +) -> tuple[FeatureSet, MatchResults]: """ find matches for the given rules for the given instruction. 
@@ -53,7 +53,7 @@ def find_instruction_capabilities( def find_basic_block_capabilities( ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle -) -> Tuple[FeatureSet, MatchResults, MatchResults]: +) -> tuple[FeatureSet, MatchResults, MatchResults]: """ find matches for the given rules within the given basic block. @@ -93,7 +93,7 @@ def find_basic_block_capabilities( def find_code_capabilities( ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle -) -> Tuple[MatchResults, MatchResults, MatchResults, int]: +) -> tuple[MatchResults, MatchResults, MatchResults, int]: """ find matches for the given rules within the given function. @@ -131,16 +131,16 @@ def find_code_capabilities( def find_static_capabilities( ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None -) -> Tuple[MatchResults, Any]: +) -> tuple[MatchResults, Any]: all_function_matches: MatchResults = collections.defaultdict(list) all_bb_matches: MatchResults = collections.defaultdict(list) all_insn_matches: MatchResults = collections.defaultdict(list) feature_counts = rdoc.StaticFeatureCounts(file=0, functions=()) - library_functions: Tuple[rdoc.LibraryFunction, ...] = () + library_functions: tuple[rdoc.LibraryFunction, ...] 
= () assert isinstance(extractor, StaticFeatureExtractor) - functions: List[FunctionHandle] = list(extractor.get_functions()) + functions: list[FunctionHandle] = list(extractor.get_functions()) n_funcs: int = len(functions) n_libs: int = 0 percentage: float = 0 diff --git a/capa/engine.py b/capa/engine.py index 25c26cb96..ff2d642d0 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -8,7 +8,7 @@ import copy import collections -from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator +from typing import TYPE_CHECKING, Union, Mapping, Iterable, Iterator import capa.perf import capa.features.common @@ -27,7 +27,7 @@ # to collect the locations of a feature, do: `features[Number(0x10)]` # # aliased here so that the type can be documented and xref'd. -FeatureSet = Dict[Feature, Set[Address]] +FeatureSet = dict[Feature, set[Address]] class Statement: @@ -94,7 +94,7 @@ class And(Statement): match if all of the children evaluate to True. the order of evaluation is dictated by the property - `And.children` (type: List[Statement|Feature]). + `And.children` (type: list[Statement|Feature]). a query optimizer may safely manipulate the order of these children. """ @@ -127,7 +127,7 @@ class Or(Statement): match if any of the children evaluate to True. the order of evaluation is dictated by the property - `Or.children` (type: List[Statement|Feature]). + `Or.children` (type: list[Statement|Feature]). a query optimizer may safely manipulate the order of these children. """ @@ -176,7 +176,7 @@ class Some(Statement): match if at least N of the children evaluate to True. the order of evaluation is dictated by the property - `Some.children` (type: List[Statement|Feature]). + `Some.children` (type: list[Statement|Feature]). a query optimizer may safely manipulate the order of these children. 
""" @@ -267,7 +267,7 @@ def evaluate(self, features: FeatureSet, short_circuit=True): # inspect(match_details) # # aliased here so that the type can be documented and xref'd. -MatchResults = Mapping[str, List[Tuple[Address, Result]]] +MatchResults = Mapping[str, list[tuple[Address, Result]]] def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]: @@ -292,7 +292,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: features[capa.features.common.MatchedRule(namespace)].update(locations) -def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]: +def match(rules: list["capa.rules.Rule"], features: FeatureSet, addr: Address) -> tuple[FeatureSet, MatchResults]: """ match the given rules against the given features, returning an updated set of features and the matches. diff --git a/capa/features/com/__init__.py b/capa/features/com/__init__.py index 4b4edd041..722706b20 100644 --- a/capa/features/com/__init__.py +++ b/capa/features/com/__init__.py @@ -6,7 +6,6 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. from enum import Enum -from typing import Dict, List from capa.helpers import assert_never @@ -22,7 +21,7 @@ class ComType(Enum): } -def load_com_database(com_type: ComType) -> Dict[str, List[str]]: +def load_com_database(com_type: ComType) -> dict[str, list[str]]: # lazy load these python files since they are so large. # that is, don't load them unless a COM feature is being handled. 
import capa.features.com.classes diff --git a/capa/features/com/classes.py b/capa/features/com/classes.py index f517821f8..d5048a404 100644 --- a/capa/features/com/classes.py +++ b/capa/features/com/classes.py @@ -5,9 +5,8 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List -COM_CLASSES: Dict[str, List[str]] = { +COM_CLASSES: dict[str, list[str]] = { "ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"], "ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"], "ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"], diff --git a/capa/features/com/interfaces.py b/capa/features/com/interfaces.py index b2b9a9044..05d9049b4 100644 --- a/capa/features/com/interfaces.py +++ b/capa/features/com/interfaces.py @@ -5,9 +5,8 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Dict, List
 
-COM_INTERFACES: Dict[str, List[str]] = {
+COM_INTERFACES: dict[str, list[str]] = {
     "IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"],
     "IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"],
     "IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"],
@@ -16334,7 +16333,7 @@
     "IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"],
     "IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"],
     "IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"],
-    "IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
+    "IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"],
     "IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"],
     "IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"],
     "IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"],
diff --git a/capa/features/common.py b/capa/features/common.py
index e3401f7c8..5820c5793 100644
--- a/capa/features/common.py
+++ b/capa/features/common.py
@@ -9,10 +9,9 @@
 import re
 import abc
 import codecs
-import typing
 import logging
 import collections
-from typing import TYPE_CHECKING, Set, Dict, List, Union, Optional
+from typing import TYPE_CHECKING, Union, Optional
 
 if TYPE_CHECKING:
     # circular import, otherwise
@@ -79,8 +78,8 @@ def __init__(
         self,
         success: bool,
         statement: Union["capa.engine.Statement", "Feature"],
-        children: List["Result"],
-        locations: Optional[Set[Address]] = None,
+        children: list["Result"],
+        locations: Optional[set[Address]] = None,
     ):
         super().__init__()
         self.success = success
@@ -213,7 +212,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
 
         # mapping from string value to list of locations.
         # will unique the locations later on.
- matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set) + matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set) assert isinstance(self.value, str) for feature, locations in features.items(): @@ -261,7 +260,7 @@ class _MatchedSubstring(Substring): note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API. """ - def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]): + def __init__(self, substring: Substring, matches: dict[str, set[Address]]): """ args: substring: the substring feature that matches. @@ -305,7 +304,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True): # mapping from string value to list of locations. # will unique the locations later on. - matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set) + matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set) for feature, locations in features.items(): if not isinstance(feature, (String,)): @@ -353,7 +352,7 @@ class _MatchedRegex(Regex): note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API. """ - def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]): + def __init__(self, regex: Regex, matches: dict[str, set[Address]]): """ args: regex: the regex feature that matches. 
diff --git a/capa/features/extractors/base_extractor.py b/capa/features/extractors/base_extractor.py index a58016bcc..17aa2c208 100644 --- a/capa/features/extractors/base_extractor.py +++ b/capa/features/extractors/base_extractor.py @@ -11,13 +11,9 @@ import dataclasses from copy import copy from types import MethodType -from typing import Any, Set, Dict, Tuple, Union, Iterator +from typing import Any, Union, Iterator, TypeAlias from dataclasses import dataclass -# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated -# https://github.com/mandiant/capa/issues/1699 -from typing_extensions import TypeAlias - import capa.features.address from capa.features.common import Feature from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress @@ -59,7 +55,7 @@ class FunctionHandle: address: Address inner: Any - ctx: Dict[str, Any] = dataclasses.field(default_factory=dict) + ctx: dict[str, Any] = dataclasses.field(default_factory=dict) @dataclass @@ -135,7 +131,7 @@ def get_sample_hashes(self) -> SampleHashes: return self._sample_hashes @abc.abstractmethod - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: """ extract features found at every scope ("global"). @@ -146,12 +142,12 @@ def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: print('0x%x: %s', va, feature) yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @abc.abstractmethod - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: """ extract file-scope features. 
@@ -162,7 +158,7 @@ def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: print('0x%x: %s', va, feature) yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @@ -211,7 +207,7 @@ def get_function_name(self, addr: Address) -> str: raise KeyError(addr) @abc.abstractmethod - def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, f: FunctionHandle) -> Iterator[tuple[Feature, Address]]: """ extract function-scope features. the arguments are opaque values previously provided by `.get_functions()`, etc. @@ -227,7 +223,7 @@ def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`. yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @@ -240,7 +236,7 @@ def get_basic_blocks(self, f: FunctionHandle) -> Iterator[BBHandle]: raise NotImplementedError() @abc.abstractmethod - def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """ extract basic block-scope features. the arguments are opaque values previously provided by `.get_functions()`, etc. @@ -258,7 +254,7 @@ def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Itera bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`. 
yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @@ -273,7 +269,7 @@ def get_instructions(self, f: FunctionHandle, bb: BBHandle) -> Iterator[InsnHand @abc.abstractmethod def extract_insn_features( self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: """ extract instruction-scope features. the arguments are opaque values previously provided by `.get_functions()`, etc. @@ -293,12 +289,12 @@ def extract_insn_features( insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`. yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() -def FunctionFilter(extractor: StaticFeatureExtractor, functions: Set) -> StaticFeatureExtractor: +def FunctionFilter(extractor: StaticFeatureExtractor, functions: set) -> StaticFeatureExtractor: original_get_functions = extractor.get_functions def filtered_get_functions(self): @@ -387,7 +383,7 @@ def get_sample_hashes(self) -> SampleHashes: return self._sample_hashes @abc.abstractmethod - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: """ extract features found at every scope ("global"). @@ -398,12 +394,12 @@ def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: print(addr, feature) yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @abc.abstractmethod - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: """ extract file-scope features. 
@@ -414,7 +410,7 @@ def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: print(addr, feature) yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @@ -426,7 +422,7 @@ def get_processes(self) -> Iterator[ProcessHandle]: raise NotImplementedError() @abc.abstractmethod - def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: """ Yields all the features of a process. These include: - file features of the process' image @@ -449,7 +445,7 @@ def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: raise NotImplementedError() @abc.abstractmethod - def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]: """ Yields all the features of a thread. These include: - sequenced api traces @@ -466,7 +462,7 @@ def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle] @abc.abstractmethod def extract_call_features( self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: """ Yields all features of a call. 
These include: - api name @@ -485,7 +481,7 @@ def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> raise NotImplementedError() -def ProcessFilter(extractor: DynamicFeatureExtractor, processes: Set) -> DynamicFeatureExtractor: +def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> DynamicFeatureExtractor: original_get_processes = extractor.get_processes def filtered_get_processes(self): diff --git a/capa/features/extractors/binexport2/__init__.py b/capa/features/extractors/binexport2/__init__.py index d3ce77d22..8032b2fca 100644 --- a/capa/features/extractors/binexport2/__init__.py +++ b/capa/features/extractors/binexport2/__init__.py @@ -17,7 +17,7 @@ import hashlib import logging import contextlib -from typing import Set, Dict, List, Tuple, Iterator +from typing import Iterator from pathlib import Path from collections import defaultdict from dataclasses import dataclass @@ -51,13 +51,13 @@ def compute_common_prefix_length(m: str, n: str) -> int: return len(m) -def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: List[Path]) -> Path: +def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: list[Path]) -> Path: """attempt to find the sample file, given a BinExport2 file. searches in the same directory as the BinExport2 file, and then in search_paths. """ - def filename_similarity_key(p: Path) -> Tuple[int, str]: + def filename_similarity_key(p: Path) -> tuple[int, str]: # note closure over input_file. 
# sort first by length of common prefix, then by name (for stability) return (compute_common_prefix_length(p.name, input_file.name), p.name) @@ -65,7 +65,7 @@ def filename_similarity_key(p: Path) -> Tuple[int, str]: wanted_sha256: str = be2.meta_information.executable_id.lower() input_directory: Path = input_file.parent - siblings: List[Path] = [p for p in input_directory.iterdir() if p.is_file()] + siblings: list[Path] = [p for p in input_directory.iterdir() if p.is_file()] siblings.sort(key=filename_similarity_key, reverse=True) for sibling in siblings: # e.g. with open IDA files in the same directory on Windows @@ -74,7 +74,7 @@ def filename_similarity_key(p: Path) -> Tuple[int, str]: return sibling for search_path in search_paths: - candidates: List[Path] = [p for p in search_path.iterdir() if p.is_file()] + candidates: list[Path] = [p for p in search_path.iterdir() if p.is_file()] candidates.sort(key=filename_similarity_key, reverse=True) for candidate in candidates: with contextlib.suppress(PermissionError): @@ -88,27 +88,27 @@ class BinExport2Index: def __init__(self, be2: BinExport2): self.be2: BinExport2 = be2 - self.callers_by_vertex_index: Dict[int, List[int]] = defaultdict(list) - self.callees_by_vertex_index: Dict[int, List[int]] = defaultdict(list) + self.callers_by_vertex_index: dict[int, list[int]] = defaultdict(list) + self.callees_by_vertex_index: dict[int, list[int]] = defaultdict(list) # note: flow graph != call graph (vertex) - self.flow_graph_index_by_address: Dict[int, int] = {} - self.flow_graph_address_by_index: Dict[int, int] = {} + self.flow_graph_index_by_address: dict[int, int] = {} + self.flow_graph_address_by_index: dict[int, int] = {} # edges that come from the given basic block - self.source_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list) + self.source_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list) # edges that end up at the given basic block - 
self.target_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list) + self.target_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list) - self.vertex_index_by_address: Dict[int, int] = {} + self.vertex_index_by_address: dict[int, int] = {} - self.data_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list) - self.data_reference_index_by_target_address: Dict[int, List[int]] = defaultdict(list) - self.string_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list) + self.data_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list) + self.data_reference_index_by_target_address: dict[int, list[int]] = defaultdict(list) + self.string_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list) - self.insn_address_by_index: Dict[int, int] = {} - self.insn_index_by_address: Dict[int, int] = {} - self.insn_by_address: Dict[int, BinExport2.Instruction] = {} + self.insn_address_by_index: dict[int, int] = {} + self.insn_index_by_address: dict[int, int] = {} + self.insn_by_address: dict[int, BinExport2.Instruction] = {} # must index instructions first self._index_insn_addresses() @@ -208,7 +208,7 @@ def instruction_indices(basic_block: BinExport2.BasicBlock) -> Iterator[int]: def basic_block_instructions( self, basic_block: BinExport2.BasicBlock - ) -> Iterator[Tuple[int, BinExport2.Instruction, int]]: + ) -> Iterator[tuple[int, BinExport2.Instruction, int]]: """ For a given basic block, enumerate the instruction indices, the instruction instances, and their addresses. 
@@ -253,7 +253,7 @@ def __init__(self, be2: BinExport2, idx: BinExport2Index, buf: bytes): self.idx: BinExport2Index = idx self.buf: bytes = buf self.base_address: int = 0 - self.thunks: Dict[int, int] = {} + self.thunks: dict[int, int] = {} self._find_base_address() self._compute_thunks() @@ -279,7 +279,7 @@ def _compute_thunks(self): curr_idx: int = idx for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA): - thunk_callees: List[int] = self.idx.callees_by_vertex_index[curr_idx] + thunk_callees: list[int] = self.idx.callees_by_vertex_index[curr_idx] # if this doesn't hold, then it doesn't seem like this is a thunk, # because either, len is: # 0 and the thunk doesn't point to anything, or @@ -324,7 +324,7 @@ class AddressNotMappedError(ReadMemoryError): ... @dataclass class AddressSpace: base_address: int - memory_regions: Tuple[MemoryRegion, ...] + memory_regions: tuple[MemoryRegion, ...] def read_memory(self, address: int, length: int) -> bytes: rva: int = address - self.base_address @@ -337,7 +337,7 @@ def read_memory(self, address: int, length: int) -> bytes: @classmethod def from_pe(cls, pe: PE, base_address: int): - regions: List[MemoryRegion] = [] + regions: list[MemoryRegion] = [] for section in pe.sections: address: int = section.VirtualAddress size: int = section.Misc_VirtualSize @@ -355,7 +355,7 @@ def from_pe(cls, pe: PE, base_address: int): @classmethod def from_elf(cls, elf: ELFFile, base_address: int): - regions: List[MemoryRegion] = [] + regions: list[MemoryRegion] = [] # ELF segments are for runtime data, # ELF sections are for link-time data. 
@@ -401,9 +401,9 @@ class AnalysisContext: class FunctionContext: ctx: AnalysisContext flow_graph_index: int - format: Set[str] - os: Set[str] - arch: Set[str] + format: set[str] + os: set[str] + arch: set[str] @dataclass diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index 7af93aaff..deb6a7d4d 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import List, Tuple, Iterator, Optional +from typing import Iterator, Optional import capa.features.extractors.binexport2.helpers from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset @@ -30,7 +30,7 @@ def extract_insn_number_features( fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -91,7 +91,7 @@ def extract_insn_number_features( def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -120,7 +120,7 @@ def extract_insn_offset_features( def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner be2: BinExport2 = fhi.ctx.be2 @@ -131,7 +131,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed 
to be simple int/reg operands # so we don't have to realize the tree/list. - operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] if operands[1] != operands[2]: yield Characteristic("nzxor"), ih.address @@ -146,7 +146,7 @@ def extract_insn_nzxor_characteristic_features( def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner be2: BinExport2 = fhi.ctx.be2 diff --git a/capa/features/extractors/binexport2/arch/intel/helpers.py b/capa/features/extractors/binexport2/arch/intel/helpers.py index 3696c0d93..508be3ab4 100644 --- a/capa/features/extractors/binexport2/arch/intel/helpers.py +++ b/capa/features/extractors/binexport2/arch/intel/helpers.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Optional +from typing import Optional from dataclasses import dataclass from capa.features.extractors.binexport2.helpers import get_operand_expressions @@ -32,7 +32,7 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt # Base: Any general purpose register # Displacement: An integral offset - expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand) + expressions: list[BinExport2.Expression] = get_operand_expressions(be2, operand) # skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller # has checked for BinExport2.Expression.DEREFERENCE diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index efb4a6fe5..5f40e8709 100644 --- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import List, Tuple, Iterator +from typing import Iterator import capa.features.extractors.strings import capa.features.extractors.binexport2.helpers @@ -63,7 +63,7 @@ def extract_insn_number_features( fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -123,7 +123,7 @@ def extract_insn_number_features( def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -161,7 +161,7 @@ def is_security_cookie( # security cookie check should use SP or BP op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]] - op1_exprs: List[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index] + op1_exprs: list[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index] if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs): return False @@ -192,7 +192,7 @@ def is_security_cookie( def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse non-zeroing XOR instruction from the given instruction. ignore expected non-zeroing XORs, e.g. security cookies. @@ -209,7 +209,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed to be simple int/reg operands # so we don't have to realize the tree/list. 
- operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] if operands[0] == operands[1]: return @@ -236,7 +236,7 @@ def extract_insn_nzxor_characteristic_features( def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner be2: BinExport2 = fhi.ctx.be2 diff --git a/capa/features/extractors/binexport2/basicblock.py b/capa/features/extractors/binexport2/basicblock.py index bcb7977b4..15ad0b83b 100644 --- a/capa/features/extractors/binexport2/basicblock.py +++ b/capa/features/extractors/binexport2/basicblock.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress @@ -16,20 +16,20 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner bbi: BasicBlockContext = bbh.inner idx = fhi.ctx.idx basic_block_index: int = bbi.basic_block_index - target_edges: List[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index] + target_edges: list[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index] if basic_block_index in (e.source_basic_block_index for e in target_edges): basic_block_address: int = idx.get_basic_block_address(basic_block_index) yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address) -def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract basic block features""" for bb_handler in BASIC_BLOCK_HANDLERS: for feature, addr in bb_handler(fh, bbh): diff --git a/capa/features/extractors/binexport2/extractor.py b/capa/features/extractors/binexport2/extractor.py index 40d61e694..34c57a896 100644 --- a/capa/features/extractors/binexport2/extractor.py +++ b/capa/features/extractors/binexport2/extractor.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Set, List, Tuple, Iterator +from typing import Iterator import capa.features.extractors.elf import capa.features.extractors.common @@ -48,14 +48,14 @@ def __init__(self, be2: BinExport2, buf: bytes): address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address) self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space) - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf))) self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf))) self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf))) - self.format: Set[str] = set() - self.os: Set[str] = set() - self.arch: Set[str] = set() + self.format: set[str] = set() + self.os: set[str] = set() + self.arch: set[str] = set() for feature, _ in self.global_features: assert isinstance(feature.value, str) @@ -72,10 +72,10 @@ def __init__(self, be2: BinExport2, buf: bytes): def get_base_address(self) -> AbsoluteVirtualAddress: return AbsoluteVirtualAddress(self.analysis.base_address) - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: yield from self.global_features - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf) def get_functions(self) -> Iterator[FunctionHandle]: @@ -97,7 +97,7 @@ def get_functions(self) -> Iterator[FunctionHandle]: inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch), ) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def 
extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binexport2.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -112,7 +112,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: inner=BasicBlockContext(basic_block_index), ) - def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: @@ -126,5 +126,5 @@ def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHa def extract_insn_features( self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih) diff --git a/capa/features/extractors/binexport2/file.py b/capa/features/extractors/binexport2/file.py index 9d9872bc2..fa05a1280 100644 --- a/capa/features/extractors/binexport2/file.py +++ b/capa/features/extractors/binexport2/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import io import logging -from typing import Tuple, Iterator +from typing import Iterator import pefile from elftools.elf.elffile import ELFFile @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: if buf.startswith(capa.features.extractors.common.MATCH_PE): pe: pefile.PE = pefile.PE(data=buf) yield from capa.features.extractors.pefile.extract_file_export_names(pe) @@ -34,7 +34,7 @@ def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe logger.warning("unsupported format") -def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: if buf.startswith(capa.features.extractors.common.MATCH_PE): pe: pefile.PE = pefile.PE(data=buf) yield from capa.features.extractors.pefile.extract_file_import_names(pe) @@ -45,7 +45,7 @@ def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe logger.warning("unsupported format") -def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: if buf.startswith(capa.features.extractors.common.MATCH_PE): pe: pefile.PE = pefile.PE(data=buf) yield from capa.features.extractors.pefile.extract_file_section_names(pe) @@ -56,15 +56,15 @@ def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[F logger.warning("unsupported format") -def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_file_strings(buf) -def 
extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_format(buf) -def extract_features(be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_features(be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: """extract file features""" for file_handler in FILE_HANDLERS: for feature, addr in file_handler(be2, buf): diff --git a/capa/features/extractors/binexport2/function.py b/capa/features/extractors/binexport2/function.py index 0c49036d1..c550b81cc 100644 --- a/capa/features/extractors/binexport2/function.py +++ b/capa/features/extractors/binexport2/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic @@ -16,7 +16,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner be2: BinExport2 = fhi.ctx.be2 @@ -32,7 +32,7 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Add yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address) -def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner be2: BinExport2 = fhi.ctx.be2 @@ -40,7 +40,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address flow_graph_index: int = fhi.flow_graph_index flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index] - edges: List[Tuple[int, int]] = [] + edges: list[tuple[int, int]] = [] for edge in flow_graph.edge: edges.append((edge.source_basic_block_index, edge.target_basic_block_index)) @@ -48,7 +48,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address yield Characteristic("loop"), fh.address -def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner be2: BinExport2 = fhi.ctx.be2 @@ -63,7 +63,7 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address yield FunctionName(vertex.mangled_name), fh.address -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler 
in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/binexport2/helpers.py b/capa/features/extractors/binexport2/helpers.py index e4e7f7b76..29c40e81d 100644 --- a/capa/features/extractors/binexport2/helpers.py +++ b/capa/features/extractors/binexport2/helpers.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import re -from typing import Set, Dict, List, Tuple, Union, Iterator, Optional +from typing import Union, Iterator, Optional from collections import defaultdict from dataclasses import dataclass @@ -22,7 +22,7 @@ HAS_ARCH_ARM = {ARCH_AARCH64} -def mask_immediate(arch: Set[str], immediate: int) -> int: +def mask_immediate(arch: set[str], immediate: int) -> int: if arch & HAS_ARCH64: immediate &= 0xFFFFFFFFFFFFFFFF elif arch & HAS_ARCH32: @@ -30,7 +30,7 @@ def mask_immediate(arch: Set[str], immediate: int) -> int: return immediate -def twos_complement(arch: Set[str], immediate: int, default: Optional[int] = None) -> int: +def twos_complement(arch: set[str], immediate: int, default: Optional[int] = None) -> int: if default is not None: return capa.features.extractors.helpers.twos_complement(immediate, default) elif arch & HAS_ARCH64: @@ -55,12 +55,12 @@ def is_vertex_type(vertex: BinExport2.CallGraph.Vertex, type_: BinExport2.CallGr def _prune_expression_tree_empty_shifts( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], tree_index: int, ): expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.OPERATOR: if len(children_tree_indexes) 
== 0 and expression.symbol in ("lsl", "lsr"): @@ -85,12 +85,12 @@ def _prune_expression_tree_empty_shifts( def _prune_expression_tree_empty_commas( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], tree_index: int, ): expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.OPERATOR: if len(children_tree_indexes) == 1 and expression.symbol == ",": @@ -121,7 +121,7 @@ def _prune_expression_tree_empty_commas( def _prune_expression_tree( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], ): _prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0) _prune_expression_tree_empty_commas(be2, operand, expression_tree, 0) @@ -131,7 +131,7 @@ def _prune_expression_tree( def _build_expression_tree( be2: BinExport2, operand: BinExport2.Operand, -) -> List[List[int]]: +) -> list[list[int]]: # The reconstructed expression tree layout, linking parent nodes to their children. # # There is one list of integers for each expression in the operand. @@ -159,7 +159,7 @@ def _build_expression_tree( # exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817) return [] - tree: List[List[int]] = [] + tree: list[list[int]] = [] for i, expression_index in enumerate(operand.expression_index): children = [] @@ -181,16 +181,16 @@ def _build_expression_tree( def _fill_operand_expression_list( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], tree_index: int, - expression_list: List[BinExport2.Expression], + expression_list: list[BinExport2.Expression], ): """ Walk the given expression tree and collect the expression nodes in-order. 
""" expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.REGISTER: assert len(children_tree_indexes) == 0 @@ -282,10 +282,10 @@ def _fill_operand_expression_list( raise NotImplementedError(expression.type) -def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> List[BinExport2.Expression]: +def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> list[BinExport2.Expression]: tree = _build_expression_tree(be2, op) - expressions: List[BinExport2.Expression] = [] + expressions: list[BinExport2.Expression] = [] _fill_operand_expression_list(be2, op, tree, 0, expressions) return expressions @@ -331,11 +331,11 @@ def get_instruction_mnemonic(be2: BinExport2, instruction: BinExport2.Instructio return be2.mnemonic[instruction.mnemonic_index].name.lower() -def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> List[BinExport2.Operand]: +def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> list[BinExport2.Operand]: return [be2.operand[operand_index] for operand_index in instruction.operand_index] -def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]: +def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]: """ Splits a string by any of the provided delimiter characters, including the delimiters in the results. @@ -355,7 +355,7 @@ def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]: yield s[start:] -BinExport2OperandPattern = Union[str, Tuple[str, ...]] +BinExport2OperandPattern = Union[str, tuple[str, ...]] @dataclass @@ -382,8 +382,8 @@ class BinExport2InstructionPattern: This matcher uses the BinExport2 data layout under the hood. """ - mnemonics: Tuple[str, ...] 
- operands: Tuple[Union[str, BinExport2OperandPattern], ...] + mnemonics: tuple[str, ...] + operands: tuple[Union[str, BinExport2OperandPattern], ...] capture: Optional[str] @classmethod @@ -438,7 +438,7 @@ def from_str(cls, query: str): mnemonic, _, rest = pattern.partition(" ") mnemonics = mnemonic.split("|") - operands: List[Union[str, Tuple[str, ...]]] = [] + operands: list[Union[str, tuple[str, ...]]] = [] while rest: rest = rest.strip() if not rest.startswith("["): @@ -509,7 +509,7 @@ class MatchResult: expression: BinExport2.Expression def match( - self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]] + self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]] ) -> Optional["BinExport2InstructionPattern.MatchResult"]: """ Match the given BinExport2 data against this pattern. @@ -602,10 +602,10 @@ def match( class BinExport2InstructionPatternMatcher: """Index and match a collection of instruction patterns.""" - def __init__(self, queries: List[BinExport2InstructionPattern]): + def __init__(self, queries: list[BinExport2InstructionPattern]): self.queries = queries # shard the patterns by (mnemonic, #operands) - self._index: Dict[Tuple[str, int], List[BinExport2InstructionPattern]] = defaultdict(list) + self._index: dict[tuple[str, int], list[BinExport2InstructionPattern]] = defaultdict(list) for query in queries: for mnemonic in query.mnemonics: @@ -623,7 +623,7 @@ def from_str(cls, patterns: str): ) def match( - self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]] + self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]] ) -> Optional[BinExport2InstructionPattern.MatchResult]: queries = self._index.get((mnemonic.lower(), len(operand_expressions)), []) for query in queries: diff --git a/capa/features/extractors/binexport2/insn.py b/capa/features/extractors/binexport2/insn.py index 8f2e6af99..42abe95d1 100644 --- a/capa/features/extractors/binexport2/insn.py +++ 
b/capa/features/extractors/binexport2/insn.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import List, Tuple, Iterator +from typing import Iterator import capa.features.extractors.helpers import capa.features.extractors.strings @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) -def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -68,7 +68,7 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner if fhi.arch & HAS_ARCH_INTEL: @@ -77,7 +77,7 @@ def extract_insn_number_features( yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih) -def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -92,7 +92,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl # disassembler already identified string reference from instruction return - reference_addresses: List[int] = [] + reference_addresses: list[int] = [] if instruction_index in idx.data_reference_index_by_source_instruction_index: for data_reference_index in 
idx.data_reference_index_by_source_instruction_index[instruction_index]: @@ -142,7 +142,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_insn_string_features( fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -161,7 +161,7 @@ def extract_insn_string_features( def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner if fhi.arch & HAS_ARCH_INTEL: @@ -172,7 +172,7 @@ def extract_insn_offset_features( def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner if fhi.arch & HAS_ARCH_INTEL: @@ -187,7 +187,7 @@ def extract_insn_nzxor_characteristic_features( def extract_insn_mnemonic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -199,7 +199,7 @@ def extract_insn_mnemonic_features( yield Mnemonic(mnemonic_name), ih.address -def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract functions calls from features most relevant at the function scope; @@ -221,7 +221,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: 
fhi: FunctionContext = fh.inner if fhi.arch & HAS_ARCH_INTEL: @@ -234,7 +234,7 @@ def extract_function_indirect_call_characteristic_features( ) -def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: for feature, ea in inst_handler(f, bbh, insn): diff --git a/capa/features/extractors/binja/basicblock.py b/capa/features/extractors/binja/basicblock.py index e74c9f486..5cb8ca138 100644 --- a/capa/features/extractors/binja/basicblock.py +++ b/capa/features/extractors/binja/basicblock.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import string -from typing import Tuple, Iterator +from typing import Iterator from binaryninja import Function from binaryninja import BasicBlock as BinjaBasicBlock @@ -98,22 +98,22 @@ def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool: return False -def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract stackstring indicators from basic block""" - bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner + bb: tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]): yield Characteristic("stack string"), bbh.address -def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract tight loop indicators from a basic block""" - bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner + bb: tuple[BinjaBasicBlock, 
MediumLevelILBasicBlock] = bbh.inner for edge in bb[0].outgoing_edges: if edge.target.start == bb[0].start: yield Characteristic("tight loop"), bbh.address -def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract basic block features""" for bb_handler in BASIC_BLOCK_HANDLERS: for feature, addr in bb_handler(fh, bbh): diff --git a/capa/features/extractors/binja/extractor.py b/capa/features/extractors/binja/extractor.py index e542494af..a5bea1596 100644 --- a/capa/features/extractors/binja/extractor.py +++ b/capa/features/extractors/binja/extractor.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator import binaryninja as binja @@ -30,7 +30,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor): def __init__(self, bv: binja.BinaryView): super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length))) self.bv = bv - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv)) self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv)) self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv)) @@ -48,7 +48,7 @@ def get_functions(self) -> Iterator[FunctionHandle]: for f in self.bv.functions: yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binja.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -63,13 +63,13 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb)) - def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: import capa.features.extractors.binja.helpers as binja_helpers - bb: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner + bb: tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner addr = bb[0].start 
for text, length in bb[0]: diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index d5bb5a7c5..b3426212c 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Tuple, Iterator +from typing import Iterator from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding @@ -18,7 +18,7 @@ from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name -def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]: +def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]: """check segment for embedded PE""" start = 0 if bv.view_type == "PE" and seg.start == bv.start: @@ -32,13 +32,13 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset) -def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract embedded PE features""" for seg in bv.segments: yield from check_segment_for_pe(bv, seg) -def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract function exports""" for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol): if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]: @@ -72,7 +72,7 @@ def extract_file_export_names(bv: BinaryView) -> 
Iterator[Tuple[Feature, Address yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address) -def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract function imports 1. imports by ordinal: @@ -96,19 +96,19 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address yield Import(name), addr -def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract section names""" for name, section in bv.sections.items(): yield Section(name), AbsoluteVirtualAddress(section.start) -def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract ASCII and UTF-16 LE strings""" for s in bv.strings: yield String(s.value), FileOffsetAddress(s.start) -def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """ extract the names of statically-linked library functions. 
""" @@ -127,7 +127,7 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre yield FunctionName(name[1:]), sym.address -def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: view_type = bv.view_type if view_type in ["PE", "COFF"]: yield Format(FORMAT_PE), NO_ADDRESS @@ -140,7 +140,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: raise NotImplementedError(f"unexpected file format: {view_type}") -def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_features(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract file features""" for file_handler in FILE_HANDLERS: for feature, addr in file_handler(bv): diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index 520de0b3f..058f98a94 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Tuple, Iterator +from typing import Iterator from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation @@ -95,7 +95,7 @@ def extract_function_name(fh: FunctionHandle): yield FunctionName(name[1:]), sym.address -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/binja/global_.py b/capa/features/extractors/binja/global_.py index 7430d7405..4f774ffe7 100644 --- a/capa/features/extractors/binja/global_.py +++ b/capa/features/extractors/binja/global_.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Tuple, Iterator +from typing import Iterator from binaryninja import BinaryView @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) -def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: name = bv.platform.name if "-" in name: name = name.split("-")[0] @@ -45,7 +45,7 @@ def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: return -def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: arch = bv.arch.name if arch == "x86_64": yield Arch(ARCH_AMD64), NO_ADDRESS diff --git a/capa/features/extractors/binja/helpers.py b/capa/features/extractors/binja/helpers.py index 0ce0f073b..27e8d29e0 100644 --- a/capa/features/extractors/binja/helpers.py +++ b/capa/features/extractors/binja/helpers.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import re -from typing import List, Callable +from typing import Callable from dataclasses import dataclass from binaryninja import BinaryView, LowLevelILInstruction @@ -17,7 +17,7 @@ class DisassemblyInstruction: address: int length: int - text: List[InstructionTextToken] + text: list[InstructionTextToken] LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool] @@ -54,7 +54,7 @@ def unmangle_c_name(name: str) -> str: def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str: - s: List[str] = [] + s: list[str] = [] while len(s) < max_len: try: c = bv.read(offset + len(s), 1)[0] diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index f2b8fefc2..0e8b74ea8 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Any, List, Tuple, Iterator, Optional +from typing import Any, Iterator, Optional from binaryninja import Function from binaryninja import BasicBlock as BinjaBasicBlock @@ -64,7 +64,7 @@ def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]: return None -def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction API features @@ -123,7 +123,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction number features example: @@ -131,7 +131,7 @@ def extract_insn_number_features( """ func: Function = fh.inner - results: List[Tuple[Any[Number, OperandNumber], Address]] = [] + results: list[tuple[Any[Number, OperandNumber], Address]] = [] def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool: if il.operation == LowLevelILOperation.LLIL_LOAD: @@ -162,7 +162,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index yield from results -def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse referenced byte sequences example: @@ -209,7 +209,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index def extract_insn_string_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction string features @@ -266,7 +266,7 @@ def llil_checker(il: 
LowLevelILInstruction, parent: LowLevelILInstruction, index def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction structure offset features @@ -275,7 +275,7 @@ def extract_insn_offset_features( """ func: Function = fh.inner - results: List[Tuple[Any[Offset, OperandOffset], Address]] = [] + results: list[tuple[Any[Offset, OperandOffset], Address]] = [] address_size = func.view.arch.address_size * 8 def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool: @@ -353,7 +353,7 @@ def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInst def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction non-zeroing XOR instruction ignore expected non-zeroing XORs, e.g. security cookies @@ -382,7 +382,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index def extract_insn_mnemonic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction mnemonic features""" insn: DisassemblyInstruction = ih.inner yield Mnemonic(insn.text[0].text), ih.address @@ -390,7 +390,7 @@ def extract_insn_mnemonic_features( def extract_insn_obfs_call_plus_5_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse call $+5 instruction from the given instruction. 
""" @@ -401,7 +401,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features( def extract_insn_peb_access_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction peb access fs:[0x30] on x86, gs:[0x60] on x64 @@ -444,7 +444,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILOperation, index: def extract_insn_segment_access_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction fs or gs access""" func: Function = fh.inner @@ -471,7 +471,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index def extract_insn_cross_section_cflow( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """inspect the instruction for a CALL or JMP that crosses section boundaries""" func: Function = fh.inner bv: BinaryView = func.view @@ -491,7 +491,7 @@ def extract_insn_cross_section_cflow( yield Characteristic("cross section flow"), ih.address -def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract functions calls from features most relevant at the function scope, however, its most efficient to extract at the instruction scope @@ -534,7 +534,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """extract indirect function calls (e.g., call eax or call dword ptr [edx+4]) does not include calls like => call 
ds:dword_ABD4974 @@ -562,7 +562,7 @@ def extract_function_indirect_call_characteristic_features( yield Characteristic("indirect call"), ih.address -def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: for feature, ea in inst_handler(f, bbh, insn): diff --git a/capa/features/extractors/cape/call.py b/capa/features/extractors/cape/call.py index 0bee22fcc..229edf155 100644 --- a/capa/features/extractors/cape/call.py +++ b/capa/features/extractors/cape/call.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Tuple, Iterator +from typing import Iterator import capa.features.extractors.helpers from capa.helpers import assert_never @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) -def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: """ this method extracts the given call's features (such as API name and arguments), and returns them as API, Number, and String features. 
@@ -55,7 +55,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - yield API(name), ch.address -def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: for handler in CALL_HANDLERS: for feature, addr in handler(ph, th, ch): yield feature, addr diff --git a/capa/features/extractors/cape/extractor.py b/capa/features/extractors/cape/extractor.py index 923781aeb..fd042f570 100644 --- a/capa/features/extractors/cape/extractor.py +++ b/capa/features/extractors/cape/extractor.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, Tuple, Union, Iterator +from typing import Union, Iterator import capa.features.extractors.cape.call import capa.features.extractors.cape.file @@ -50,16 +50,16 @@ def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]: assert self.report.static is not None and self.report.static.pe is not None return AbsoluteVirtualAddress(self.report.static.pe.imagebase) - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: yield from self.global_features - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.cape.file.extract_features(self.report) def get_processes(self) -> Iterator[ProcessHandle]: yield from capa.features.extractors.cape.file.get_processes(self.report) - def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.cape.process.extract_features(ph) 
def get_process_name(self, ph) -> str: @@ -69,7 +69,7 @@ def get_process_name(self, ph) -> str: def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: yield from capa.features.extractors.cape.process.get_threads(ph) - def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]: if False: # force this routine to be a generator, # but we don't actually have any elements to generate. @@ -81,7 +81,7 @@ def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle] def extract_call_features( self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.cape.call.extract_features(ph, th, ch) def get_call_name(self, ph, th, ch) -> str: @@ -122,7 +122,7 @@ def get_call_name(self, ph, th, ch) -> str: return "".join(parts) @classmethod - def from_report(cls, report: Dict) -> "CapeExtractor": + def from_report(cls, report: dict) -> "CapeExtractor": cr = CapeReport.model_validate(report) if cr.info.version not in TESTED_VERSIONS: diff --git a/capa/features/extractors/cape/file.py b/capa/features/extractors/cape/file.py index 3143504c0..945b22089 100644 --- a/capa/features/extractors/cape/file.py +++ b/capa/features/extractors/cape/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.file import Export, Import, Section from capa.features.common import String, Feature @@ -41,7 +41,7 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]: seen_processes[addr].append(process) -def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]: """ extract imported function names """ @@ -62,57 +62,57 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address] yield Import(name), AbsoluteVirtualAddress(function.address) -def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]: assert report.static is not None and report.static.pe is not None for function in report.static.pe.exports: yield Export(function.name), AbsoluteVirtualAddress(function.address) -def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]: assert report.static is not None and report.static.pe is not None for section in report.static.pe.sections: yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address) -def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]]: if report.strings is not None: for string in report.strings: yield String(string), NO_ADDRESS -def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for regkey in report.behavior.summary.keys: yield String(regkey), NO_ADDRESS -def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_files(report: 
CapeReport) -> Iterator[tuple[Feature, Address]]: for file in report.behavior.summary.files: yield String(file), NO_ADDRESS -def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for mutex in report.behavior.summary.mutexes: yield String(mutex), NO_ADDRESS -def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for cmd in report.behavior.summary.executed_commands: yield String(cmd), NO_ADDRESS -def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for symbol in report.behavior.summary.resolved_apis: yield String(symbol), NO_ADDRESS -def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for svc in report.behavior.summary.created_services: yield String(svc), NO_ADDRESS for svc in report.behavior.summary.started_services: yield String(svc), NO_ADDRESS -def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for handler in FILE_HANDLERS: for feature, addr in handler(report): yield feature, addr diff --git a/capa/features/extractors/cape/global_.py b/capa/features/extractors/cape/global_.py index b73e5ab40..a3b23a7f7 100644 --- a/capa/features/extractors/cape/global_.py +++ b/capa/features/extractors/cape/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import ( OS, @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) -def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]: if "Intel 80386" in report.target.file.type: yield Arch(ARCH_I386), NO_ADDRESS elif "x86-64" in report.target.file.type: @@ -40,7 +40,7 @@ def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: ) -def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]: if "PE" in report.target.file.type: yield Format(FORMAT_PE), NO_ADDRESS elif "ELF" in report.target.file.type: @@ -52,7 +52,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: ) -def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]: # this variable contains the output of the file command file_output = report.target.file.type @@ -80,7 +80,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: yield OS(OS_ANY), NO_ADDRESS -def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for global_handler in GLOBAL_HANDLER: for feature, addr in global_handler(report): yield feature, addr diff --git a/capa/features/extractors/cape/helpers.py b/capa/features/extractors/cape/helpers.py index 31dc6c91b..46c584017 100644 --- a/capa/features/extractors/cape/helpers.py +++ b/capa/features/extractors/cape/helpers.py @@ -6,12 +6,12 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Any, Dict, List +from typing import Any from capa.features.extractors.base_extractor import ProcessHandle -def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]: +def find_process(processes: list[dict[str, Any]], ph: ProcessHandle) -> dict[str, Any]: """ find a specific process identified by a process handler. diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py index 61e60f716..7aa2c651f 100644 --- a/capa/features/extractors/cape/models.py +++ b/capa/features/extractors/cape/models.py @@ -6,10 +6,10 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import binascii -from typing import Any, Dict, List, Union, Literal, Optional +from typing import Any, Union, Literal, Optional, TypeAlias from pydantic import Field, BaseModel, ConfigDict -from typing_extensions import Annotated, TypeAlias +from typing_extensions import Annotated from pydantic.functional_validators import BeforeValidator @@ -59,11 +59,11 @@ class FlexibleModel(BaseModel): # in a field with this type. # then we can update the model with the discovered shape. 
TODO: TypeAlias = None
-ListTODO: TypeAlias = List[None]
+ListTODO: TypeAlias = list[None]
 DictTODO: TypeAlias = ExactModel
-EmptyDict: TypeAlias = BaseModel
-EmptyList: TypeAlias = List[Any]
+EmptyDict: TypeAlias = BaseModel
+EmptyList: TypeAlias = list[Any]


class Info(FlexibleModel):
@@ -77,7 +77,7 @@ class ImportedSymbol(ExactModel):

class ImportedDll(ExactModel):
    dll: str
-    imports: List[ImportedSymbol]
+    imports: list[ImportedSymbol]


class DirectoryEntry(ExactModel):
@@ -149,7 +149,7 @@ class Signer(ExactModel):
    aux_valid: Optional[bool] = None
    aux_error: Optional[bool] = None
    aux_error_desc: Optional[str] = None
-    aux_signers: Optional[List[AuxSigner]] = None
+    aux_signers: Optional[list[AuxSigner]] = None


class Overlay(ExactModel):
@@ -178,22 +178,22 @@ class PE(ExactModel):
    pdbpath: Optional[str] = None
    timestamp: str

-    # List[ImportedDll], or Dict[basename(dll), ImportedDll]
-    imports: Union[List[ImportedDll], Dict[str, ImportedDll]]
+    # list[ImportedDll], or dict[basename(dll), ImportedDll]
+    imports: Union[list[ImportedDll], dict[str, ImportedDll]]
    imported_dll_count: Optional[int] = None
    imphash: str

    exported_dll_name: Optional[str] = None
-    exports: List[ExportedSymbol]
+    exports: list[ExportedSymbol]

-    dirents: List[DirectoryEntry]
-    sections: List[Section]
+    dirents: list[DirectoryEntry]
+    sections: list[Section]

    ep_bytes: Optional[HexBytes] = None

    overlay: Optional[Overlay] = None
-    resources: List[Resource]
-    versioninfo: List[KV]
+    resources: list[Resource]
+    versioninfo: list[KV]

    # base64 encoded data
    icon: Optional[str] = None
@@ -204,7 +204,7 @@ class PE(ExactModel):
    # short hex string
    icon_dhash: Optional[str] = None

-    digital_signers: List[DigitalSigner]
+    digital_signers: list[DigitalSigner]
    guest_signers: Signer


@@ -217,9 +217,9 @@ class File(FlexibleModel):
    cape_type: Optional[str] = None

    pid: Optional[Union[int, Literal[""]]] = None
-    name: Union[List[str], str]
+    name: Union[list[str], str]
    path: str
-    guest_paths: Union[List[str], str, 
None] + guest_paths: Union[list[str], str, None] timestamp: Optional[str] = None # @@ -244,7 +244,7 @@ class File(FlexibleModel): ep_bytes: Optional[HexBytes] = None entrypoint: Optional[int] = None data: Optional[str] = None - strings: Optional[List[str]] = None + strings: Optional[list[str]] = None # # detections (skip) @@ -283,7 +283,7 @@ class Call(ExactModel): api: str - arguments: List[Argument] + arguments: list[Argument] status: bool return_: HexInt = Field(alias="return") pretty_return: Optional[str] = None @@ -304,9 +304,9 @@ class Process(ExactModel): parent_id: int module_path: str first_seen: str - calls: List[Call] - threads: List[int] - environ: Dict[str, str] + calls: list[Call] + threads: list[int] + environ: dict[str, str] class ProcessTree(ExactModel): @@ -314,25 +314,25 @@ class ProcessTree(ExactModel): pid: int parent_id: int module_path: str - threads: List[int] - environ: Dict[str, str] - children: List["ProcessTree"] + threads: list[int] + environ: dict[str, str] + children: list["ProcessTree"] class Summary(ExactModel): - files: List[str] - read_files: List[str] - write_files: List[str] - delete_files: List[str] - keys: List[str] - read_keys: List[str] - write_keys: List[str] - delete_keys: List[str] - executed_commands: List[str] - resolved_apis: List[str] - mutexes: List[str] - created_services: List[str] - started_services: List[str] + files: list[str] + read_files: list[str] + write_files: list[str] + delete_files: list[str] + keys: list[str] + read_keys: list[str] + write_keys: list[str] + delete_keys: list[str] + executed_commands: list[str] + resolved_apis: list[str] + mutexes: list[str] + created_services: list[str] + started_services: list[str] class EncryptedBuffer(ExactModel): @@ -349,12 +349,12 @@ class Behavior(ExactModel): summary: Summary # list of processes, of threads, of calls - processes: List[Process] + processes: list[Process] # tree of processes - processtree: List[ProcessTree] + processtree: list[ProcessTree] - 
anomaly: List[str] - encryptedbuffers: List[EncryptedBuffer] + anomaly: list[str] + encryptedbuffers: list[EncryptedBuffer] # these are small objects that describe atomic events, # like file move, registry access. # we'll detect the same with our API call analysis. @@ -373,7 +373,7 @@ class Static(ExactModel): class Cape(ExactModel): - payloads: List[ProcessFile] + payloads: list[ProcessFile] configs: Skip = None @@ -389,7 +389,7 @@ class CapeReport(FlexibleModel): # static analysis results # static: Optional[Static] = None - strings: Optional[List[str]] = None + strings: Optional[list[str]] = None # # dynamic analysis results @@ -398,9 +398,9 @@ class CapeReport(FlexibleModel): behavior: Behavior # post-processed results: payloads and extracted configs - CAPE: Optional[Union[Cape, List]] = None - dropped: Optional[List[File]] = None - procdump: Optional[List[ProcessFile]] = None + CAPE: Optional[Union[Cape, list]] = None + dropped: Optional[list[File]] = None + procdump: Optional[list[ProcessFile]] = None procmemory: ListTODO # ========================================================================= @@ -437,7 +437,7 @@ class CapeReport(FlexibleModel): malfamily_tag: Optional[str] = None malscore: float detections: Skip = None - detections2pid: Optional[Dict[int, List[str]]] = None + detections2pid: Optional[dict[int, list[str]]] = None # AV detections for the sample. virustotal: Skip = None diff --git a/capa/features/extractors/cape/process.py b/capa/features/extractors/cape/process.py index 909a9637e..d2bc260c5 100644 --- a/capa/features/extractors/cape/process.py +++ b/capa/features/extractors/cape/process.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import List, Tuple, Iterator +from typing import Iterator from capa.features.common import String, Feature from capa.features.address import Address, ThreadAddress @@ -22,14 +22,14 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]: get the threads associated with a given process """ process: Process = ph.inner - threads: List[int] = process.threads + threads: list[int] = process.threads for thread in threads: address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread) yield ThreadHandle(address=address, inner={}) -def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: """ extract strings from a process' provided environment variables. """ @@ -39,7 +39,7 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Addres yield String(value), ph.address -def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: for handler in PROCESS_HANDLERS: for feature, addr in handler(ph): yield feature, addr diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py index aa2144c73..e2e95a857 100644 --- a/capa/features/extractors/common.py +++ b/capa/features/extractors/common.py @@ -10,7 +10,7 @@ import logging import binascii import contextlib -from typing import Tuple, Iterator +from typing import Iterator import pefile @@ -45,7 +45,7 @@ MATCH_JSON_OBJECT = b'{"' -def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]: +def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address]]: """ extract ASCII and UTF-16 LE strings from file """ @@ -56,7 +56,7 @@ def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address yield String(s.s), FileOffsetAddress(s.offset) -def extract_format(buf: bytes) -> 
Iterator[Tuple[Feature, Address]]: +def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]: if buf.startswith(MATCH_PE): yield Format(FORMAT_PE), NO_ADDRESS elif buf.startswith(MATCH_ELF): @@ -79,7 +79,7 @@ def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]: return -def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(buf) -> Iterator[tuple[Feature, Address]]: if buf.startswith(MATCH_PE): yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf)) @@ -111,7 +111,7 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]: return -def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]: +def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]: if os != OS_AUTO: yield OS(os), NO_ADDRESS diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py index fae20db36..8f1efcf4f 100644 --- a/capa/features/extractors/dnfile/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -8,7 +8,7 @@ from __future__ import annotations -from typing import Dict, List, Tuple, Union, Iterator, Optional +from typing import Union, Iterator, Optional from pathlib import Path import dnfile @@ -41,11 +41,11 @@ class DnFileFeatureExtractorCache: def __init__(self, pe: dnfile.dnPE): - self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} - self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} - self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} - self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} - self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.native_imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.methods: dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.fields: dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.types: dict[int, Union[DnType, DnUnmanagedMethod]] = {} for 
import_ in get_dotnet_managed_imports(pe): self.imports[import_.token] = import_ @@ -84,7 +84,7 @@ def __init__(self, path: Path): self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe) # pre-compute these because we'll yield them at *every* scope. - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format()) self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe)) self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe)) @@ -100,7 +100,7 @@ def extract_file_features(self): def get_functions(self) -> Iterator[FunctionHandle]: # create a method lookup table - methods: Dict[Address, FunctionHandle] = {} + methods: dict[Address, FunctionHandle] = {} for token, method in get_dotnet_managed_method_bodies(self.pe): fh: FunctionHandle = FunctionHandle( address=DNTokenAddress(token), @@ -136,7 +136,7 @@ def get_functions(self) -> Iterator[FunctionHandle]: yield from methods.values() - def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.dnfile.function.extract_features(fh) def get_basic_blocks(self, f) -> Iterator[BBHandle]: @@ -157,5 +157,5 @@ def get_instructions(self, fh, bbh): inner=insn, ) - def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]: + def extract_insn_features(self, fh, bbh, ih) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih) diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 47f63ca05..b4748b66e 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -8,7 +8,7 @@ from __future__ import 
annotations -from typing import Tuple, Iterator +from typing import Iterator import dnfile @@ -18,35 +18,35 @@ from capa.features.address import Address -def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]: +def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[tuple[Import, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe) -def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]: +def extract_file_format(pe: dnfile.dnPE) -> Iterator[tuple[Format, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe) -def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]: +def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[tuple[FunctionName, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe) -def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]: +def extract_file_strings(pe: dnfile.dnPE) -> Iterator[tuple[String, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe) -def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]: +def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[tuple[Characteristic, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe) -def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]: +def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[tuple[Namespace, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe) -def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]: +def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[tuple[Class, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe) 
-def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: +def extract_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]: for file_handler in FILE_HANDLERS: for feature, address in file_handler(pe): yield feature, address diff --git a/capa/features/extractors/dnfile/function.py b/capa/features/extractors/dnfile/function.py index ed1bdf8a0..a1fc60d81 100644 --- a/capa/features/extractors/dnfile/function.py +++ b/capa/features/extractors/dnfile/function.py @@ -9,7 +9,7 @@ from __future__ import annotations import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import Feature, Characteristic from capa.features.address import Address @@ -18,30 +18,30 @@ logger = logging.getLogger(__name__) -def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: +def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]: """extract callers to a function""" for dest in fh.ctx["calls_to"]: yield Characteristic("calls to"), dest -def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: +def extract_function_calls_from(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]: """extract callers from a function""" for src in fh.ctx["calls_from"]: yield Characteristic("calls from"), src -def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: +def extract_recursive_call(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]: """extract recursive function call""" if fh.address in fh.ctx["calls_to"]: yield Characteristic("recursive call"), fh.address -def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: +def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]: """extract loop indicators from a function""" raise NotImplementedError() -def extract_features(fh: FunctionHandle) -> 
Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index d7f4499ec..deabafa1a 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -9,7 +9,7 @@ from __future__ import annotations import logging -from typing import Dict, Tuple, Union, Iterator, Optional +from typing import Union, Iterator, Optional import dnfile from dncil.cil.body import CilMethodBody @@ -144,7 +144,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: ) -def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: +def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]: """get MethodDef methods used to access properties see https://www.ntcore.com/files/dotnetformat.htm @@ -194,7 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: """ nested_class_table = get_dotnet_nested_class_table_index(pe) - accessor_map: Dict[int, str] = {} + accessor_map: dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): accessor_map[methoddef] = methoddef_access @@ -252,7 +252,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name) -def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: +def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]: """get managed methods from MethodDef table""" for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number): assert isinstance(method_def, dnfile.mdtable.MethodDefRow) @@ -332,7 +332,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, 
table_index: int, row_index: int) -> O def resolve_nested_typedef_name( nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE -) -> Tuple[str, Tuple[str, ...]]: +) -> tuple[str, tuple[str, ...]]: """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" if index in nested_class_table: @@ -368,7 +368,7 @@ def resolve_nested_typedef_name( def resolve_nested_typeref_name( index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE -) -> Tuple[str, Tuple[str, ...]]: +) -> tuple[str, tuple[str, ...]]: """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" # If the ResolutionScope decodes to a typeRef type then it is nested if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): @@ -398,7 +398,7 @@ def resolve_nested_typeref_name( return str(typeref.TypeNamespace), (str(typeref.TypeName),) -def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]: +def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict[int, int]: """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" nested_class_table = {} @@ -442,7 +442,7 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: return not bool(pe.net.Flags.CLR_ILONLY) -def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]: +def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[tuple[int, dnfile.base.MDTableRow]]: assert pe.net is not None assert pe.net.mdtables is not None diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index e6e9f9406..257af99fa 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -9,7 +9,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Tuple, Union, Iterator, Optional +from 
typing import TYPE_CHECKING, Union, Iterator, Optional if TYPE_CHECKING: from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache @@ -61,7 +61,7 @@ def get_callee( return callee -def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse instruction API features""" if ih.inner.opcode not in ( OpCodes.Call, @@ -83,7 +83,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato yield API(name), ih.address -def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse instruction property features""" name: Optional[str] = None access: Optional[str] = None @@ -118,7 +118,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It def extract_insn_namespace_class_features( fh: FunctionHandle, bh, ih: InsnHandle -) -> Iterator[Tuple[Union[Namespace, Class], Address]]: +) -> Iterator[tuple[Union[Namespace, Class], Address]]: """parse instruction namespace and class features""" type_: Optional[Union[DnType, DnUnmanagedMethod]] = None @@ -173,13 +173,13 @@ def extract_insn_namespace_class_features( yield Namespace(type_.namespace), ih.address -def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse instruction number features""" if ih.inner.is_ldc(): yield Number(ih.inner.get_ldc()), ih.address -def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: 
"""parse instruction string features""" if not ih.inner.is_ldstr(): return @@ -197,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter def extract_unmanaged_call_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Characteristic, Address]]: +) -> Iterator[tuple[Characteristic, Address]]: if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp): return @@ -209,7 +209,7 @@ def extract_unmanaged_call_characteristic_features( yield Characteristic("unmanaged call"), ih.address -def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: for feature, addr in inst_handler(fh, bbh, ih): diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 12aac5d61..7b5758ea0 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -6,17 +6,17 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Tuple, Optional +from typing import Optional class DnType: def __init__( - self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None + self, token: int, class_: tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None ): self.token: int = token self.access: Optional[str] = access self.namespace: str = namespace - self.class_: Tuple[str, ...] = class_ + self.class_: tuple[str, ...] 
= class_ if member == ".ctor": member = "ctor" @@ -44,7 +44,7 @@ def __repr__(self): return str(self) @staticmethod - def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""): + def format_name(class_: tuple[str, ...], namespace: str = "", member: str = ""): if len(class_) > 1: class_str = "/".join(class_) # Concat items in tuple, separated by a "/" else: diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 5ab998579..bfdfcfa4a 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Tuple, Iterator +from typing import Iterator from pathlib import Path import dnfile @@ -48,12 +48,12 @@ logger = logging.getLogger(__name__) -def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]: +def extract_file_format(**kwargs) -> Iterator[tuple[Format, Address]]: yield Format(FORMAT_DOTNET), NO_ADDRESS yield Format(FORMAT_PE), NO_ADDRESS -def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]: +def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Import, Address]]: for method in get_dotnet_managed_imports(pe): # like System.IO.File::OpenRead yield Import(str(method)), DNTokenAddress(method.token) @@ -64,12 +64,12 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor yield Import(name), DNTokenAddress(imp.token) -def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]: +def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[FunctionName, Address]]: for method in get_dotnet_managed_methods(pe): yield FunctionName(str(method)), 
DNTokenAddress(method.token) -def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]: +def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Namespace, Address]]: """emit namespace features from TypeRef and TypeDef tables""" # namespaces may be referenced multiple times, so we need to filter @@ -93,7 +93,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple yield Namespace(namespace), NO_ADDRESS -def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: +def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" nested_class_table = get_dotnet_nested_class_table_index(pe) @@ -116,11 +116,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token) -def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]: +def extract_file_os(**kwargs) -> Iterator[tuple[OS, Address]]: yield OS(OS_ANY), NO_ADDRESS -def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]: +def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address]]: # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 # .NET 4.5 added option: any CPU, 32-bit preferred assert pe.net is not None @@ -134,18 +134,18 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address yield Arch(ARCH_ANY), NO_ADDRESS -def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]: +def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[String, Address]]: yield from capa.features.extractors.common.extract_file_strings(pe.__data__) def extract_file_mixed_mode_characteristic_features( pe: dnfile.dnPE, **kwargs -) -> 
Iterator[Tuple[Characteristic, Address]]: +) -> Iterator[tuple[Characteristic, Address]]: if is_dotnet_mixed_mode(pe): yield Characteristic("mixed mode"), NO_ADDRESS -def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: +def extract_file_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]: for file_handler in FILE_HANDLERS: for feature, addr in file_handler(pe=pe): # type: ignore yield feature, addr @@ -162,7 +162,7 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: ) -def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: +def extract_global_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]: for handler in GLOBAL_HANDLERS: for feature, va in handler(pe=pe): # type: ignore yield feature, va @@ -204,7 +204,7 @@ def is_dotnet_file(self) -> bool: def is_mixed_mode(self) -> bool: return is_dotnet_mixed_mode(self.pe) - def get_runtime_version(self) -> Tuple[int, int]: + def get_runtime_version(self) -> tuple[int, int]: assert self.pe.net is not None assert self.pe.net.struct is not None assert self.pe.net.struct.MajorRuntimeVersion is not None diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index 7d0e2a5ee..c6af7035c 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator import capa.features.extractors.helpers from capa.features.insn import API, Number @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: """ This method extracts the given call's features (such as API name and arguments), and returns them as API, Number, and String features. @@ -49,7 +49,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - yield API(name), ch.address -def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: for handler in CALL_HANDLERS: for feature, addr in handler(ph, th, ch): yield feature, addr diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py index 1a4f5062e..e7fb69eff 100644 --- a/capa/features/extractors/drakvuf/extractor.py +++ b/capa/features/extractors/drakvuf/extractor.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Dict, List, Tuple, Union, Iterator +from typing import Union, Iterator import capa.features.extractors.drakvuf.call import capa.features.extractors.drakvuf.file @@ -39,7 +39,7 @@ def __init__(self, report: DrakvufReport): self.report: DrakvufReport = report # sort the api calls to prevent going through the entire list each time - self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report) + self.sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = index_calls(report) # pre-compute these because we'll yield them at *every* scope. self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report)) @@ -48,16 +48,16 @@ def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]: # DRAKVUF currently does not yield information about the PE's address return NO_ADDRESS - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: yield from self.global_features - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.drakvuf.file.extract_features(self.report) def get_processes(self) -> Iterator[ProcessHandle]: yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls) - def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.drakvuf.process.extract_features(ph) def get_process_name(self, ph: ProcessHandle) -> str: @@ -66,7 +66,7 @@ def get_process_name(self, ph: ProcessHandle) -> str: def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph) - def 
extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]: if False: # force this routine to be a generator, # but we don't actually have any elements to generate. @@ -87,10 +87,10 @@ def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> def extract_call_features( self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch) @classmethod - def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor": + def from_report(cls, report: Iterator[dict]) -> "DrakvufExtractor": dr = DrakvufReport.from_raw_report(report) return DrakvufExtractor(report=dr) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index d93c354b2..f0d310ba9 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Tuple, Iterator +from typing import Iterator from capa.features.file import Import from capa.features.common import Feature @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]: +def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]]) -> Iterator[ProcessHandle]: """ Get all the created processes for a sample. 
""" @@ -28,7 +28,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name}) -def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: """ Extract imported function names. """ @@ -43,7 +43,7 @@ def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Addre yield Import(name), AbsoluteVirtualAddress(function_address) -def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: for handler in FILE_HANDLERS: for feature, addr in handler(report): yield feature, addr diff --git a/capa/features/extractors/drakvuf/global_.py b/capa/features/extractors/drakvuf/global_.py index 00d18afc7..0475583b6 100644 --- a/capa/features/extractors/drakvuf/global_.py +++ b/capa/features/extractors/drakvuf/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature from capa.features.address import NO_ADDRESS, Address @@ -16,22 +16,22 @@ logger = logging.getLogger(__name__) -def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_format(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: # DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield Format(FORMAT_PE), NO_ADDRESS -def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_os(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: # DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield OS(OS_WINDOWS), NO_ADDRESS -def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: # DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield Arch(ARCH_AMD64), NO_ADDRESS -def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: for global_handler in GLOBAL_HANDLER: for feature, addr in global_handler(report): yield feature, addr diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index 59708f5df..a47c62be7 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -7,16 +7,15 @@ # See the License for the specific language governing permissions and limitations under the License. 
import itertools -from typing import Dict, List from capa.features.address import ThreadAddress, ProcessAddress from capa.features.extractors.drakvuf.models import Call, DrakvufReport -def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]: +def index_calls(report: DrakvufReport) -> dict[ProcessAddress, dict[ThreadAddress, list[Call]]]: # this method organizes calls into processes and threads, and then sorts them based on # timestamp so that we can address individual calls per index (CallAddress requires call index) - result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {} + result: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = {} for call in itertools.chain(report.syscalls, report.apicalls): if call.pid == 0: # DRAKVUF captures api/native calls from all processes running on the system. diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index fbfd649c7..0af4b11e7 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Any, Dict, List, Iterator +from typing import Any, Iterator from pydantic import Field, BaseModel, ConfigDict, model_validator @@ -47,7 +47,7 @@ class LoadedDLL(ConciseModel): plugin_name: str = Field(alias="Plugin") event: str = Field(alias="Event") name: str = Field(alias="DllName") - imports: Dict[str, int] = Field(alias="Rva") + imports: dict[str, int] = Field(alias="Rva") class Call(ConciseModel): @@ -58,18 +58,18 @@ class Call(ConciseModel): pid: int = Field(alias="PID") tid: int = Field(alias="TID") name: str = Field(alias="Method") - arguments: Dict[str, str] + arguments: dict[str, str] class WinApiCall(Call): # This class models Windows API calls captured by DRAKVUF (DLLs, etc.). - arguments: Dict[str, str] = Field(alias="Arguments") + arguments: dict[str, str] = Field(alias="Arguments") event: str = Field(alias="Event") return_value: str = Field(alias="ReturnValue") @model_validator(mode="before") @classmethod - def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]: + def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]: args = values["Arguments"] values["Arguments"] = dict(arg.split("=", 1) for arg in args) return values @@ -100,7 +100,7 @@ class SystemCall(Call): @model_validator(mode="before") @classmethod - def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]: # DRAKVUF stores argument names and values as entries in the syscall's entry. # This model validator collects those arguments into a list in the model. 
values["arguments"] = { @@ -110,13 +110,13 @@ def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: class DrakvufReport(ConciseModel): - syscalls: List[SystemCall] = [] - apicalls: List[WinApiCall] = [] - discovered_dlls: List[DiscoveredDLL] = [] - loaded_dlls: List[LoadedDLL] = [] + syscalls: list[SystemCall] = [] + apicalls: list[WinApiCall] = [] + discovered_dlls: list[DiscoveredDLL] = [] + loaded_dlls: list[LoadedDLL] = [] @classmethod - def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport": + def from_raw_report(cls, entries: Iterator[dict]) -> "DrakvufReport": report = cls() for entry in entries: diff --git a/capa/features/extractors/drakvuf/process.py b/capa/features/extractors/drakvuf/process.py index 8b0819264..292e8af3c 100644 --- a/capa/features/extractors/drakvuf/process.py +++ b/capa/features/extractors/drakvuf/process.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Tuple, Iterator +from typing import Iterator from capa.features.common import String, Feature from capa.features.address import Address, ThreadAddress, ProcessAddress @@ -18,7 +18,7 @@ def get_threads( - calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle + calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle ) -> Iterator[ThreadHandle]: """ Get the threads associated with a given process. 
@@ -27,11 +27,11 @@ def get_threads( yield ThreadHandle(address=thread_addr, inner={}) -def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_process_name(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: yield String(ph.inner["process_name"]), ph.address -def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: for handler in PROCESS_HANDLERS: for feature, addr in handler(ph): yield feature, addr diff --git a/capa/features/extractors/drakvuf/thread.py b/capa/features/extractors/drakvuf/thread.py index 5e72b51ab..830098579 100644 --- a/capa/features/extractors/drakvuf/thread.py +++ b/capa/features/extractors/drakvuf/thread.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Iterator +from typing import Iterator from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle @@ -17,7 +17,7 @@ def get_calls( - sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle + sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle, th: ThreadHandle ) -> Iterator[CallHandle]: for i, call in enumerate(sorted_calls[ph.address][th.address]): call_addr = DynamicCallAddress(thread=th.address, id=i) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 82c8c3da9..8ce0c7b4a 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -10,7 +10,7 @@ import itertools import collections from enum import Enum -from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional +from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional from dataclasses import 
dataclass if TYPE_CHECKING: @@ -394,7 +394,7 @@ def linker(self): return read_cstr(phdr.buf, 0) @property - def versions_needed(self) -> Dict[str, Set[str]]: + def versions_needed(self) -> dict[str, set[str]]: # symbol version requirements are stored in the .gnu.version_r section, # which has type SHT_GNU_verneed (0x6ffffffe). # @@ -452,7 +452,7 @@ def versions_needed(self) -> Dict[str, Set[str]]: return {} @property - def dynamic_entries(self) -> Iterator[Tuple[int, int]]: + def dynamic_entries(self) -> Iterator[tuple[int, int]]: """ read the entries from the dynamic section, yielding the tag and value for each entry. @@ -547,7 +547,7 @@ def needed(self) -> Iterator[str]: logger.warning("failed to read DT_NEEDED entry: %s", str(e)) @property - def symtab(self) -> Optional[Tuple[Shdr, Shdr]]: + def symtab(self) -> Optional[tuple[Shdr, Shdr]]: """ fetch the Shdr for the symtab and the associated strtab. """ @@ -682,7 +682,7 @@ def __init__( symtab: Shdr, strtab: Shdr, ) -> None: - self.symbols: List[Symbol] = [] + self.symbols: list[Symbol] = [] self.symtab = symtab self.strtab = strtab diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 630f8024e..b63039a19 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import io import logging -from typing import Tuple, Iterator +from typing import Iterator from pathlib import Path from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection @@ -166,7 +166,7 @@ def extract_file_arch(elf: ELFFile, **kwargs): logger.warning("unsupported architecture: %s", arch) -def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]: +def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]: for file_handler in FILE_HANDLERS: for feature, addr in file_handler(elf=elf, buf=buf): # type: ignore yield feature, addr @@ -182,7 +182,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i ) -def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]: +def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]: for global_handler in GLOBAL_HANDLERS: for feature, addr in global_handler(elf=elf, buf=buf): # type: ignore yield feature, addr diff --git a/capa/features/extractors/ghidra/basicblock.py b/capa/features/extractors/ghidra/basicblock.py index b3271586e..a89586a60 100644 --- a/capa/features/extractors/ghidra/basicblock.py +++ b/capa/features/extractors/ghidra/basicblock.py @@ -8,7 +8,7 @@ import string import struct -from typing import Tuple, Iterator +from typing import Iterator import ghidra from ghidra.program.model.lang import OperandType @@ -97,7 +97,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock): return False -def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract stackstring indicators from basic block""" bb: ghidra.program.model.block.CodeBlock = bbh.inner @@ -105,7 +105,7 @@ def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[ yield Characteristic("stack string"), bbh.address -def 
extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """check basic block for tight loop indicators""" bb: ghidra.program.model.block.CodeBlock = bbh.inner @@ -119,7 +119,7 @@ def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[F ) -def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """ extract features from the given basic block. @@ -127,7 +127,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur bb: the basic block to process. yields: - Tuple[Feature, int]: the features and their location found in this basic block. + tuple[Feature, int]: the features and their location found in this basic block. """ yield BasicBlock(), bbh.address for bb_handler in BASIC_BLOCK_HANDLERS: diff --git a/capa/features/extractors/ghidra/extractor.py b/capa/features/extractors/ghidra/extractor.py index c7ed637bb..c694e0865 100644 --- a/capa/features/extractors/ghidra/extractor.py +++ b/capa/features/extractors/ghidra/extractor.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator import capa.features.extractors.ghidra.file import capa.features.extractors.ghidra.insn @@ -40,7 +40,7 @@ def __init__(self): ) ) - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format()) self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os()) self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch()) @@ -73,7 +73,7 @@ def get_function(addr: int) -> FunctionHandle: func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821 return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.ghidra.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -81,7 +81,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: yield from ghidra_helpers.get_function_blocks(fh) - def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py index 0e7407b28..2e18cecdc 100644 --- a/capa/features/extractors/ghidra/file.py +++ b/capa/features/extractors/ghidra/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and 
limitations under the License. import re import struct -from typing import List, Tuple, Iterator +from typing import Iterator from ghidra.program.model.symbol import SourceType, SymbolType @@ -22,7 +22,7 @@ MAX_OFFSET_PE_AFTER_MZ = 0x200 -def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) -> Iterator[Tuple[int, int]]: +def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]]) -> Iterator[tuple[int, int]]: """check segment for embedded PE adapted for Ghidra from: @@ -60,11 +60,11 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) yield off, i -def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: +def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]: """extract embedded PE features""" # pre-compute XOR pairs - mz_xor: List[Tuple[bytes, bytes, int]] = [ + mz_xor: list[tuple[bytes, bytes, int]] = [ ( capa.features.extractors.helpers.xor_static(b"MZ", i), capa.features.extractors.helpers.xor_static(b"PE", i), @@ -84,14 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: yield Characteristic("embedded pe"), FileOffsetAddress(ea) -def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names() -> Iterator[tuple[Feature, Address]]: """extract function exports""" st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821 for addr in st.getExternalEntryPointIterator(): yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset()) -def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names() -> Iterator[tuple[Feature, Address]]: """extract function imports 1. 
imports by ordinal: @@ -116,14 +116,14 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: yield Import(name), AbsoluteVirtualAddress(addr) -def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names() -> Iterator[tuple[Feature, Address]]: """extract section names""" for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821 yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset()) -def extract_file_strings() -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings() -> Iterator[tuple[Feature, Address]]: """extract ASCII and UTF-16 LE strings""" for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821 @@ -141,7 +141,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]: yield String(s.s), FileOffsetAddress(offset) -def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: """ extract the names of statically-linked library functions. 
""" @@ -162,7 +162,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: yield FunctionName(name[1:]), addr -def extract_file_format() -> Iterator[Tuple[Feature, Address]]: +def extract_file_format() -> Iterator[tuple[Feature, Address]]: ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821 if "PE" in ef: yield Format(FORMAT_PE), NO_ADDRESS @@ -175,7 +175,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]: raise NotImplementedError(f"unexpected file format: {ef}") -def extract_features() -> Iterator[Tuple[Feature, Address]]: +def extract_features() -> Iterator[tuple[Feature, Address]]: """extract file features""" for file_handler in FILE_HANDLERS: for feature, addr in file_handler(): diff --git a/capa/features/extractors/ghidra/function.py b/capa/features/extractors/ghidra/function.py index d31ba86a6..59f0c7212 100644 --- a/capa/features/extractors/ghidra/function.py +++ b/capa/features/extractors/ghidra/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Tuple, Iterator +from typing import Iterator import ghidra from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator @@ -49,7 +49,7 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset()) -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/ghidra/global_.py b/capa/features/extractors/ghidra/global_.py index 0df58a084..65e8d6a21 100644 --- a/capa/features/extractors/ghidra/global_.py +++ b/capa/features/extractors/ghidra/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging import contextlib -from typing import Tuple, Iterator +from typing import Iterator import capa.ghidra.helpers import capa.features.extractors.elf @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) -def extract_os() -> Iterator[Tuple[Feature, Address]]: +def extract_os() -> Iterator[tuple[Feature, Address]]: format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821 if "PE" in format_name: @@ -45,7 +45,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]: return -def extract_arch() -> Iterator[Tuple[Feature, Address]]: +def extract_arch() -> Iterator[tuple[Feature, Address]]: lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821 if "x86" in lang_id and "64" in lang_id: diff --git a/capa/features/extractors/ghidra/helpers.py b/capa/features/extractors/ghidra/helpers.py index 22e0ed6d4..7f4a3790e 100644 --- a/capa/features/extractors/ghidra/helpers.py +++ b/capa/features/extractors/ghidra/helpers.py @@ -5,7 +5,7 @@ # Unless required by applicable 
law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List, Iterator +from typing import Iterator import ghidra import java.lang @@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle -def ints_to_bytes(bytez: List[int]) -> bytes: +def ints_to_bytes(bytez: list[int]) -> bytes: """convert Java signed ints to Python bytes args: @@ -83,10 +83,10 @@ def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]: yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn) -def get_file_imports() -> Dict[int, List[str]]: +def get_file_imports() -> dict[int, list[str]]: """get all import names & addrs""" - import_dict: Dict[int, List[str]] = {} + import_dict: dict[int, list[str]] = {} for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821 for r in f.getSymbol().getReferences(): @@ -110,7 +110,7 @@ def get_file_imports() -> Dict[int, List[str]]: return import_dict -def get_file_externs() -> Dict[int, List[str]]: +def get_file_externs() -> dict[int, list[str]]: """ Gets function names & addresses of statically-linked library functions @@ -124,7 +124,7 @@ def get_file_externs() -> Dict[int, List[str]]: - Note: See Symbol Table labels """ - extern_dict: Dict[int, List[str]] = {} + extern_dict: dict[int, list[str]] = {} for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821 # .isExternal() misses more than this config for the function symbols @@ -143,7 +143,7 @@ def get_file_externs() -> Dict[int, List[str]]: return extern_dict -def map_fake_import_addrs() -> Dict[int, List[int]]: +def map_fake_import_addrs() -> dict[int, list[int]]: """ 
Map ghidra's fake import entrypoints to their real addresses @@ -162,7 +162,7 @@ def map_fake_import_addrs() -> Dict[int, List[int]]: - 0x473090 -> PTR_CreateServiceW_00473090 - 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress) """ - fake_dict: Dict[int, List[int]] = {} + fake_dict: dict[int, list[int]] = {} for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821 for r in f.getSymbol().getReferences(): @@ -174,9 +174,9 @@ def map_fake_import_addrs() -> Dict[int, List[int]]: def check_addr_for_api( addr: ghidra.program.model.address.Address, - fakes: Dict[int, List[int]], - imports: Dict[int, List[str]], - externs: Dict[int, List[str]], + fakes: dict[int, list[int]], + imports: dict[int, list[str]], + externs: dict[int, list[str]], ) -> bool: offset = addr.getOffset() diff --git a/capa/features/extractors/ghidra/insn.py b/capa/features/extractors/ghidra/insn.py index c9f2dada3..cd8d65d02 100644 --- a/capa/features/extractors/ghidra/insn.py +++ b/capa/features/extractors/ghidra/insn.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Any, Dict, Tuple, Iterator +from typing import Any, Iterator import ghidra from ghidra.program.model.lang import OperandType @@ -26,21 +26,21 @@ OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS -def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_imports(ctx: dict[str, Any]) -> dict[int, Any]: """Populate the import cache for this context""" if "imports_cache" not in ctx: ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports() return ctx["imports_cache"] -def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_externs(ctx: dict[str, Any]) -> dict[int, Any]: """Populate the externs cache for this context""" if "externs_cache" not in ctx: ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs() return ctx["externs_cache"] -def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]: """Populate the fake import addrs cache for this context""" if "fakes_cache" not in ctx: ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs() @@ -48,7 +48,7 @@ def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]: def check_for_api_call( - insn, externs: Dict[int, Any], fakes: Dict[int, Any], imports: Dict[int, Any], imp_or_ex: bool + insn, externs: dict[int, Any], fakes: dict[int, Any], imports: dict[int, Any], imp_or_ex: bool ) -> Iterator[Any]: """check instruction for API call @@ -110,7 +110,7 @@ def check_for_api_call( yield info -def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: insn: ghidra.program.database.code.InstructionDB = ih.inner if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn): @@ -131,7 +131,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) 
yield API(ext), ih.address -def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction number features example: @@ -186,7 +186,7 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl yield OperandOffset(i, const), addr -def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction structure offset features @@ -219,7 +219,7 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl yield OperandOffset(i, op_off), ih.address -def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse referenced byte sequences @@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle yield Bytes(extracted_bytes), ih.address -def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction string features @@ -249,7 +249,7 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl def extract_insn_mnemonic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction mnemonic features""" insn: ghidra.program.database.code.InstructionDB = ih.inner @@ -258,7 +258,7 @@ def 
extract_insn_mnemonic_features( def extract_insn_obfs_call_plus_5_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse call $+5 instruction from the given instruction. """ @@ -279,7 +279,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features( def extract_insn_segment_access_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction fs or gs access""" insn: ghidra.program.database.code.InstructionDB = ih.inner @@ -294,7 +294,7 @@ def extract_insn_segment_access_features( def extract_insn_peb_access_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction peb access fs:[0x30] on x86, gs:[0x60] on x64 @@ -310,7 +310,7 @@ def extract_insn_peb_access_characteristic_features( def extract_insn_cross_section_cflow( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """inspect the instruction for a CALL or JMP that crosses section boundaries""" insn: ghidra.program.database.code.InstructionDB = ih.inner @@ -364,7 +364,7 @@ def extract_function_calls_from( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle, -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """extract functions calls from features most relevant at the function scope, however, its most efficient to extract at the instruction scope @@ -393,7 +393,7 @@ def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle, -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """extract indirect function calls (e.g., call eax or call dword ptr [edx+4]) does not include calls like => call 
ds:dword_ABD4974 @@ -442,7 +442,7 @@ def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle, -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: f: ghidra.program.database.function.FunctionDB = fh.inner insn: ghidra.program.database.code.InstructionDB = ih.inner @@ -461,7 +461,7 @@ def extract_features( fh: FunctionHandle, bb: BBHandle, insn: InsnHandle, -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: for insn_handler in INSTRUCTION_HANDLERS: for feature, addr in insn_handler(fh, bb, insn): yield feature, addr diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index 09f76f589..3f1060200 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -8,7 +8,7 @@ import struct import builtins -from typing import Tuple, Iterator +from typing import Iterator MIN_STACKSTRING_LEN = 8 @@ -119,7 +119,7 @@ def twos_complement(val: int, bits: int) -> int: return val -def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]: +def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]: """ Generate (offset, key) tuples of embedded PEs diff --git a/capa/features/extractors/ida/basicblock.py b/capa/features/extractors/ida/basicblock.py index 88a1247ea..2a11bb509 100644 --- a/capa/features/extractors/ida/basicblock.py +++ b/capa/features/extractors/ida/basicblock.py @@ -8,7 +8,7 @@ import string import struct -from typing import Tuple, Iterator +from typing import Iterator import idaapi @@ -80,19 +80,19 @@ def bb_contains_stackstring(f: idaapi.func_t, bb: idaapi.BasicBlock) -> bool: return False -def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract stackstring indicators from basic block""" if 
bb_contains_stackstring(fh.inner, bbh.inner): yield Characteristic("stack string"), bbh.address -def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract tight loop indicators from a basic block""" if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner): yield Characteristic("tight loop"), bbh.address -def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract basic block features""" for bb_handler in BASIC_BLOCK_HANDLERS: for feature, addr in bb_handler(fh, bbh): diff --git a/capa/features/extractors/ida/extractor.py b/capa/features/extractors/ida/extractor.py index a2b4f7913..5222b3c8f 100644 --- a/capa/features/extractors/ida/extractor.py +++ b/capa/features/extractors/ida/extractor.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator import idaapi @@ -36,7 +36,7 @@ def __init__(self): sha256=capa.ida.helpers.retrieve_input_file_sha256(), ) ) - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.ida.file.extract_file_format()) self.global_features.extend(capa.features.extractors.ida.global_.extract_os()) self.global_features.extend(capa.features.extractors.ida.global_.extract_arch()) @@ -61,7 +61,7 @@ def get_function(ea: int) -> FunctionHandle: f = idaapi.get_func(ea) return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.ida.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -70,7 +70,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: for bb in ida_helpers.get_function_blocks(fh.inner): yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb) - def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 78200e438..30408060a 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import struct -from typing import Tuple, Iterator +from typing import Iterator import idc import idaapi @@ -26,7 +26,7 @@ MAX_OFFSET_PE_AFTER_MZ = 0x200 -def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: +def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]: """check segment for embedded PE adapted for IDA from: @@ -71,7 +71,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: yield off, i -def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: +def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]: """extract embedded PE features IDA must load resource sections for this to be complete @@ -83,7 +83,7 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: yield Characteristic("embedded pe"), FileOffsetAddress(ea) -def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names() -> Iterator[tuple[Feature, Address]]: """extract function exports""" for _, ordinal, ea, name in idautils.Entries(): forwarded_name = ida_entry.get_entry_forwarder(ordinal) @@ -95,7 +95,7 @@ def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]: yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea) -def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names() -> Iterator[tuple[Feature, Address]]: """extract function imports 1. 
imports by ordinal: @@ -131,7 +131,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: yield Import(info[1]), AbsoluteVirtualAddress(ea) -def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names() -> Iterator[tuple[Feature, Address]]: """extract section names IDA must load resource sections for this to be complete @@ -142,7 +142,7 @@ def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]: yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea) -def extract_file_strings() -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings() -> Iterator[tuple[Feature, Address]]: """extract ASCII and UTF-16 LE strings IDA must load resource sections for this to be complete @@ -160,7 +160,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]: yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset) -def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: """ extract the names of statically-linked library functions. 
""" @@ -177,7 +177,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: yield FunctionName(name[1:]), addr -def extract_file_format() -> Iterator[Tuple[Feature, Address]]: +def extract_file_format() -> Iterator[tuple[Feature, Address]]: filetype = capa.ida.helpers.get_filetype() if filetype in (idaapi.f_PE, idaapi.f_COFF): @@ -191,7 +191,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]: raise NotImplementedError(f"unexpected file format: {filetype}") -def extract_features() -> Iterator[Tuple[Feature, Address]]: +def extract_features() -> Iterator[tuple[Feature, Address]]: """extract file features""" for file_handler in FILE_HANDLERS: for feature, addr in file_handler(): diff --git a/capa/features/extractors/ida/function.py b/capa/features/extractors/ida/function.py index cb4d63290..f636791da 100644 --- a/capa/features/extractors/ida/function.py +++ b/capa/features/extractors/ida/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Tuple, Iterator +from typing import Iterator import idaapi import idautils @@ -43,7 +43,7 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), fh.address -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/ida/global_.py b/capa/features/extractors/ida/global_.py index 3c5f4623e..a7724e126 100644 --- a/capa/features/extractors/ida/global_.py +++ b/capa/features/extractors/ida/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging import contextlib -from typing import Tuple, Iterator +from typing import Iterator import ida_loader @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -def extract_os() -> Iterator[Tuple[Feature, Address]]: +def extract_os() -> Iterator[tuple[Feature, Address]]: format_name: str = ida_loader.get_file_type_name() if "PE" in format_name: @@ -46,7 +46,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]: return -def extract_arch() -> Iterator[Tuple[Feature, Address]]: +def extract_arch() -> Iterator[tuple[Feature, Address]]: procname = capa.ida.helpers.get_processor_name() if procname == "metapc" and capa.ida.helpers.is_64bit(): yield Arch(ARCH_AMD64), NO_ADDRESS diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index fc22bc38d..dbd2166a8 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import functools -from typing import Any, Dict, Tuple, Iterator, Optional +from typing import Any, Iterator, Optional import idc import idaapi @@ -124,9 +124,9 @@ def inspect_import(imports, library, ea, function, ordinal): return True -def get_file_imports() -> Dict[int, Tuple[str, str, int]]: +def get_file_imports() -> dict[int, tuple[str, str, int]]: """get file imports""" - imports: Dict[int, Tuple[str, str, int]] = {} + imports: dict[int, tuple[str, str, int]] = {} for idx in range(idaapi.get_import_module_qty()): library = idaapi.get_import_module_name(idx) @@ -147,7 +147,7 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]: return imports -def get_file_externs() -> Dict[int, Tuple[str, str, int]]: +def get_file_externs() -> dict[int, tuple[str, str, int]]: externs = {} for seg in get_segments(skip_header_segments=True): @@ -248,7 +248,7 @@ def find_string_at(ea: int, min_: int = 4) -> str: return "" -def get_op_phrase_info(op: idaapi.op_t) -> Dict: +def get_op_phrase_info(op: idaapi.op_t) -> dict: """parse phrase features from operand Pretty much dup of sark's implementation: @@ -323,7 +323,7 @@ def is_frame_register(reg: int) -> bool: return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg) -def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[Tuple[Any]] = None) -> idaapi.op_t: +def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[tuple[Any]] = None) -> idaapi.op_t: """yield op_t for instruction, filter on type if specified""" for op in insn.ops: if op.type == idaapi.o_void: diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index bd70d0faa..caf90c732 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import re -from typing import Any, Dict, Tuple, Iterator, Optional +from typing import Any, Iterator, Optional import idc import ida_ua @@ -25,19 +25,19 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40 -def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_imports(ctx: dict[str, Any]) -> dict[int, Any]: if "imports_cache" not in ctx: ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports() return ctx["imports_cache"] -def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_externs(ctx: dict[str, Any]) -> dict[int, Any]: if "externs_cache" not in ctx: ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs() return ctx["externs_cache"] -def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[Tuple[str, str]]: +def check_for_api_call(insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[tuple[str, str]]: """check instruction for API call""" info = None ref = insn.ea @@ -65,7 +65,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[T return info -def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction API features @@ -135,7 +135,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction number features example: @@ -181,7 +181,7 @@ def extract_insn_number_features( yield OperandOffset(i, const), ih.address -def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ 
parse referenced byte sequences example: @@ -203,7 +203,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_insn_string_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction string features @@ -221,7 +221,7 @@ def extract_insn_string_features( def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction structure offset features @@ -369,7 +369,7 @@ def is_nzxor_stack_cookie(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi. def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction non-zeroing XOR instruction ignore expected non-zeroing XORs, e.g. security cookies @@ -387,14 +387,14 @@ def extract_insn_nzxor_characteristic_features( def extract_insn_mnemonic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction mnemonic features""" yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address def extract_insn_obfs_call_plus_5_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse call $+5 instruction from the given instruction. 
""" @@ -409,7 +409,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features( def extract_insn_peb_access_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction peb access fs:[0x30] on x86, gs:[0x60] on x64 @@ -437,7 +437,7 @@ def extract_insn_peb_access_characteristic_features( def extract_insn_segment_access_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction fs or gs access TODO: @@ -466,7 +466,7 @@ def extract_insn_segment_access_features( def extract_insn_cross_section_cflow( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """inspect the instruction for a CALL or JMP that crosses section boundaries""" insn: idaapi.insn_t = ih.inner @@ -482,7 +482,7 @@ def extract_insn_cross_section_cflow( yield Characteristic("cross section flow"), ih.address -def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract functions calls from features most relevant at the function scope, however, its most efficient to extract at the instruction scope @@ -496,7 +496,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """extract indirect function calls (e.g., call eax or call dword ptr [edx+4]) does not include calls like => call ds:dword_ABD4974 @@ -509,7 +509,7 @@ def extract_function_indirect_call_characteristic_features( yield 
Characteristic("indirect call"), ih.address -def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: for feature, ea in inst_handler(f, bbh, insn): diff --git a/capa/features/extractors/null.py b/capa/features/extractors/null.py index a0dd9104d..ad7be0adb 100644 --- a/capa/features/extractors/null.py +++ b/capa/features/extractors/null.py @@ -5,11 +5,9 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List, Tuple, Union +from typing import Union, TypeAlias from dataclasses import dataclass -from typing_extensions import TypeAlias - from capa.features.common import Feature from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, DynamicCallAddress from capa.features.extractors.base_extractor import ( @@ -27,19 +25,19 @@ @dataclass class InstructionFeatures: - features: List[Tuple[Address, Feature]] + features: list[tuple[Address, Feature]] @dataclass class BasicBlockFeatures: - features: List[Tuple[Address, Feature]] - instructions: Dict[Address, InstructionFeatures] + features: list[tuple[Address, Feature]] + instructions: dict[Address, InstructionFeatures] @dataclass class FunctionFeatures: - features: List[Tuple[Address, Feature]] - basic_blocks: Dict[Address, BasicBlockFeatures] + features: list[tuple[Address, Feature]] + basic_blocks: dict[Address, BasicBlockFeatures] @dataclass @@ -52,9 +50,9 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor): base_address: Address sample_hashes: SampleHashes - 
global_features: List[Feature] - file_features: List[Tuple[Address, Feature]] - functions: Dict[Address, FunctionFeatures] + global_features: list[Feature] + file_features: list[tuple[Address, Feature]] + functions: dict[Address, FunctionFeatures] def get_base_address(self): return self.base_address @@ -98,19 +96,19 @@ def extract_insn_features(self, f, bb, insn): @dataclass class CallFeatures: name: str - features: List[Tuple[Address, Feature]] + features: list[tuple[Address, Feature]] @dataclass class ThreadFeatures: - features: List[Tuple[Address, Feature]] - calls: Dict[Address, CallFeatures] + features: list[tuple[Address, Feature]] + calls: dict[Address, CallFeatures] @dataclass class ProcessFeatures: - features: List[Tuple[Address, Feature]] - threads: Dict[Address, ThreadFeatures] + features: list[tuple[Address, Feature]] + threads: dict[Address, ThreadFeatures] name: str @@ -118,9 +116,9 @@ class ProcessFeatures: class NullDynamicFeatureExtractor(DynamicFeatureExtractor): base_address: Address sample_hashes: SampleHashes - global_features: List[Feature] - file_features: List[Tuple[Address, Feature]] - processes: Dict[Address, ProcessFeatures] + global_features: list[Feature] + file_features: list[tuple[Address, Feature]] + processes: dict[Address, ProcessFeatures] def extract_global_features(self): for feature in self.global_features: diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py index 1dd478adf..cac7ecc42 100644 --- a/capa/features/extractors/pefile.py +++ b/capa/features/extractors/pefile.py @@ -148,11 +148,11 @@ def extract_file_features(pe, buf): buf: the raw sample bytes yields: - Tuple[Feature, VA]: a feature and its location. + tuple[Feature, VA]: a feature and its location. 
""" for file_handler in FILE_HANDLERS: - # file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]] + # file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]] for feature, va in file_handler(pe=pe, buf=buf): # type: ignore yield feature, va @@ -177,10 +177,10 @@ def extract_global_features(pe, buf): buf: the raw sample bytes yields: - Tuple[Feature, VA]: a feature and its location. + tuple[Feature, VA]: a feature and its location. """ for handler in GLOBAL_HANDLERS: - # file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]] + # file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]] for feature, va in handler(pe=pe, buf=buf): # type: ignore yield feature, va diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py index 2e450fb69..3515c29cf 100644 --- a/capa/features/extractors/viv/basicblock.py +++ b/capa/features/extractors/viv/basicblock.py @@ -8,7 +8,7 @@ import string import struct -from typing import Tuple, Iterator +from typing import Iterator import envi import envi.archs.i386.disasm @@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import BBHandle, FunctionHandle -def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """ parse features from the given basic block. 
@@ -47,7 +47,7 @@ def _bb_has_tight_loop(f, bb): return False -def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """check basic block for tight loop indicators""" if _bb_has_tight_loop(f, bb.inner): yield Characteristic("tight loop"), bb.address @@ -70,7 +70,7 @@ def _bb_has_stackstring(f, bb): return False -def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """check basic block for stackstring indicators""" if _bb_has_stackstring(f, bb.inner): yield Characteristic("stack string"), bb.address @@ -145,7 +145,7 @@ def is_printable_utf16le(chars: bytes) -> bool: return False -def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """ extract features from the given basic block. @@ -154,7 +154,7 @@ def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, bb (viv_utils.BasicBlock): the basic block to process. yields: - Tuple[Feature, int]: the features and their location found in this basic block. + tuple[Feature, int]: the features and their location found in this basic block. """ yield BasicBlock(), AbsoluteVirtualAddress(bb.inner.va) for bb_handler in BASIC_BLOCK_HANDLERS: diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index 001e9e35a..ad64858ff 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Any, Dict, List, Tuple, Iterator +from typing import Any, Iterator from pathlib import Path import viv_utils @@ -39,7 +39,7 @@ def __init__(self, vw, path: Path, os): super().__init__(hashes=SampleHashes.from_bytes(self.buf)) # pre-compute these because we'll yield them at *every* scope. - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf)) self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os)) self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw)) @@ -55,13 +55,13 @@ def extract_file_features(self): yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf) def get_functions(self) -> Iterator[FunctionHandle]: - cache: Dict[str, Any] = {} + cache: dict[str, Any] = {} for va in sorted(self.vw.getFunctions()): yield FunctionHandle( address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache} ) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.viv.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -69,7 +69,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: for bb in f.basic_blocks: yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb) - def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.viv.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: 
@@ -79,7 +79,7 @@ def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHa def extract_insn_features( self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih) def is_library_function(self, addr): diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index 2fc09841b..41ce836b6 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Tuple, Iterator +from typing import Iterator import PE.carve as pe_carve # vivisect PE import vivisect @@ -21,7 +21,7 @@ from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress -def extract_file_embedded_pe(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_embedded_pe(buf, **kwargs) -> Iterator[tuple[Feature, Address]]: for offset, _ in pe_carve.carve(buf, 1): yield Characteristic("embedded pe"), FileOffsetAddress(offset) @@ -37,7 +37,7 @@ def get_first_vw_filename(vw: vivisect.VivWorkspace): return next(iter(vw.filemeta.keys())) -def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[tuple[Feature, Address]]: for va, _, name, _ in vw.getExports(): yield Export(name), AbsoluteVirtualAddress(va) @@ -56,7 +56,7 @@ def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[T yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va) -def 
extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]: """ extract imported function names 1. imports by ordinal: @@ -91,16 +91,16 @@ def is_viv_ord_impname(impname: str) -> bool: return True -def extract_file_section_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]: for va, _, segname, _ in vw.getSegments(): yield Section(segname), AbsoluteVirtualAddress(va) -def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(buf, **kwargs) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_file_strings(buf) -def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_function_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]: """ extract the names of statically-linked library functions. """ @@ -117,11 +117,11 @@ def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address yield FunctionName(name[1:]), addr -def extract_file_format(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_format(buf, **kwargs) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_format(buf) -def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_features(vw, buf: bytes) -> Iterator[tuple[Feature, Address]]: """ extract file features from given workspace @@ -130,7 +130,7 @@ def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]: buf: the raw input file bytes yields: - Tuple[Feature, Address]: a feature and its location. + tuple[Feature, Address]: a feature and its location. 
""" for file_handler in FILE_HANDLERS: diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index ab1dcb429..9cc1e2168 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Tuple, Iterator +from typing import Iterator import envi import viv_utils @@ -19,7 +19,7 @@ from capa.features.extractors.base_extractor import FunctionHandle -def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: """ parse features from the given function. @@ -32,7 +32,7 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature raise NotImplementedError -def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: if fh.inner.vw.metadata["Format"] == "elf": # the file's symbol table gets added to the metadata of the vivisect workspace. # this is in order to eliminate the computational overhead of refetching symtab each time. 
@@ -54,13 +54,13 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, yield FunctionName(sym_name), fh.address -def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]: f: viv_utils.Function = fhandle.inner for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE): yield Characteristic("calls to"), AbsoluteVirtualAddress(src) -def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]: """ parse if a function has a loop """ @@ -88,7 +88,7 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Ad yield Characteristic("loop"), fhandle.address -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: """ extract features from the given function. @@ -96,7 +96,7 @@ def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: fh: the function handle from which to extract features yields: - Tuple[Feature, int]: the features and their location found in this function. + tuple[Feature, int]: the features and their location found in this function. """ for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): diff --git a/capa/features/extractors/viv/global_.py b/capa/features/extractors/viv/global_.py index 39ee79bb2..4f1970a89 100644 --- a/capa/features/extractors/viv/global_.py +++ b/capa/features/extractors/viv/global_.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature from capa.features.address import NO_ADDRESS, Address @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -def extract_arch(vw) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(vw) -> Iterator[tuple[Feature, Address]]: arch = vw.getMeta("Architecture") if arch == "amd64": yield Arch(ARCH_AMD64), NO_ADDRESS diff --git a/capa/features/extractors/viv/indirect_calls.py b/capa/features/extractors/viv/indirect_calls.py index d39f08c91..6646d8716 100644 --- a/capa/features/extractors/viv/indirect_calls.py +++ b/capa/features/extractors/viv/indirect_calls.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import collections -from typing import Set, List, Deque, Tuple, Optional +from typing import Deque, Optional import envi import vivisect.const @@ -28,7 +28,7 @@ DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor") -def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]: +def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]: """ collect the instructions that flow to the given address, local to the current function. @@ -37,7 +37,7 @@ def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]: va (int): the virtual address to inspect returns: - List[int]: the prior instructions, which may fallthrough and/or jump here + list[int]: the prior instructions, which may fallthrough and/or jump here """ ret = [] @@ -71,7 +71,7 @@ class NotFoundError(Exception): pass -def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[int]]: +def find_definition(vw: VivWorkspace, va: int, reg: int) -> tuple[int, Optional[int]]: """ scan backwards from the given address looking for assignments to the given register. if a constant, return that value. 
@@ -88,7 +88,7 @@ def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[ NotFoundError: when the definition cannot be found. """ q: Deque[int] = collections.deque() - seen: Set[int] = set() + seen: set[int] = set() q.extend(get_previous_instructions(vw, va)) while q: @@ -139,7 +139,7 @@ def is_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> bool: return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper) -def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> Tuple[int, Optional[int]]: +def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> tuple[int, Optional[int]]: """ inspect the given indirect call instruction and attempt to resolve the target address. diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 329bc94d0..2964db834 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import List, Tuple, Callable, Iterator +from typing import Callable, Iterator import envi import envi.exc @@ -33,7 +33,7 @@ def interface_extract_instruction_XXX( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse features from the given instruction. 
@@ -53,7 +53,7 @@ def get_imports(vw): caching accessor to vivisect workspace imports avoids performance issues in vivisect when collecting locations - returns: Dict[int, Tuple[str, str]] + returns: dict[int, tuple[str, str]] """ if "imports" in vw.metadata: return vw.metadata["imports"] @@ -65,7 +65,7 @@ def get_imports(vw): return imports -def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse API features from the given instruction. @@ -260,7 +260,7 @@ def read_bytes(vw, va: int) -> bytes: raise -def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse byte sequence features from the given instruction. example: @@ -371,7 +371,7 @@ def is_security_cookie(f, bb, insn) -> bool: def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse non-zeroing XOR instruction from the given instruction. ignore expected non-zeroing XORs, e.g. security cookies. 
@@ -392,12 +392,12 @@ def extract_insn_nzxor_characteristic_features( yield Characteristic("nzxor"), ih.address -def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse mnemonic features from the given instruction.""" yield Mnemonic(ih.inner.mnem), ih.address -def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse call $+5 instruction from the given instruction. """ @@ -415,7 +415,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) yield Characteristic("call $+5"), ih.address -def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64 """ @@ -451,7 +451,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> It pass -def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse the instruction for access to fs or gs""" insn: envi.Opcode = ih.inner @@ -472,7 +472,7 @@ def get_section(vw, va: int): raise KeyError(va) -def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ inspect the instruction for a CALL or JMP that crosses section boundaries. 
""" @@ -513,7 +513,7 @@ def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> # this is a feature that's most relevant at the function scope, # however, its most efficient to extract at the instruction scope. -def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: insn: envi.Opcode = ih.inner f: viv_utils.Function = fh.inner @@ -554,7 +554,7 @@ def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera # this is a feature that's most relevant at the function or basic block scope, # however, its most efficient to extract at the instruction scope. -def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4]) does not include calls like => call ds:dword_ABD4974 @@ -578,7 +578,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle def extract_op_number_features( fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse number features from the given operand. 
example: @@ -623,7 +623,7 @@ def extract_op_number_features( def extract_op_offset_features( fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse structure offset features from the given operand.""" # example: # @@ -674,7 +674,7 @@ def extract_op_offset_features( def extract_op_string_features( fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse string features from the given operand.""" # example: # @@ -705,15 +705,15 @@ def extract_op_string_features( yield String(s), ih.address -def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]: for i, oper in enumerate(insn.inner.opers): for op_handler in OPERAND_HANDLERS: for feature, addr in op_handler(f, bb, insn, i, oper): yield feature, addr -OPERAND_HANDLERS: List[ - Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[Tuple[Feature, Address]]] +OPERAND_HANDLERS: list[ + Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[tuple[Feature, Address]]] ] = [ extract_op_number_features, extract_op_offset_features, @@ -721,7 +721,7 @@ def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterato ] -def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f, bb, insn) -> Iterator[tuple[Feature, Address]]: """ extract features from the given insn. @@ -731,14 +731,14 @@ def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]: insn (vivisect...Instruction): the instruction to process. yields: - Tuple[Feature, Address]: the features and their location found in this insn. + tuple[Feature, Address]: the features and their location found in this insn. 
""" for insn_handler in INSTRUCTION_HANDLERS: for feature, addr in insn_handler(f, bb, insn): yield feature, addr -INSTRUCTION_HANDLERS: List[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[Tuple[Feature, Address]]]] = [ +INSTRUCTION_HANDLERS: list[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[tuple[Feature, Address]]]] = [ extract_insn_api_features, extract_insn_bytes_features, extract_insn_nzxor_characteristic_features, diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py index 4a004af61..a8976cd8c 100644 --- a/capa/features/extractors/vmray/__init__.py +++ b/capa/features/extractors/vmray/__init__.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Tuple, Optional +from typing import Optional from pathlib import Path from zipfile import ZipFile from collections import defaultdict @@ -58,17 +58,17 @@ def __init__(self, zipfile_path: Path): "VMRay feature extractor does not support flog version %s" % self.flog.analysis.log_version ) - self.exports: Dict[int, str] = {} - self.imports: Dict[int, Tuple[str, str]] = {} - self.sections: Dict[int, str] = {} - self.monitor_processes: Dict[int, VMRayMonitorProcess] = {} - self.monitor_threads: Dict[int, VMRayMonitorThread] = {} + self.exports: dict[int, str] = {} + self.imports: dict[int, tuple[str, str]] = {} + self.sections: dict[int, str] = {} + self.monitor_processes: dict[int, VMRayMonitorProcess] = {} + self.monitor_threads: dict[int, VMRayMonitorThread] = {} # map monitor thread IDs to their associated monitor process ID - self.monitor_threads_by_monitor_process: Dict[int, List[int]] = defaultdict(list) + self.monitor_threads_by_monitor_process: dict[int, list[int]] = defaultdict(list) # map function calls to their 
associated monitor thread ID mapped to its associated monitor process ID - self.monitor_process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list)) + self.monitor_process_calls: dict[int, dict[int, list[FunctionCall]]] = defaultdict(lambda: defaultdict(list)) self.base_address: int diff --git a/capa/features/extractors/vmray/call.py b/capa/features/extractors/vmray/call.py index febb1b338..6ded3a4fc 100644 --- a/capa/features/extractors/vmray/call.py +++ b/capa/features/extractors/vmray/call.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Tuple, Iterator +from typing import Iterator import capa.features.extractors.helpers from capa.features.insn import API, Number @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) -def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: if param.deref is not None: # pointer types contain a special "deref" member that stores the deref'd value # so we check for this first and ignore Param.value as this always contains the @@ -39,7 +39,7 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feat yield Number(hexint(param.value)), ch.address -def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: call: FunctionCall = ch.inner if call.params_in: @@ -50,7 +50,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - yield API(name), ch.address -def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> 
Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: for handler in CALL_HANDLERS: for feature, addr in handler(ph, th, ch): yield feature, addr diff --git a/capa/features/extractors/vmray/extractor.py b/capa/features/extractors/vmray/extractor.py index 36a0b430f..a9f0491c9 100644 --- a/capa/features/extractors/vmray/extractor.py +++ b/capa/features/extractors/vmray/extractor.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. -from typing import List, Tuple, Iterator +from typing import Iterator from pathlib import Path import capa.helpers @@ -34,8 +34,8 @@ ) -def get_formatted_params(params: ParamList) -> List[str]: - params_list: List[str] = [] +def get_formatted_params(params: ParamList) -> list[str]: + params_list: list[str] = [] for param in params: if param.deref and param.deref.value is not None: @@ -69,10 +69,10 @@ def get_base_address(self) -> Address: # value according to the PE header, the actual trace may use a different imagebase return AbsoluteVirtualAddress(self.analysis.base_address) - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.vmray.file.extract_features(self.analysis) - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: yield from self.global_features def get_processes(self) -> Iterator[ProcessHandle]: @@ -80,7 +80,7 @@ def get_processes(self) -> Iterator[ProcessHandle]: address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid) yield ProcessHandle(address, inner=monitor_process) - def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_process_features(self, ph: ProcessHandle) 
-> Iterator[tuple[Feature, Address]]: # we have not identified process-specific features for VMRay yet yield from [] @@ -95,7 +95,7 @@ def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid) yield ThreadHandle(address=address, inner=monitor_thread) - def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]: if False: # force this routine to be a generator, # but we don't actually have any elements to generate. @@ -109,7 +109,7 @@ def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle] def extract_call_features( self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch) def get_call_name(self, ph, th, ch) -> str: diff --git a/capa/features/extractors/vmray/file.py b/capa/features/extractors/vmray/file.py index 7f4ba0395..b0e1772f2 100644 --- a/capa/features/extractors/vmray/file.py +++ b/capa/features/extractors/vmray/file.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator import capa.features.extractors.common from capa.features.file import Export, Import, Section @@ -18,52 +18,52 @@ logger = logging.getLogger(__name__) -def extract_export_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_export_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for addr, name in analysis.exports.items(): yield Export(name), AbsoluteVirtualAddress(addr) -def extract_import_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_import_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for addr, (module, api) in analysis.imports.items(): for symbol in generate_symbols(module, api, include_dll=True): yield Import(symbol), AbsoluteVirtualAddress(addr) -def extract_section_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_section_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for addr, name in analysis.sections.items(): yield Section(name), AbsoluteVirtualAddress(addr) -def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for filename in analysis.sv2.filenames.values(): yield String(filename.filename), NO_ADDRESS -def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for mutex in analysis.sv2.mutexes.values(): yield String(mutex.name), NO_ADDRESS -def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for domain in analysis.sv2.domains.values(): yield String(domain.domain), NO_ADDRESS -def 
extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for ip_address in analysis.sv2.ip_addresses.values(): yield String(ip_address.ip_address), NO_ADDRESS -def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for registry_record in analysis.sv2.registry_records.values(): yield String(registry_record.reg_key_name), NO_ADDRESS -def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_file_strings(analysis.sample_file_buf) -def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for handler in FILE_HANDLERS: for feature, addr in handler(analysis): yield feature, addr diff --git a/capa/features/extractors/vmray/global_.py b/capa/features/extractors/vmray/global_.py index a42ce511e..c923a87fc 100644 --- a/capa/features/extractors/vmray/global_.py +++ b/capa/features/extractors/vmray/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import ( OS, @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) -def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: file_type: str = analysis.file_type if "x86-32" in file_type: @@ -38,7 +38,7 @@ def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: raise ValueError("unrecognized arch from the VMRay report: %s" % file_type) -def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_format(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: assert analysis.sample_file_static_data is not None if analysis.sample_file_static_data.pe: yield Format(FORMAT_PE), NO_ADDRESS @@ -48,7 +48,7 @@ def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]] raise ValueError("unrecognized file format from the VMRay report: %s" % analysis.file_type) -def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_os(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: file_type: str = analysis.file_type if "windows" in file_type.lower(): @@ -59,7 +59,7 @@ def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: raise ValueError("unrecognized OS from the VMRay report: %s" % file_type) -def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for global_handler in GLOBAL_HANDLER: for feature, addr in global_handler(analysis): yield feature, addr diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index f5371bec1..ef0513fb2 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -6,7 +6,7 @@ # is distributed on an "AS 
IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List, Union, Optional +from typing import Union, Optional import xmltodict from pydantic import Field, BaseModel @@ -87,7 +87,7 @@ class Param(BaseModel): deref: Optional[ParamDeref] = None -def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]: +def validate_ensure_is_list(value: Union[list[Param], Param]) -> list[Param]: if isinstance(value, list): return value else: @@ -95,9 +95,9 @@ def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]: # params may be stored as a list of Param or a single Param so we convert -# the input value to Python list type before the inner validation (List[Param]) +# the input value to Python list type before the inner validation (list[Param]) # is called -ParamList = Annotated[List[Param], BeforeValidator(validate_ensure_is_list)] +ParamList = Annotated[list[Param], BeforeValidator(validate_ensure_is_list)] class Params(BaseModel): @@ -164,9 +164,9 @@ class MonitorThread(BaseModel): # handle if there's only single entries, but the model expects a list -MonitorProcessList = Annotated[List[MonitorProcess], BeforeValidator(validate_ensure_is_list)] -MonitorThreadList = Annotated[List[MonitorThread], BeforeValidator(validate_ensure_is_list)] -FunctionCallList = Annotated[List[FunctionCall], BeforeValidator(validate_ensure_is_list)] +MonitorProcessList = Annotated[list[MonitorProcess], BeforeValidator(validate_ensure_is_list)] +MonitorThreadList = Annotated[list[MonitorThread], BeforeValidator(validate_ensure_is_list)] +FunctionCallList = Annotated[list[FunctionCall], BeforeValidator(validate_ensure_is_list)] class Analysis(BaseModel): @@ -177,7 +177,7 @@ class Analysis(BaseModel): monitor_processes: MonitorProcessList = Field(alias="monitor_process", default=[]) monitor_threads: 
MonitorThreadList = Field(alias="monitor_thread", default=[]) function_calls: FunctionCallList = Field(alias="fncall", default=[]) - # function_returns: List[FunctionReturn] = Field(alias="fnret", default=[]) + # function_returns: list[FunctionReturn] = Field(alias="fnret", default=[]) class Flog(BaseModel): @@ -186,7 +186,7 @@ class Flog(BaseModel): # models for summary_v2.json file, certain fields left as comments for documentation purposes class GenericReference(BaseModel): - path: List[str] + path: list[str] source: str @@ -226,12 +226,12 @@ class PEFileImport(BaseModel): class PEFileImportModule(BaseModel): dll: str - apis: List[PEFileImport] + apis: list[PEFileImport] class PEFileSection(BaseModel): # entropy: float - # flags: List[str] = [] + # flags: list[str] = [] name: str # raw_data_offset: int # raw_data_size: int @@ -241,9 +241,9 @@ class PEFileSection(BaseModel): class PEFile(BaseModel): basic_info: PEFileBasicInfo - exports: List[PEFileExport] = [] - imports: List[PEFileImportModule] = [] - sections: List[PEFileSection] = [] + exports: list[PEFileExport] = [] + imports: list[PEFileImportModule] = [] + sections: list[PEFileSection] = [] class ElfFileSectionHeader(BaseModel): @@ -268,7 +268,7 @@ class ElfFileHeader(BaseModel): class ElfFile(BaseModel): # file_header: ElfFileHeader - sections: List[ElfFileSection] + sections: list[ElfFileSection] class StaticData(BaseModel): @@ -284,7 +284,7 @@ class FileHashes(BaseModel): class File(BaseModel): - # categories: List[str] + # categories: list[str] hash_values: FileHashes # is_artifact: bool # is_ioc: bool @@ -292,11 +292,11 @@ class File(BaseModel): # size: int # is_truncated: bool # mime_type: Optional[str] = None - # operations: List[str] = [] - # ref_filenames: List[GenericReference] = [] - # ref_gfncalls: List[GenericReference] = [] + # operations: list[str] = [] + # ref_filenames: list[GenericReference] = [] + # ref_gfncalls: list[GenericReference] = [] ref_static_data: Optional[StaticDataReference] 
= None - # ref_vti_matches: List[GenericReference] = [] + # ref_vti_matches: list[GenericReference] = [] # verdict: str @@ -356,13 +356,13 @@ class AnalysisMetadata(BaseModel): class SummaryV2(BaseModel): analysis_metadata: AnalysisMetadata - static_data: Dict[str, StaticData] = {} + static_data: dict[str, StaticData] = {} # recorded artifacts - files: Dict[str, File] = {} - processes: Dict[str, Process] = {} - filenames: Dict[str, Filename] = {} - mutexes: Dict[str, Mutex] = {} - domains: Dict[str, Domain] = {} - ip_addresses: Dict[str, IPAddress] = {} - registry_records: Dict[str, Registry] = {} + files: dict[str, File] = {} + processes: dict[str, Process] = {} + filenames: dict[str, Filename] = {} + mutexes: dict[str, Mutex] = {} + domains: dict[str, Domain] = {} + ip_addresses: dict[str, IPAddress] = {} + registry_records: dict[str, Registry] = {} diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index ec0d6f609..bb6b3ded0 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -14,14 +14,10 @@ import zlib import logging from enum import Enum -from typing import List, Tuple, Union, Literal +from typing import Union, Literal, TypeAlias from pydantic import Field, BaseModel, ConfigDict -# TODO(williballenthin): use typing.TypeAlias directly in Python 3.10+ -# https://github.com/mandiant/capa/issues/1699 -from typing_extensions import TypeAlias - import capa.helpers import capa.version import capa.features.file @@ -62,7 +58,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS + value: Union[int, tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS @classmethod def from_capa(cls, a: capa.features.address.Address) -> "Address": @@ -272,52 +268,52 @@ class InstructionFeature(HashableModel): class InstructionFeatures(BaseModel): 
address: Address - features: Tuple[InstructionFeature, ...] + features: tuple[InstructionFeature, ...] class BasicBlockFeatures(BaseModel): address: Address - features: Tuple[BasicBlockFeature, ...] - instructions: Tuple[InstructionFeatures, ...] + features: tuple[BasicBlockFeature, ...] + instructions: tuple[InstructionFeatures, ...] class FunctionFeatures(BaseModel): address: Address - features: Tuple[FunctionFeature, ...] - basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks") + features: tuple[FunctionFeature, ...] + basic_blocks: tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks") model_config = ConfigDict(populate_by_name=True) class CallFeatures(BaseModel): address: Address name: str - features: Tuple[CallFeature, ...] + features: tuple[CallFeature, ...] class ThreadFeatures(BaseModel): address: Address - features: Tuple[ThreadFeature, ...] - calls: Tuple[CallFeatures, ...] + features: tuple[ThreadFeature, ...] + calls: tuple[CallFeatures, ...] class ProcessFeatures(BaseModel): address: Address name: str - features: Tuple[ProcessFeature, ...] - threads: Tuple[ThreadFeatures, ...] + features: tuple[ProcessFeature, ...] + threads: tuple[ThreadFeatures, ...] class StaticFeatures(BaseModel): - global_: Tuple[GlobalFeature, ...] = Field(alias="global") - file: Tuple[FileFeature, ...] - functions: Tuple[FunctionFeatures, ...] + global_: tuple[GlobalFeature, ...] = Field(alias="global") + file: tuple[FileFeature, ...] + functions: tuple[FunctionFeatures, ...] model_config = ConfigDict(populate_by_name=True) class DynamicFeatures(BaseModel): - global_: Tuple[GlobalFeature, ...] = Field(alias="global") - file: Tuple[FileFeature, ...] - processes: Tuple[ProcessFeatures, ...] + global_: tuple[GlobalFeature, ...] = Field(alias="global") + file: tuple[FileFeature, ...] + processes: tuple[ProcessFeatures, ...] 
model_config = ConfigDict(populate_by_name=True) @@ -344,7 +340,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: """ serialize the given extractor to a string """ - global_features: List[GlobalFeature] = [] + global_features: list[GlobalFeature] = [] for feature, _ in extractor.extract_global_features(): global_features.append( GlobalFeature( @@ -352,7 +348,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: ) ) - file_features: List[FileFeature] = [] + file_features: list[FileFeature] = [] for feature, address in extractor.extract_file_features(): file_features.append( FileFeature( @@ -361,7 +357,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: ) ) - function_features: List[FunctionFeatures] = [] + function_features: list[FunctionFeatures] = [] for f in extractor.get_functions(): faddr = Address.from_capa(f.address) ffeatures = [ @@ -446,7 +442,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: """ serialize the given extractor to a string """ - global_features: List[GlobalFeature] = [] + global_features: list[GlobalFeature] = [] for feature, _ in extractor.extract_global_features(): global_features.append( GlobalFeature( @@ -454,7 +450,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: ) ) - file_features: List[FileFeature] = [] + file_features: list[FileFeature] = [] for feature, address in extractor.extract_file_features(): file_features.append( FileFeature( @@ -463,7 +459,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: ) ) - process_features: List[ProcessFeatures] = [] + process_features: list[ProcessFeatures] = [] for p in extractor.get_processes(): paddr = Address.from_capa(p.address) pname = extractor.get_process_name(p) diff --git a/capa/ghidra/capa_explorer.py b/capa/ghidra/capa_explorer.py index efbfd0e3e..0fe5243c8 100644 --- a/capa/ghidra/capa_explorer.py +++ b/capa/ghidra/capa_explorer.py @@ -13,7 +13,7 @@ import json import logging import pathlib -from 
typing import Any, Dict, List +from typing import Any from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd from ghidra.program.model.symbol import Namespace, SourceType, SymbolType @@ -68,8 +68,8 @@ def __init__( scope, capability, matches, - attack: List[Dict[Any, Any]], - mbc: List[Dict[Any, Any]], + attack: list[dict[Any, Any]], + mbc: list[dict[Any, Any]], ): self.namespace = namespace self.scope = scope @@ -282,7 +282,7 @@ def parse_json(capa_data): for rule, capability in capa_data.get("rules", {}).items(): # structure to contain rule match address & supporting feature data # {rule match addr:[{feature addr:{node_data}}]} - rule_matches: Dict[Any, List[Any]] = {} + rule_matches: dict[Any, list[Any]] = {} for i in range(len(capability.get("matches"))): # grab rule match location match_loc = capability.get("matches")[i][0].get("value") diff --git a/capa/ghidra/helpers.py b/capa/ghidra/helpers.py index 44af2f017..77c1ec14b 100644 --- a/capa/ghidra/helpers.py +++ b/capa/ghidra/helpers.py @@ -8,7 +8,6 @@ import logging import datetime import contextlib -from typing import List from pathlib import Path import capa @@ -112,7 +111,7 @@ def get_file_sha256(): return currentProgram().getExecutableSHA256() # type: ignore [name-defined] # noqa: F821 -def collect_metadata(rules: List[Path]): +def collect_metadata(rules: list[Path]): md5 = get_file_md5() sha256 = get_file_sha256() diff --git a/capa/helpers.py b/capa/helpers.py index 70f1358c3..b1c9c2801 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -14,7 +14,7 @@ import tempfile import contextlib import importlib.util -from typing import Dict, List, Union, BinaryIO, Iterator, NoReturn +from typing import BinaryIO, Iterator, NoReturn from pathlib import Path from zipfile import ZipFile from datetime import datetime @@ -164,7 +164,7 @@ def load_json_from_path(json_path: Path): return report -def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): +def decode_json_lines(fd: BinaryIO | gzip.GzipFile): 
for line in fd: try: line_s = line.strip().decode() @@ -175,7 +175,7 @@ def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): logger.debug("bad DRAKVUF log line: %s", line) -def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: +def load_jsonl_from_path(jsonl_path: Path) -> Iterator[dict]: try: with gzip.open(jsonl_path, "rb") as fg: yield from decode_json_lines(fg) @@ -204,7 +204,7 @@ def get_format_from_report(sample: Path) -> str: return FORMAT_DRAKVUF elif sample.name.endswith(".zip"): with ZipFile(sample, "r") as zipfile: - namelist: List[str] = zipfile.namelist() + namelist: list[str] = zipfile.namelist() if "logs/summary_v2.json" in namelist and "logs/flog.xml" in namelist: # assume VMRay zipfile at a minimum has these files return FORMAT_VMRAY diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index 066e8605d..df231c496 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -8,7 +8,7 @@ import logging import datetime import contextlib -from typing import List, Optional +from typing import Optional from pathlib import Path import idc @@ -165,7 +165,7 @@ def get_file_sha256(): return sha256 -def collect_metadata(rules: List[Path]): +def collect_metadata(rules: list[Path]): """ """ md5 = get_file_md5() sha256 = get_file_sha256() diff --git a/capa/ida/plugin/cache.py b/capa/ida/plugin/cache.py index 42910ee56..1473b5046 100644 --- a/capa/ida/plugin/cache.py +++ b/capa/ida/plugin/cache.py @@ -10,7 +10,7 @@ import itertools import collections -from typing import Set, Dict, Tuple, Union, Optional +from typing import Union, Optional import capa.engine from capa.rules import Scope, RuleSet @@ -34,7 +34,7 @@ def __init__( self.parent.children.add(self) self.features: FeatureSet = collections.defaultdict(set) - self.children: Set[CapaRuleGenFeatureCacheNode] = set() + self.children: set[CapaRuleGenFeatureCacheNode] = set() def __hash__(self): # TODO(mike-hunhoff): confirm this is unique enough @@ -55,9 +55,9 @@ def __init__(self, extractor: 
CapaExplorerFeatureExtractor): self.global_features: FeatureSet = collections.defaultdict(set) self.file_node: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(None, None) - self.func_nodes: Dict[Address, CapaRuleGenFeatureCacheNode] = {} - self.bb_nodes: Dict[Address, CapaRuleGenFeatureCacheNode] = {} - self.insn_nodes: Dict[Address, CapaRuleGenFeatureCacheNode] = {} + self.func_nodes: dict[Address, CapaRuleGenFeatureCacheNode] = {} + self.bb_nodes: dict[Address, CapaRuleGenFeatureCacheNode] = {} + self.insn_nodes: dict[Address, CapaRuleGenFeatureCacheNode] = {} self._find_global_features() self._find_file_features() @@ -115,7 +115,7 @@ def _find_function_and_below_features(self, fh: FunctionHandle): def _find_instruction_capabilities( self, ruleset: RuleSet, insn: CapaRuleGenFeatureCacheNode - ) -> Tuple[FeatureSet, MatchResults]: + ) -> tuple[FeatureSet, MatchResults]: features: FeatureSet = collections.defaultdict(set) for feature, locs in itertools.chain(insn.features.items(), self.global_features.items()): @@ -131,7 +131,7 @@ def _find_instruction_capabilities( def _find_basic_block_capabilities( self, ruleset: RuleSet, bb: CapaRuleGenFeatureCacheNode - ) -> Tuple[FeatureSet, MatchResults, MatchResults]: + ) -> tuple[FeatureSet, MatchResults, MatchResults]: features: FeatureSet = collections.defaultdict(set) insn_matches: MatchResults = collections.defaultdict(list) @@ -155,7 +155,7 @@ def _find_basic_block_capabilities( def find_code_capabilities( self, ruleset: RuleSet, fh: FunctionHandle - ) -> Tuple[FeatureSet, MatchResults, MatchResults, MatchResults]: + ) -> tuple[FeatureSet, MatchResults, MatchResults, MatchResults]: f_node: Optional[CapaRuleGenFeatureCacheNode] = self._get_cached_func_node(fh) if f_node is None: return {}, {}, {}, {} @@ -179,7 +179,7 @@ def find_code_capabilities( _, function_matches = ruleset.match(Scope.FUNCTION, function_features, f_node.address) return function_features, function_matches, bb_matches, insn_matches - def 
find_file_capabilities(self, ruleset: RuleSet) -> Tuple[FeatureSet, MatchResults]: + def find_file_capabilities(self, ruleset: RuleSet) -> tuple[FeatureSet, MatchResults]: features: FeatureSet = collections.defaultdict(set) for func_node in self.file_node.children: diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 028ce2078..54bd70409 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -10,7 +10,7 @@ import itertools import collections from enum import IntFlag -from typing import Any, List, Optional +from typing import Any, Optional from pathlib import Path import idaapi @@ -1146,7 +1146,7 @@ def set_rulegen_preview_border_success(self): def update_rule_status(self, rule_text: str): """ """ rule: capa.rules.Rule - rules: List[Rule] + rules: list[Rule] ruleset: capa.rules.RuleSet if self.view_rulegen_editor.invisibleRootItem().childCount() == 0: diff --git a/capa/ida/plugin/item.py b/capa/ida/plugin/item.py index b2be1c141..4e8f1738a 100644 --- a/capa/ida/plugin/item.py +++ b/capa/ida/plugin/item.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import codecs -from typing import List, Iterator, Optional +from typing import Iterator, Optional import idc import idaapi @@ -36,11 +36,11 @@ def ea_to_hex(ea): class CapaExplorerDataItem: """store data for CapaExplorerDataModel""" - def __init__(self, parent: Optional["CapaExplorerDataItem"], data: List[str], can_check=True): + def __init__(self, parent: Optional["CapaExplorerDataItem"], data: list[str], can_check=True): """initialize item""" self.pred = parent self._data = data - self._children: List["CapaExplorerDataItem"] = [] + self._children: list["CapaExplorerDataItem"] = [] self._checked = False self._can_check = can_check diff --git a/capa/ida/plugin/model.py b/capa/ida/plugin/model.py index c3b41670c..0d8221b12 100644 --- a/capa/ida/plugin/model.py +++ b/capa/ida/plugin/model.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Set, Dict, List, Tuple, Optional +from typing import Optional from collections import deque import idc @@ -354,7 +354,7 @@ def render_capa_doc_statement_node( parent: CapaExplorerDataItem, match: rd.Match, statement: rd.Statement, - locations: List[Address], + locations: list[Address], doc: rd.ResultDocument, ): """render capa statement read from doc @@ -447,9 +447,9 @@ def render_capa_doc_match(self, parent: CapaExplorerDataItem, match: rd.Match, d def render_capa_doc_by_function(self, doc: rd.ResultDocument): """render rule matches by function meaning each rule match is nested under function where it was found""" - matches_by_function: Dict[AbsoluteVirtualAddress, Tuple[CapaExplorerFunctionItem, Set[str]]] = {} + matches_by_function: dict[AbsoluteVirtualAddress, tuple[CapaExplorerFunctionItem, set[str]]] = {} for rule in rutils.capability_rules(doc): - match_eas: List[int] = [] + match_eas: list[int] = [] # initial pass of rule matches for addr_, _ in rule.matches: @@ -560,7 +560,7 @@ def render_capa_doc_feature_node( parent: CapaExplorerDataItem, match: rd.Match, feature: frzf.Feature, - locations: List[Address], + locations: list[Address], doc: rd.ResultDocument, ): """process capa doc feature node diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index b93c31a8c..d5aa113be 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import re -from typing import Dict, Optional +from typing import Optional from collections import Counter import idc @@ -1019,7 +1019,7 @@ def new_leaf_node(self, parent, data, feature=None): return o - def load_features(self, file_features, func_features: Optional[Dict] = None): + def load_features(self, file_features, func_features: Optional[dict] = None): """ """ self.parse_features_for_tree(self.new_parent_node(self, ("File Scope",)), file_features) if func_features: diff --git a/capa/loader.py b/capa/loader.py index f481d7b8d..700d1a3ba 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -10,7 +10,7 @@ import logging import datetime import contextlib -from typing import Set, Dict, List, Optional +from typing import Optional from pathlib import Path from rich.console import Console @@ -128,7 +128,7 @@ def get_meta_str(vw): return f"{', '.join(meta)}, number of functions: {len(vw.getFunctions())}" -def get_workspace(path: Path, input_format: str, sigpaths: List[Path]): +def get_workspace(path: Path, input_format: str, sigpaths: list[Path]): """ load the program at the given path into a vivisect workspace using the given format. also apply the given FLIRT signatures. @@ -198,7 +198,7 @@ def get_extractor( input_format: str, os_: str, backend: str, - sigpaths: List[Path], + sigpaths: list[Path], should_save_workspace=False, disable_progress=False, sample_path: Optional[Path] = None, @@ -346,7 +346,7 @@ def get_extractor( raise ValueError("unexpected backend: " + backend) -def _get_binexport2_file_extractors(input_file: Path) -> List[FeatureExtractor]: +def _get_binexport2_file_extractors(input_file: Path) -> list[FeatureExtractor]: # I'm not sure this is where this logic should live, but it works for now. # we'll keep this a "private" routine until we're sure. 
import capa.features.extractors.binexport2 @@ -368,8 +368,8 @@ def _get_binexport2_file_extractors(input_file: Path) -> List[FeatureExtractor]: return [] -def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtractor]: - file_extractors: List[FeatureExtractor] = [] +def get_file_extractors(input_file: Path, input_format: str) -> list[FeatureExtractor]: + file_extractors: list[FeatureExtractor] = [] # we use lazy importing here to avoid eagerly loading dependencies # that some specialized environments may not have, @@ -416,11 +416,11 @@ def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtr return file_extractors -def get_signatures(sigs_path: Path) -> List[Path]: +def get_signatures(sigs_path: Path) -> list[Path]: if not sigs_path.exists(): raise IOError(f"signatures path {sigs_path} does not exist or cannot be accessed") - paths: List[Path] = [] + paths: list[Path] = [] if sigs_path.is_file(): paths.append(sigs_path) elif sigs_path.is_dir(): @@ -478,11 +478,11 @@ def get_sample_analysis(format_, arch, os_, extractor, rules_path, counts): def collect_metadata( - argv: List[str], + argv: list[str], input_path: Path, input_format: str, os_: str, - rules_path: List[Path], + rules_path: list[Path], extractor: FeatureExtractor, counts: dict, ) -> rdoc.Metadata: @@ -545,7 +545,7 @@ def compute_dynamic_layout( """ assert isinstance(extractor, DynamicFeatureExtractor) - matched_calls: Set[Address] = set() + matched_calls: set[Address] = set() def result_rec(result: capa.features.common.Result): for loc in result.locations: @@ -558,14 +558,14 @@ def result_rec(result: capa.features.common.Result): for _, result in matches: result_rec(result) - names_by_process: Dict[Address, str] = {} - names_by_call: Dict[Address, str] = {} + names_by_process: dict[Address, str] = {} + names_by_call: dict[Address, str] = {} - matched_processes: Set[Address] = set() - matched_threads: Set[Address] = set() + matched_processes: set[Address] = 
set() + matched_threads: set[Address] = set() - threads_by_process: Dict[Address, List[Address]] = {} - calls_by_thread: Dict[Address, List[Address]] = {} + threads_by_process: dict[Address, list[Address]] = {} + calls_by_thread: dict[Address, list[Address]] = {} for p in extractor.get_processes(): threads_by_process[p.address] = [] @@ -625,8 +625,8 @@ def compute_static_layout(rules: RuleSet, extractor: StaticFeatureExtractor, cap otherwise, we may pollute the json document with a large amount of un-referenced data. """ - functions_by_bb: Dict[Address, Address] = {} - bbs_by_function: Dict[Address, List[Address]] = {} + functions_by_bb: dict[Address, Address] = {} + bbs_by_function: dict[Address, list[Address]] = {} for f in extractor.get_functions(): bbs_by_function[f.address] = [] for bb in extractor.get_basic_blocks(f): diff --git a/capa/main.py b/capa/main.py index ca1434d50..46619a66a 100644 --- a/capa/main.py +++ b/capa/main.py @@ -17,7 +17,7 @@ import textwrap import contextlib from types import TracebackType -from typing import Any, Set, Dict, List, Optional, TypedDict +from typing import Any, Optional, TypedDict from pathlib import Path import colorama @@ -129,8 +129,8 @@ class FilterConfig(TypedDict, total=False): - processes: Set[int] - functions: Set[int] + processes: set[int] + functions: set[int] @contextlib.contextmanager @@ -170,7 +170,7 @@ def get_default_root() -> Path: return Path(__file__).resolve().parent.parent -def get_default_signatures() -> List[Path]: +def get_default_signatures() -> list[Path]: """ compute a list of file system paths to the default FLIRT signatures. """ @@ -214,7 +214,7 @@ def install_common_args(parser, wanted=None): args: parser (argparse.ArgumentParser): a parser to update in place, adding common arguments. - wanted (Set[str]): collection of arguments to opt-into, including: + wanted (set[str]): collection of arguments to opt-into, including: - "input_file": required positional argument to input file. 
- "format": flag to override file format. - "os": flag to override file operating system. @@ -457,7 +457,7 @@ def handle_common_args(args): args.input_file = Path(args.input_file) if hasattr(args, "rules"): - rules_paths: List[Path] = [] + rules_paths: list[Path] = [] if args.rules == [RULES_PATH_DEFAULT_STRING]: logger.debug("-" * 80) @@ -695,7 +695,7 @@ def get_rules_from_cli(args) -> RuleSet: return rules -def get_file_extractors_from_cli(args, input_format: str) -> List[FeatureExtractor]: +def get_file_extractors_from_cli(args, input_format: str) -> list[FeatureExtractor]: """ args: args: The parsed command line arguments from `install_common_args`. @@ -741,7 +741,7 @@ def get_file_extractors_from_cli(args, input_format: str) -> List[FeatureExtract raise ShouldExitError(E_INVALID_FILE_TYPE) from e -def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: List[FeatureExtractor]) -> bool: +def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool: """ args: args: The parsed command line arguments from `install_common_args`. 
@@ -776,7 +776,7 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: List[F return found_file_limitation -def get_signatures_from_cli(args, input_format: str, backend: str) -> List[Path]: +def get_signatures_from_cli(args, input_format: str, backend: str) -> list[Path]: if backend != BACKEND_VIV: logger.debug("skipping library code matching: only supported by the vivisect backend") return [] @@ -896,7 +896,7 @@ def apply_extractor_filters(extractor: FeatureExtractor, extractor_filters: Filt raise ShouldExitError(E_INVALID_FEATURE_EXTRACTOR) -def main(argv: Optional[List[str]] = None): +def main(argv: Optional[list[str]] = None): if sys.version_info < (3, 10): raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+") @@ -967,7 +967,7 @@ def main(argv: Optional[List[str]] = None): meta: rdoc.Metadata capabilities: MatchResults - counts: Dict[str, Any] + counts: dict[str, Any] if input_format == FORMAT_RESULT: # result document directly parses into meta, capabilities diff --git a/capa/perf.py b/capa/perf.py index 2dcdb4603..38962222f 100644 --- a/capa/perf.py +++ b/capa/perf.py @@ -5,11 +5,10 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -import typing import collections # this structure is unstable and may change before the next major release. -counters: typing.Counter[str] = collections.Counter() +counters: collections.Counter[str] = collections.Counter() def reset(): diff --git a/capa/render/proto/__init__.py b/capa/render/proto/__init__.py index ed4c690e1..dbde5949c 100644 --- a/capa/render/proto/__init__.py +++ b/capa/render/proto/__init__.py @@ -25,7 +25,7 @@ Alternatively, --pyi_out=. 
can be used to generate a Python Interface file that supports development """ import datetime -from typing import Any, Dict, Union +from typing import Any, Union import google.protobuf.json_format @@ -553,7 +553,7 @@ def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadat def doc_to_pb2(doc: rd.ResultDocument) -> capa_pb2.ResultDocument: - rule_matches: Dict[str, capa_pb2.RuleMatches] = {} + rule_matches: dict[str, capa_pb2.RuleMatches] = {} for rule_name, matches in doc.rules.items(): m = capa_pb2.RuleMatches( meta=rule_metadata_to_pb2(matches.meta), @@ -977,7 +977,7 @@ def rule_metadata_from_pb2(pb: capa_pb2.RuleMetadata) -> rd.RuleMetadata: def doc_from_pb2(doc: capa_pb2.ResultDocument) -> rd.ResultDocument: - rule_matches: Dict[str, rd.RuleMatches] = {} + rule_matches: dict[str, rd.RuleMatches] = {} for rule_name, matches in doc.rules.items(): m = rd.RuleMatches( meta=rule_metadata_from_pb2(matches.meta), diff --git a/capa/render/result_document.py b/capa/render/result_document.py index ab6b03979..8aece5c9c 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -8,11 +8,10 @@ import datetime import collections from enum import Enum -from typing import Dict, List, Tuple, Union, Literal, Optional +from typing import Union, Literal, Optional, TypeAlias from pathlib import Path from pydantic import Field, BaseModel, ConfigDict -from typing_extensions import TypeAlias import capa.rules import capa.engine @@ -46,7 +45,7 @@ class BasicBlockLayout(Model): class FunctionLayout(Model): address: frz.Address - matched_basic_blocks: Tuple[BasicBlockLayout, ...] + matched_basic_blocks: tuple[BasicBlockLayout, ...] class CallLayout(Model): @@ -56,21 +55,21 @@ class CallLayout(Model): class ThreadLayout(Model): address: frz.Address - matched_calls: Tuple[CallLayout, ...] + matched_calls: tuple[CallLayout, ...] class ProcessLayout(Model): address: frz.Address name: str - matched_threads: Tuple[ThreadLayout, ...] 
+ matched_threads: tuple[ThreadLayout, ...] class StaticLayout(Model): - functions: Tuple[FunctionLayout, ...] + functions: tuple[FunctionLayout, ...] class DynamicLayout(Model): - processes: Tuple[ProcessLayout, ...] + processes: tuple[ProcessLayout, ...] Layout: TypeAlias = Union[StaticLayout, DynamicLayout] @@ -93,12 +92,12 @@ class ProcessFeatureCount(Model): class StaticFeatureCounts(Model): file: int - functions: Tuple[FunctionFeatureCount, ...] + functions: tuple[FunctionFeatureCount, ...] class DynamicFeatureCounts(Model): file: int - processes: Tuple[ProcessFeatureCount, ...] + processes: tuple[ProcessFeatureCount, ...] FeatureCounts: TypeAlias = Union[StaticFeatureCounts, DynamicFeatureCounts] @@ -109,11 +108,11 @@ class StaticAnalysis(Model): arch: str os: str extractor: str - rules: Tuple[str, ...] + rules: tuple[str, ...] base_address: frz.Address layout: StaticLayout feature_counts: StaticFeatureCounts - library_functions: Tuple[LibraryFunction, ...] + library_functions: tuple[LibraryFunction, ...] class DynamicAnalysis(Model): @@ -121,7 +120,7 @@ class DynamicAnalysis(Model): arch: str os: str extractor: str - rules: Tuple[str, ...] + rules: tuple[str, ...] 
layout: DynamicLayout feature_counts: DynamicFeatureCounts @@ -137,7 +136,7 @@ class Flavor(str, Enum): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] + argv: Optional[tuple[str, ...]] sample: Sample flavor: Flavor analysis: Analysis @@ -254,7 +253,7 @@ def node_from_capa(node: Union[capa.engine.Statement, capa.engine.Feature]) -> N def node_to_capa( - node: Node, children: List[Union[capa.engine.Statement, capa.engine.Feature]] + node: Node, children: list[Union[capa.engine.Statement, capa.engine.Feature]] ) -> Union[capa.engine.Statement, capa.engine.Feature]: if isinstance(node, StatementNode): if isinstance(node.statement, CompoundStatement): @@ -313,9 +312,9 @@ class Match(FrozenModel): success: bool node: Node - children: Tuple["Match", ...] - locations: Tuple[frz.Address, ...] - captures: Dict[str, Tuple[frz.Address, ...]] + children: tuple["Match", ...] + locations: tuple[frz.Address, ...] + captures: dict[str, tuple[frz.Address, ...]] @classmethod def from_capa( @@ -435,7 +434,7 @@ def from_capa( captures={capture: tuple(captures[capture]) for capture in captures}, ) - def to_capa(self, rules_by_name: Dict[str, capa.rules.Rule]) -> capa.engine.Result: + def to_capa(self, rules_by_name: dict[str, capa.rules.Rule]) -> capa.engine.Result: children = [child.to_capa(rules_by_name) for child in self.children] statement = node_to_capa(self.node, [child.statement for child in children]) @@ -492,7 +491,7 @@ class AttackSpec(FrozenModel): id: like `Identifier` above, perhaps "T1059.006" """ - parts: Tuple[str, ...] + parts: tuple[str, ...] tactic: str technique: str subtechnique: str @@ -532,7 +531,7 @@ class MBCSpec(FrozenModel): id: like `Identifier` above, perhaps "E1056.m01" """ - parts: Tuple[str, ...] + parts: tuple[str, ...] 
objective: str behavior: str method: str @@ -572,12 +571,12 @@ class MaecMetadata(FrozenModel): class RuleMetadata(FrozenModel): name: str namespace: Optional[str] = None - authors: Tuple[str, ...] + authors: tuple[str, ...] scopes: capa.rules.Scopes - attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") - mbc: Tuple[MBCSpec, ...] - references: Tuple[str, ...] - examples: Tuple[str, ...] + attack: tuple[AttackSpec, ...] = Field(alias="att&ck") + mbc: tuple[MBCSpec, ...] + references: tuple[str, ...] + examples: tuple[str, ...] description: str lib: bool = Field(False, alias="lib") @@ -621,16 +620,16 @@ class RuleMatches(FrozenModel): meta: RuleMetadata source: str - matches: Tuple[Tuple[frz.Address, Match], ...] + matches: tuple[tuple[frz.Address, Match], ...] class ResultDocument(FrozenModel): meta: Metadata - rules: Dict[str, RuleMatches] + rules: dict[str, RuleMatches] @classmethod def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) -> "ResultDocument": - rule_matches: Dict[str, RuleMatches] = {} + rule_matches: dict[str, RuleMatches] = {} for rule_name, matches in capabilities.items(): rule = rules[rule_name] @@ -648,8 +647,8 @@ def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) - return ResultDocument(meta=meta, rules=rule_matches) - def to_capa(self) -> Tuple[Metadata, Dict]: - capabilities: Dict[str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]] = ( + def to_capa(self) -> tuple[Metadata, dict]: + capabilities: dict[str, list[tuple[capa.features.address.Address, capa.features.common.Result]]] = ( collections.defaultdict(list) ) diff --git a/capa/render/utils.py b/capa/render/utils.py index 73ed1d296..6f42f249a 100644 --- a/capa/render/utils.py +++ b/capa/render/utils.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import io -from typing import Dict, List, Tuple, Union, Iterator, Optional +from typing import Union, Iterator, Optional import rich.console from rich.progress import Text @@ -41,7 +41,7 @@ def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]): return f"{'::'.join(data.parts)} [{data.id}]" -def sort_rules(rules: Dict[str, rd.RuleMatches]) -> List[Tuple[Optional[str], str, rd.RuleMatches]]: +def sort_rules(rules: dict[str, rd.RuleMatches]) -> list[tuple[Optional[str], str, rd.RuleMatches]]: """Sort rules by namespace and name.""" return sorted((rule.meta.namespace or "", rule.meta.name, rule) for rule in rules.values()) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 9c45119e5..c021bfbb0 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging import textwrap -from typing import Dict, Iterable, Optional +from typing import Iterable, Optional from rich.text import Text from rich.table import Table @@ -323,7 +323,7 @@ def render_rules(console: Console, doc: rd.ResultDocument): """ import capa.render.verbose as v - functions_by_bb: Dict[capa.features.address.Address, capa.features.address.Address] = {} + functions_by_bb: dict[capa.features.address.Address, capa.features.address.Address] = {} if isinstance(doc.meta.analysis, rd.StaticAnalysis): for finfo in doc.meta.analysis.layout.functions: faddress = finfo.address.to_capa() diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 1d7f13ed1..11c0c0c2a 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -26,7 +26,7 @@ # https://github.com/python/mypy/issues/1153 from backports.functools_lru_cache import lru_cache # type: ignore -from typing import Any, Set, Dict, List, Tuple, Union, Callable, Iterator, Optional, cast +from typing import Any, Union, Callable, Iterator, Optional, cast from dataclasses import asdict, dataclass 
import yaml @@ -132,10 +132,10 @@ def __repr__(self) -> str: raise ValueError("invalid rules class. at least one scope must be specified") @classmethod - def from_dict(self, scopes: Dict[str, str]) -> "Scopes": + def from_dict(self, scopes: dict[str, str]) -> "Scopes": # make local copy so we don't make changes outside of this routine. # we'll use the value None to indicate the scope is not supported. - scopes_: Dict[str, Optional[str]] = dict(scopes) + scopes_: dict[str, Optional[str]] = dict(scopes) # mark non-specified scopes as invalid if "static" not in scopes_: @@ -168,7 +168,7 @@ def from_dict(self, scopes: Dict[str, str]) -> "Scopes": ) -SUPPORTED_FEATURES: Dict[str, Set] = { +SUPPORTED_FEATURES: dict[str, set] = { Scope.GLOBAL: { # these will be added to other scopes, see below. capa.features.common.OS, @@ -297,7 +297,7 @@ def __repr__(self): def ensure_feature_valid_for_scopes(scopes: Scopes, feature: Union[Feature, Statement]): # construct a dict of all supported features - supported_features: Set = set() + supported_features: set = set() if scopes.static: supported_features.update(SUPPORTED_FEATURES[scopes.static]) if scopes.dynamic: @@ -322,12 +322,12 @@ def ensure_feature_valid_for_scopes(scopes: Scopes, feature: Union[Feature, Stat def translate_com_feature(com_name: str, com_type: ComType) -> ceng.Statement: com_db = capa.features.com.load_com_database(com_type) - guids: Optional[List[str]] = com_db.get(com_name) + guids: Optional[list[str]] = com_db.get(com_name) if not guids: logger.error(" %s doesn't exist in COM %s database", com_name, com_type) raise InvalidRule(f"'{com_name}' doesn't exist in COM {com_type} database") - com_features: List[Feature] = [] + com_features: list[Feature] = [] for guid in guids: hex_chars = guid.replace("-", "") h = [hex_chars[i : i + 2] for i in range(0, len(hex_chars), 2)] @@ -823,11 +823,11 @@ def build_statements(d, scopes: Scopes): return feature -def first(s: List[Any]) -> Any: +def first(s: list[Any]) -> Any: 
return s[0] -def second(s: List[Any]) -> Any: +def second(s: list[Any]) -> Any: return s[1] @@ -853,13 +853,13 @@ def get_dependencies(self, namespaces): compute the transitive dependency graph themself, if they want it. Args: - namespaces(Dict[str, List[Rule]]): mapping from namespace name to rules in it. + namespaces(dict[str, list[Rule]]): mapping from namespace name to rules in it. see `index_rules_by_namespace`. Returns: - List[str]: names of rules upon which this rule depends. + list[str]: names of rules upon which this rule depends. """ - deps: Set[str] = set() + deps: set[str] = set() def rec(statement): if isinstance(statement, capa.features.common.MatchedRule): @@ -968,8 +968,8 @@ def extract_subscope_rules(self): yield from self._extract_subscope_rules_rec(self.statement) - def _extract_all_features_rec(self, statement) -> Set[Feature]: - feature_set: Set[Feature] = set() + def _extract_all_features_rec(self, statement) -> set[Feature]: + feature_set: set[Feature] = set() for child in statement.get_children(): if isinstance(child, Statement): @@ -978,7 +978,7 @@ def _extract_all_features_rec(self, statement) -> Set[Feature]: feature_set.add(child) return feature_set - def extract_all_features(self) -> Set[Feature]: + def extract_all_features(self) -> set[Feature]: """ recursively extracts all feature statements in this rule. @@ -1001,7 +1001,7 @@ def evaluate(self, features: FeatureSet, short_circuit=True): return self.statement.evaluate(features, short_circuit=short_circuit) @classmethod - def from_dict(cls, d: Dict[str, Any], definition: str) -> "Rule": + def from_dict(cls, d: dict[str, Any], definition: str) -> "Rule": meta = d["rule"]["meta"] name = meta["name"] @@ -1214,14 +1214,14 @@ def move_to_end(m, k): return doc -def get_rules_with_scope(rules, scope: Scope) -> List[Rule]: +def get_rules_with_scope(rules, scope: Scope) -> list[Rule]: """ from the given collection of rules, select those with the given scope. 
""" return [rule for rule in rules if scope in rule.scopes] -def get_rules_and_dependencies(rules: List[Rule], rule_name: str) -> Iterator[Rule]: +def get_rules_and_dependencies(rules: list[Rule], rule_name: str) -> Iterator[Rule]: """ from the given collection of rules, select a rule and its dependencies (transitively). """ @@ -1249,7 +1249,7 @@ def rec(rule: Rule): yield rule -def ensure_rules_are_unique(rules: List[Rule]) -> None: +def ensure_rules_are_unique(rules: list[Rule]) -> None: seen = set() for rule in rules: if rule.name in seen: @@ -1257,7 +1257,7 @@ def ensure_rules_are_unique(rules: List[Rule]) -> None: seen.add(rule.name) -def ensure_rule_dependencies_are_met(rules: List[Rule]) -> None: +def ensure_rule_dependencies_are_met(rules: list[Rule]) -> None: """ raise an exception if a rule dependency does not exist. @@ -1274,7 +1274,7 @@ def ensure_rule_dependencies_are_met(rules: List[Rule]) -> None: raise InvalidRule(f'rule "{rule.name}" depends on missing rule "{dep}"') -def index_rules_by_namespace(rules: List[Rule]) -> Dict[str, List[Rule]]: +def index_rules_by_namespace(rules: list[Rule]) -> dict[str, list[Rule]]: """ compute the rules that fit into each namespace found within the given rules. @@ -1303,7 +1303,7 @@ def index_rules_by_namespace(rules: List[Rule]) -> Dict[str, List[Rule]]: return dict(namespaces) -def topologically_order_rules(rules: List[Rule]) -> List[Rule]: +def topologically_order_rules(rules: list[Rule]) -> list[Rule]: """ order the given rules such that dependencies show up before dependents. this means that as we match rules, we can add features for the matches, and these @@ -1351,7 +1351,7 @@ class RuleSet: def __init__( self, - rules: List[Rule], + rules: list[Rule], ): super().__init__() @@ -1389,7 +1389,7 @@ def __init__( self.rules_by_scope = {scope: self._get_rules_for_scope(rules, scope) for scope in scopes} # these structures are unstable and may change before the next major release. 
- scores_by_rule: Dict[str, int] = {} + scores_by_rule: dict[str, int] = {} self._feature_indexes_by_scopes = { scope: self._index_rules_by_feature(scope, self.rules_by_scope[scope], scores_by_rule) for scope in scopes } @@ -1433,7 +1433,7 @@ def __contains__(self, rulename): # this routine is unstable and may change before the next major release. @staticmethod - def _score_feature(scores_by_rule: Dict[str, int], node: capa.features.common.Feature) -> int: + def _score_feature(scores_by_rule: dict[str, int], node: capa.features.common.Feature) -> int: """ Score the given feature by how "uncommon" we think it will be. Features that we expect to be very selective (ie. uniquely identify a rule and be required to match), @@ -1577,17 +1577,17 @@ def _score_feature(scores_by_rule: Dict[str, int], node: capa.features.common.Fe @dataclass class _RuleFeatureIndex: # Mapping from hashable feature to a list of rules that might have this feature. - rules_by_feature: Dict[Feature, Set[str]] + rules_by_feature: dict[Feature, set[str]] # Mapping from rule name to list of Regex/Substring features that have to match. # All these features will be evaluated whenever a String feature is encountered. - string_rules: Dict[str, List[Feature]] + string_rules: dict[str, list[Feature]] # Mapping from rule name to list of Bytes features that have to match. # All these features will be evaluated whenever a Bytes feature is encountered. - bytes_rules: Dict[str, List[Feature]] + bytes_rules: dict[str, list[Feature]] # this routine is unstable and may change before the next major release. @staticmethod - def _index_rules_by_feature(scope: Scope, rules: List[Rule], scores_by_rule: Dict[str, int]) -> _RuleFeatureIndex: + def _index_rules_by_feature(scope: Scope, rules: list[Rule], scores_by_rule: dict[str, int]) -> _RuleFeatureIndex: """ Index the given rules by their minimal set of most "uncommon" features required to match. 
@@ -1595,12 +1595,12 @@ def _index_rules_by_feature(scope: Scope, rules: List[Rule], scores_by_rule: Dic (which are not hashable and require a scan) that have to match, too. """ - rules_by_feature: Dict[Feature, Set[str]] = collections.defaultdict(set) + rules_by_feature: dict[Feature, set[str]] = collections.defaultdict(set) def rec( rule_name: str, node: Union[Feature, Statement], - ) -> Optional[Tuple[int, Set[Feature]]]: + ) -> Optional[tuple[int, set[Feature]]]: """ Walk through a rule's logic tree, picking the features to use for indexing, returning the feature and an associated score. @@ -1667,7 +1667,7 @@ def rec( # # In this case, we prefer to pick the pair of API features since each is expected # to be more common than the mnemonic. - scores: List[Tuple[int, Set[Feature]]] = [] + scores: list[tuple[int, set[Feature]]] = [] for child in node.children: score = rec(rule_name, child) @@ -1734,8 +1734,8 @@ def and_score_key(item): # These are the Regex/Substring/Bytes features that we have to use for filtering. # Ideally we find a way to get rid of all of these, eventually. 
- string_rules: Dict[str, List[Feature]] = {} - bytes_rules: Dict[str, List[Feature]] = {} + string_rules: dict[str, list[Feature]] = {} + bytes_rules: dict[str, list[Feature]] = {} for rule in rules: rule_name = rule.meta["name"] @@ -1765,10 +1765,10 @@ def and_score_key(item): logger.debug(" : [%d] %s", RuleSet._score_feature(scores_by_rule, feature), feature) if string_features: - string_rules[rule_name] = cast(List[Feature], string_features) + string_rules[rule_name] = cast(list[Feature], string_features) if bytes_features: - bytes_rules[rule_name] = cast(List[Feature], bytes_features) + bytes_rules[rule_name] = cast(list[Feature], bytes_features) for feature in hashable_features: rules_by_feature[feature].add(rule_name) @@ -1785,7 +1785,7 @@ def and_score_key(item): return RuleSet._RuleFeatureIndex(rules_by_feature, string_rules, bytes_rules) @staticmethod - def _get_rules_for_scope(rules, scope) -> List[Rule]: + def _get_rules_for_scope(rules, scope) -> list[Rule]: """ given a collection of rules, collect the rules that are needed at the given scope. these rules are ordered topologically. @@ -1793,7 +1793,7 @@ def _get_rules_for_scope(rules, scope) -> List[Rule]: don't include auto-generated "subscope" rules. we want to include general "lib" rules here - even if they are not dependencies of other rules, see #398 """ - scope_rules: Set[Rule] = set() + scope_rules: set[Rule] = set() # we need to process all rules, not just rules with the given scope. # this is because rules with a higher scope, e.g. file scope, may have subscope rules @@ -1807,7 +1807,7 @@ def _get_rules_for_scope(rules, scope) -> List[Rule]: return get_rules_with_scope(topologically_order_rules(list(scope_rules)), scope) @staticmethod - def _extract_subscope_rules(rules) -> List[Rule]: + def _extract_subscope_rules(rules) -> list[Rule]: """ process the given sequence of rules. for each one, extract any embedded subscope rules into their own rule. 
@@ -1854,16 +1854,16 @@ def filter_rules_by_meta(self, tag: str) -> "RuleSet": # this routine is unstable and may change before the next major release. @staticmethod - def _sort_rules_by_index(rule_index_by_rule_name: Dict[str, int], rules: List[Rule]): + def _sort_rules_by_index(rule_index_by_rule_name: dict[str, int], rules: list[Rule]): """ - Sort (in place) the given rules by their index provided by the given Dict. + Sort (in place) the given rules by their index provided by the given dict. This mapping is intended to represent the topologic index of the given rule; that is, rules with a lower index should be evaluated first, since their dependencies will be evaluated later. """ rules.sort(key=lambda r: rule_index_by_rule_name[r.name]) - def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[FeatureSet, ceng.MatchResults]: + def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[FeatureSet, ceng.MatchResults]: """ Match rules from this ruleset at the given scope against the given features. @@ -1872,7 +1872,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea """ feature_index: RuleSet._RuleFeatureIndex = self._feature_indexes_by_scopes[scope] - rules: List[Rule] = self.rules_by_scope[scope] + rules: list[Rule] = self.rules_by_scope[scope] # Topologic location of rule given its name. # That is, rules with a lower index should be evaluated first, since their dependencies # will be evaluated later. @@ -1908,7 +1908,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea # Find all the rules that could match the given feature set. # Ideally we want this set to be as small and focused as possible, # and we can tune it by tweaking `_index_rules_by_feature`. 
- candidate_rule_names: Set[str] = set() + candidate_rule_names: set[str] = set() for feature in features: candidate_rule_names.update(feature_index.rules_by_feature.get(feature, ())) @@ -2018,7 +2018,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea new_features.append(capa.features.common.MatchedRule(namespace)) if new_features: - new_candidates: List[str] = [] + new_candidates: list[str] = [] for new_feature in new_features: new_candidates.extend(feature_index.rules_by_feature.get(new_feature, ())) @@ -2031,7 +2031,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea def match( self, scope: Scope, features: FeatureSet, addr: Address, paranoid=False - ) -> Tuple[FeatureSet, ceng.MatchResults]: + ) -> tuple[FeatureSet, ceng.MatchResults]: """ Match rules from this ruleset at the given scope against the given features. @@ -2053,7 +2053,7 @@ def match( features, matches = self._match(scope, features, addr) if paranoid: - rules: List[Rule] = self.rules_by_scope[scope] + rules: list[Rule] = self.rules_by_scope[scope] paranoid_features, paranoid_matches = capa.engine.match(rules, features, addr) if features != paranoid_features: @@ -2086,7 +2086,7 @@ def is_nursery_rule_path(path: Path) -> bool: return "nursery" in path.parts -def collect_rule_file_paths(rule_paths: List[Path]) -> List[Path]: +def collect_rule_file_paths(rule_paths: list[Path]) -> list[Path]: """ collect all rule file paths, including those in subdirectories. 
""" @@ -2127,7 +2127,7 @@ def on_load_rule_default(_path: RulePath, i: int, _total: int) -> None: def get_rules( - rule_paths: List[RulePath], + rule_paths: list[RulePath], cache_dir=None, on_load_rule: Callable[[RulePath, int, int], None] = on_load_rule_default, enable_cache: bool = True, @@ -2154,7 +2154,7 @@ def get_rules( if ruleset is not None: return ruleset - rules: List[Rule] = [] + rules: list[Rule] = [] total_rule_count = len(rule_file_paths) for i, (path, content) in enumerate(zip(rule_file_paths, rule_contents)): diff --git a/capa/rules/cache.py b/capa/rules/cache.py index 6f87570ef..57537e959 100644 --- a/capa/rules/cache.py +++ b/capa/rules/cache.py @@ -11,7 +11,7 @@ import pickle import hashlib import logging -from typing import List, Optional +from typing import Optional from pathlib import Path from dataclasses import dataclass @@ -26,7 +26,7 @@ CacheIdentifier = str -def compute_cache_identifier(rule_content: List[bytes]) -> CacheIdentifier: +def compute_cache_identifier(rule_content: list[bytes]) -> CacheIdentifier: hash = hashlib.sha256() # note that this changes with each release, @@ -96,7 +96,7 @@ def load(data): return cache -def get_ruleset_content(ruleset: capa.rules.RuleSet) -> List[bytes]: +def get_ruleset_content(ruleset: capa.rules.RuleSet) -> list[bytes]: rule_contents = [] for rule in ruleset.rules.values(): if rule.is_subscope_rule(): @@ -132,7 +132,7 @@ def cache_ruleset(cache_dir: Path, ruleset: capa.rules.RuleSet): return -def load_cached_ruleset(cache_dir: Path, rule_contents: List[bytes]) -> Optional[capa.rules.RuleSet]: +def load_cached_ruleset(cache_dir: Path, rule_contents: list[bytes]) -> Optional[capa.rules.RuleSet]: """ load a cached ruleset from disk, using the given cache directory. 
the raw rule contents are required here to prove that the rules haven't changed diff --git a/pyproject.toml b/pyproject.toml index 3416c3a9b..1cbc9b0e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ authors = [ description = "The FLARE team's open-source tool to identify capabilities in executable files." readme = {file = "README.md", content-type = "text/markdown"} license = {file = "LICENSE.txt"} -requires-python = ">=3.8.1" +requires-python = ">=3.10" keywords = ["malware analysis", "reverse engineering", "capability detection", "software behaviors", "capa", "FLARE"] classifiers = [ "Development Status :: 5 - Production/Stable", @@ -95,9 +95,7 @@ dependencies = [ # typically due to dropping support for python releases # we still support. - # TODO(williballenthin): networkx 3.2 doesn't support python 3.8 while capa does. - # https://github.com/mandiant/capa/issues/1966 - "networkx>=3,<3.2", + "networkx>=3", "dnfile>=0.15.0", ] diff --git a/scripts/capa-as-library.py b/scripts/capa-as-library.py index bb1c2102a..0555a0263 100644 --- a/scripts/capa-as-library.py +++ b/scripts/capa-as-library.py @@ -9,7 +9,7 @@ import json import collections -from typing import Any, Set, Dict +from typing import Any from pathlib import Path import capa.main @@ -34,7 +34,7 @@ def render_meta(doc: rd.ResultDocument, result): result["path"] = doc.meta.sample.path -def find_subrule_matches(doc: rd.ResultDocument) -> Set[str]: +def find_subrule_matches(doc: rd.ResultDocument) -> set[str]: """ collect the rule names that have been matched as a subrule match. this way we can avoid displaying entries for things that are too specific. 
@@ -158,8 +158,8 @@ def render_mbc(doc, result): result["MBC"].setdefault(objective.upper(), inner_rows) -def render_dictionary(doc: rd.ResultDocument) -> Dict[str, Any]: - result: Dict[str, Any] = {} +def render_dictionary(doc: rd.ResultDocument) -> dict[str, Any]: + result: dict[str, Any] = {} render_meta(doc, result) render_attack(doc, result) render_mbc(doc, result) diff --git a/scripts/capa2sarif.py b/scripts/capa2sarif.py index 79330479d..5d663b977 100644 --- a/scripts/capa2sarif.py +++ b/scripts/capa2sarif.py @@ -25,7 +25,7 @@ import json import logging import argparse -from typing import List, Optional +from typing import Optional from pathlib import Path from capa.version import __version__ @@ -241,7 +241,7 @@ def _populate_invocations(sarif_log: dict, meta_data: dict) -> None: sarif_log["runs"][0]["invocations"].append(invoke) -def _enumerate_evidence(node: dict, related_count: int) -> List[dict]: +def _enumerate_evidence(node: dict, related_count: int) -> list[dict]: related_locations = [] if node.get("success") and node.get("node", {}).get("type") != "statement": label = "" diff --git a/scripts/compare-backends.py b/scripts/compare-backends.py index 1c000bade..fa4ddb010 100644 --- a/scripts/compare-backends.py +++ b/scripts/compare-backends.py @@ -15,7 +15,7 @@ import statistics import subprocess import multiprocessing -from typing import Set, Dict, List, Optional +from typing import Optional from pathlib import Path from collections import Counter from dataclasses import dataclass @@ -183,8 +183,8 @@ def report(args): for backend in BACKENDS: samples.update(doc[backend].keys()) - failures_by_backend: Dict[str, Set[str]] = {backend: set() for backend in BACKENDS} - durations_by_backend: Dict[str, List[float]] = {backend: [] for backend in BACKENDS} + failures_by_backend: dict[str, set[str]] = {backend: set() for backend in BACKENDS} + durations_by_backend: dict[str, list[float]] = {backend: [] for backend in BACKENDS} console = rich.get_console() for 
key in sorted(samples): @@ -193,7 +193,7 @@ def report(args): seen_rules: Counter[str] = Counter() - rules_by_backend: Dict[str, Set[str]] = {backend: set() for backend in BACKENDS} + rules_by_backend: dict[str, set[str]] = {backend: set() for backend in BACKENDS} for backend in BACKENDS: if key not in doc[backend]: diff --git a/scripts/detect_duplicate_features.py b/scripts/detect_duplicate_features.py index 9561339c2..c904a1a05 100644 --- a/scripts/detect_duplicate_features.py +++ b/scripts/detect_duplicate_features.py @@ -8,7 +8,6 @@ import sys import logging import argparse -from typing import Set from pathlib import Path import capa.main @@ -18,7 +17,7 @@ logger = logging.getLogger("detect_duplicate_features") -def get_features(rule_path: str) -> Set[Feature]: +def get_features(rule_path: str) -> set[Feature]: """ Extracts all features from a given rule file. diff --git a/scripts/inspect-binexport2.py b/scripts/inspect-binexport2.py index de2c82d86..07fc79eca 100644 --- a/scripts/inspect-binexport2.py +++ b/scripts/inspect-binexport2.py @@ -14,7 +14,7 @@ import logging import argparse import contextlib -from typing import Dict, List, Optional +from typing import Optional import capa.main import capa.features.extractors.binexport2 @@ -71,14 +71,14 @@ def getvalue(self): def _render_expression_tree( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], tree_index: int, o: io.StringIO, ): expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.REGISTER: o.write(expression.symbol) @@ -177,7 +177,7 @@ def _render_expression_tree( raise NotImplementedError(expression.type) -_OPERAND_CACHE: Dict[int, str] = {} +_OPERAND_CACHE: dict[int, str] = {} def render_operand(be2: BinExport2, 
operand: BinExport2.Operand, index: Optional[int] = None) -> str: @@ -223,7 +223,7 @@ def inspect_operand(be2: BinExport2, operand: BinExport2.Operand): def rec(tree_index, indent=0): expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] NEWLINE = "\n" print(f" {' ' * indent}expression: {str(expression).replace(NEWLINE, ', ')}") @@ -435,7 +435,7 @@ def main(argv=None): # appears to be code continue - data_xrefs: List[int] = [] + data_xrefs: list[int] = [] for data_reference_index in idx.data_reference_index_by_target_address[data_address]: data_reference = be2.data_reference[data_reference_index] instruction_address = idx.get_insn_address(data_reference.instruction_index) diff --git a/scripts/lint.py b/scripts/lint.py index e96604e64..0d6ebfa93 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -27,7 +27,6 @@ import argparse import itertools import posixpath -from typing import Set, Dict, List from pathlib import Path from dataclasses import field, dataclass @@ -59,10 +58,10 @@ class Context: capabilities_by_sample: cache of results, indexed by file path. 
""" - samples: Dict[str, Path] + samples: dict[str, Path] rules: RuleSet is_thorough: bool - capabilities_by_sample: Dict[Path, Set[str]] = field(default_factory=dict) + capabilities_by_sample: dict[Path, set[str]] = field(default_factory=dict) class Lint: @@ -330,7 +329,7 @@ def check_rule(self, ctx: Context, rule: Rule): DEFAULT_SIGNATURES = capa.main.get_default_signatures() -def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]: +def get_sample_capabilities(ctx: Context, path: Path) -> set[str]: nice_path = path.resolve().absolute() if path in ctx.capabilities_by_sample: logger.debug("found cached results: %s: %d capabilities", nice_path, len(ctx.capabilities_by_sample[path])) @@ -541,7 +540,7 @@ class FeatureStringTooShort(Lint): name = "feature string too short" recommendation = 'capa only extracts strings with length >= 4; will not match on "{:s}"' - def check_features(self, ctx: Context, features: List[Feature]): + def check_features(self, ctx: Context, features: list[Feature]): for feature in features: if isinstance(feature, (String, Substring)): assert isinstance(feature.value, str) @@ -559,7 +558,7 @@ class FeatureNegativeNumber(Lint): + 'representation; will not match on "{:d}"' ) - def check_features(self, ctx: Context, features: List[Feature]): + def check_features(self, ctx: Context, features: list[Feature]): for feature in features: if isinstance(feature, (capa.features.insn.Number,)): assert isinstance(feature.value, int) @@ -577,7 +576,7 @@ class FeatureNtdllNtoskrnlApi(Lint): + "module requirement to improve detection" ) - def check_features(self, ctx: Context, features: List[Feature]): + def check_features(self, ctx: Context, features: list[Feature]): for feature in features: if isinstance(feature, capa.features.insn.API): assert isinstance(feature.value, str) @@ -712,7 +711,7 @@ def run_lints(lints, ctx: Context, rule: Rule): yield lint -def run_feature_lints(lints, ctx: Context, features: List[Feature]): +def 
run_feature_lints(lints, ctx: Context, features: list[Feature]): for lint in lints: if lint.check_features(ctx, features): yield lint @@ -900,7 +899,7 @@ def width(s, count): def lint(ctx: Context): """ - Returns: Dict[string, Tuple(int, int)] + Returns: dict[string, tuple(int, int)] - # lints failed - # lints warned """ @@ -920,7 +919,7 @@ def lint(ctx: Context): return ret -def collect_samples(samples_path: Path) -> Dict[str, Path]: +def collect_samples(samples_path: Path) -> dict[str, Path]: """ recurse through the given path, collecting all file paths, indexed by their content sha256, md5, and filename. """ diff --git a/scripts/setup-linter-dependencies.py b/scripts/setup-linter-dependencies.py index b4ae3fd1f..4c2b321eb 100644 --- a/scripts/setup-linter-dependencies.py +++ b/scripts/setup-linter-dependencies.py @@ -43,7 +43,6 @@ import logging import argparse from sys import argv -from typing import Dict, List from pathlib import Path import requests @@ -77,7 +76,7 @@ def __init__(self): self._memory_store = MemoryStore(stix_data=stix_json["objects"]) @staticmethod - def _remove_deprecated_objects(stix_objects) -> List[AttackPattern]: + def _remove_deprecated_objects(stix_objects) -> list[AttackPattern]: """Remove any revoked or deprecated objects from queries made to the data source.""" return list( filter( @@ -86,7 +85,7 @@ def _remove_deprecated_objects(stix_objects) -> List[AttackPattern]: ) ) - def _get_tactics(self) -> List[Dict]: + def _get_tactics(self) -> list[dict]: """Get tactics IDs from Mitre matrix.""" # Only one matrix for enterprise att&ck framework matrix = self._remove_deprecated_objects( @@ -98,7 +97,7 @@ def _get_tactics(self) -> List[Dict]: )[0] return list(map(self._memory_store.get, matrix["tactic_refs"])) - def _get_techniques_from_tactic(self, tactic: str) -> List[AttackPattern]: + def _get_techniques_from_tactic(self, tactic: str) -> list[AttackPattern]: """Get techniques and sub techniques from a Mitre tactic 
(kill_chain_phases->phase_name)""" techniques = self._remove_deprecated_objects( self._memory_store.query( @@ -124,12 +123,12 @@ def _get_parent_technique_from_subtechnique(self, technique: AttackPattern) -> A )[0] return parent_technique - def run(self) -> Dict[str, Dict[str, str]]: + def run(self) -> dict[str, dict[str, str]]: """Iterate over every technique over every tactic. If the technique is a sub technique, then we also search for the parent technique name. """ logging.info("Starting extraction...") - data: Dict[str, Dict[str, str]] = {} + data: dict[str, dict[str, str]] = {} for tactic in self._get_tactics(): data[tactic["name"]] = {} for technique in sorted( @@ -159,7 +158,7 @@ class MbcExtractor(MitreExtractor): url = "https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json" kill_chain_name = "mitre-mbc" - def _get_tactics(self) -> List[Dict]: + def _get_tactics(self) -> list[dict]: """Override _get_tactics to edit the tactic name for Micro-objective""" tactics = super()._get_tactics() # We don't want the Micro-objective string inside objective names diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 6c8876581..e0e8fabc3 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -59,7 +59,6 @@ import logging import argparse import collections -from typing import Dict import colorama @@ -99,7 +98,7 @@ def render_matches_by_function(doc: rd.ResultDocument): - connect to HTTP server """ assert isinstance(doc.meta.analysis, rd.StaticAnalysis) - functions_by_bb: Dict[Address, Address] = {} + functions_by_bb: dict[Address, Address] = {} for finfo in doc.meta.analysis.layout.functions: faddress = finfo.address diff --git a/scripts/show-features.py b/scripts/show-features.py index 6005a810c..bf358aa21 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -67,7 +67,6 @@ import sys import logging import argparse -from typing import 
Tuple import capa.main import capa.rules @@ -136,7 +135,7 @@ def print_static_analysis(extractor: StaticFeatureExtractor, args): for feature, addr in extractor.extract_file_features(): print(f"file: {format_address(addr)}: {feature}") - function_handles: Tuple[FunctionHandle, ...] + function_handles: tuple[FunctionHandle, ...] if isinstance(extractor, capa.features.extractors.pefile.PefileFeatureExtractor): # pefile extractor doesn't extract function features function_handles = () diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index be850e927..55fbdad04 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -9,10 +9,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import sys -import typing import logging import argparse -from typing import Set, List, Tuple from collections import Counter from rich import print @@ -40,8 +38,8 @@ def format_address(addr: capa.features.address.Address) -> str: return v.format_address(capa.features.freeze.Address.from_capa((addr))) -def get_rules_feature_set(rules: capa.rules.RuleSet) -> Set[Feature]: - rules_feature_set: Set[Feature] = set() +def get_rules_feature_set(rules: capa.rules.RuleSet) -> set[Feature]: + rules_feature_set: set[Feature] = set() for _, rule in rules.rules.items(): rules_feature_set.update(rule.extract_all_features()) @@ -49,9 +47,9 @@ def get_rules_feature_set(rules: capa.rules.RuleSet) -> Set[Feature]: def get_file_features( - functions: Tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.StaticFeatureExtractor -) -> typing.Counter[Feature]: - feature_map: typing.Counter[Feature] = Counter() + functions: tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.StaticFeatureExtractor +) -> Counter[Feature]: + feature_map: Counter[Feature] = Counter() for f in functions: if extractor.is_library_function(f.address): @@ -86,8 +84,8 @@ def 
get_colored(s: str) -> Text: return Text(s, style="cyan") -def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]): - unused_features: List[Tuple[str, Text]] = [] +def print_unused_features(feature_map: Counter[Feature], rules_feature_set: set[Feature]): + unused_features: list[tuple[str, Text]] = [] for feature, count in reversed(feature_map.most_common()): if feature in rules_feature_set: continue @@ -130,11 +128,11 @@ def main(argv=None): assert isinstance(extractor, StaticFeatureExtractor), "only static analysis supported today" - feature_map: typing.Counter[Feature] = Counter() + feature_map: Counter[Feature] = Counter() feature_map.update([feature for feature, _ in extractor.extract_global_features()]) - function_handles: Tuple[FunctionHandle, ...] + function_handles: tuple[FunctionHandle, ...] if isinstance(extractor, capa.features.extractors.pefile.PefileFeatureExtractor): # pefile extractor doesn't extract function features function_handles = () @@ -173,7 +171,7 @@ def ida_main(): print(f"getting features for current function {hex(function)}") extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor() - feature_map: typing.Counter[Feature] = Counter() + feature_map: Counter[Feature] = Counter() feature_map.update([feature for feature, _ in extractor.extract_file_features()]) diff --git a/tests/fixtures.py b/tests/fixtures.py index 1912a456a..19285eca4 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -9,7 +9,6 @@ import binascii import contextlib import collections -from typing import Set, Dict from pathlib import Path from functools import lru_cache @@ -310,7 +309,7 @@ def extract_basic_block_features(extractor, fh, bbh): # f may not be hashable (e.g. 
ida func_t) so cannot @lru_cache this -def extract_instruction_features(extractor, fh, bbh, ih) -> Dict[Feature, Set[Address]]: +def extract_instruction_features(extractor, fh, bbh, ih) -> dict[Feature, set[Address]]: features = collections.defaultdict(set) for feature, addr in extractor.extract_insn_features(fh, bbh, ih): features[feature].add(addr) diff --git a/tests/test_binexport_accessors.py b/tests/test_binexport_accessors.py index bc9ea6db1..097af8b4f 100644 --- a/tests/test_binexport_accessors.py +++ b/tests/test_binexport_accessors.py @@ -8,7 +8,7 @@ import re import logging -from typing import Any, Dict +from typing import Any from pathlib import Path import pytest @@ -297,7 +297,7 @@ def test_get_operand_immediate_expression(addr, expressions): bl 0x100 add x0, sp, 0x10 """ -BE2_DICT: Dict[str, Any] = { +BE2_DICT: dict[str, Any] = { "expression": [ {"type": BinExport2.Expression.REGISTER, "symbol": "x0"}, {"type": BinExport2.Expression.IMMEDIATE_INT, "immediate": 0x20}, diff --git a/tests/test_freeze_dynamic.py b/tests/test_freeze_dynamic.py index b3087c092..ead4d50c2 100644 --- a/tests/test_freeze_dynamic.py +++ b/tests/test_freeze_dynamic.py @@ -6,7 +6,6 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import textwrap -from typing import List from pathlib import Path import fixtures @@ -85,7 +84,7 @@ ) -def addresses(s) -> List[Address]: +def addresses(s) -> list[Address]: return sorted(i.address for i in s) diff --git a/tests/test_freeze_static.py b/tests/test_freeze_static.py index 4674afc89..bd0c90b5d 100644 --- a/tests/test_freeze_static.py +++ b/tests/test_freeze_static.py @@ -6,7 +6,6 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and limitations under the License. import textwrap -from typing import List from pathlib import Path import pytest @@ -65,7 +64,7 @@ ) -def addresses(s) -> List[Address]: +def addresses(s) -> list[Address]: return sorted(i.address for i in s) diff --git a/web/rules/scripts/build_root.py b/web/rules/scripts/build_root.py index aefd8d882..fbb0947a7 100644 --- a/web/rules/scripts/build_root.py +++ b/web/rules/scripts/build_root.py @@ -11,7 +11,6 @@ import sys import random import logging -from typing import Dict, List from pathlib import Path import capa.rules @@ -49,7 +48,7 @@ def read_file_paths(txt_file_path: Path): - categorized_files: Dict[str, List[Path]] = { + categorized_files: dict[str, list[Path]] = { "modified in the last day": [], "modified in the last week": [], "modified in the last month": [], From 20ae098cdab1e2fe3d8913072a7caea216768af1 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Tue, 22 Oct 2024 07:43:05 +0000 Subject: [PATCH 18/21] update Python >= 3.10 and ubuntu --- .github/workflows/build.yml | 7 +++++-- .github/workflows/changelog.yml | 2 +- .github/workflows/publish.yml | 2 +- .github/workflows/tag.yml | 2 +- .github/workflows/tests.yml | 4 ++-- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f434ef0bc..0935182f9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,7 @@ jobs: - os: ubuntu-20.04 artifact_name: capa asset_name: linux-py312 - python_version: "3.12" + python_version: '3.12' - os: windows-2019 artifact_name: capa.exe asset_name: windows @@ -85,6 +85,9 @@ jobs: - os: ubuntu-22.04 artifact_name: capa asset_name: linux + - os: ubuntu-22.04 + artifact_name: capa + asset_name: linux-py312 - os: windows-2022 artifact_name: capa.exe asset_name: windows @@ -103,7 +106,7 @@ jobs: # upload zipped binaries to Release page if: github.event_name == 'release' name: zip 
and upload ${{ matrix.asset_name }} - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest needs: [build] strategy: matrix: diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index 86e9026b5..8bf5d67cc 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -14,7 +14,7 @@ jobs: check_changelog: # no need to check for dependency updates via dependabot if: github.actor != 'dependabot[bot]' && github.actor != 'dependabot-preview[bot]' - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: NO_CHANGELOG: '[x] No CHANGELOG update needed' steps: diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 4a591d778..f54a3cb26 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: '3.8' + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/tag.yml b/.github/workflows/tag.yml index 34eabbedc..69151150e 100644 --- a/.github/workflows/tag.yml +++ b/.github/workflows/tag.yml @@ -9,7 +9,7 @@ permissions: read-all jobs: tag: name: Tag capa rules - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout capa-rules uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 92f3e35ca..bbb39ff7b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -131,7 +131,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.11"] + python-version: ["3.10", "3.11"] steps: - name: Checkout capa with submodules # do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118 @@ -168,7 +168,7 @@ jobs: ghidra-tests: name: Ghidra tests for ${{ 
matrix.python-version }} - runs-on: ubuntu-22.04 + runs-on: ubuntu-20.04 needs: [tests] strategy: fail-fast: false From 7cc3ddd4eafe396b6de6238b514c559f665982d4 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Tue, 22 Oct 2024 08:57:51 +0000 Subject: [PATCH 19/21] remove typing_extensions Annotated import --- capa/features/extractors/cape/models.py | 3 +-- capa/features/extractors/vmray/models.py | 3 +-- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py index 7aa2c651f..20bedec24 100644 --- a/capa/features/extractors/cape/models.py +++ b/capa/features/extractors/cape/models.py @@ -6,10 +6,9 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import binascii -from typing import Any, Union, Literal, Optional, TypeAlias +from typing import Any, Union, Literal, Optional, Annotated, TypeAlias from pydantic import Field, BaseModel, ConfigDict -from typing_extensions import Annotated from pydantic.functional_validators import BeforeValidator diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index ef0513fb2..c2d6551aa 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -6,11 +6,10 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Union, Optional +from typing import Union, Optional, Annotated import xmltodict from pydantic import Field, BaseModel -from typing_extensions import Annotated from pydantic.functional_validators import BeforeValidator """ diff --git a/pyproject.toml b/pyproject.toml index 1cbc9b0e6..f8e9c3bc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -233,7 +233,7 @@ DEP002 = [ # dependencies imported but missing from definitions DEP003 = [ - "typing_extensions" # TODO(s-ff): remove when Python 3.9 is deprecated, see #1699 + "typing_extensions" # TODO(s-ff): remove when Python 3.10 is deprecated, see #1699 ] [tool.deptry.package_module_name_map] From 23709c9d6af15b6d9a4fccabb42f662c069a0f06 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 22 Oct 2024 13:10:23 +0000 Subject: [PATCH 20/21] Sync capa rules submodule --- rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules b/rules index 9917a93ee..125419b28 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit 9917a93ee145f03333992fc2de2db783648bba32 +Subproject commit 125419b2824e9cd6a1d9238e77ff5512f56288f9 From 35767e6c6a721259cd0ba6278b20afd9971b00e4 Mon Sep 17 00:00:00 2001 From: Capa Bot Date: Tue, 22 Oct 2024 15:29:01 +0000 Subject: [PATCH 21/21] Sync capa-testfiles submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index 2de79e9df..36bae7a84 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 2de79e9dfbfda0bb446b6a9c0902d2f7bf2e8e43 +Subproject commit 36bae7a843ab209029d58f509a46154fe592c548