diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..c8293f2 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,68 @@ +name: Linting + +on: [push, pull_request] + +jobs: + pylint: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: "3.9" # quoted so YAML keeps it a string (a bare 3.10 would be read as 3.1) + + - name: Install pylint + run: | + pip install --upgrade pip + pip install pylint==3.0.3 + pip install -r requirements.txt + + - name: Run pylint + run: | + pylint kronfluence + + isort: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: "3.9" + + - name: Install isort + run: | + pip install --upgrade pip + pip install isort==5.13.2 + + - name: Run isort # --check makes the job fail on unsorted imports instead of rewriting files in the runner + run: | + isort --check --profile black kronfluence + + black: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: "3.9" + + - name: Install black + run: | + pip install --upgrade pip + pip install black==24.1.1 + + - name: Run black + run: | + black --check kronfluence \ No newline at end of file diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml new file mode 100644 index 0000000..ec59baa --- /dev/null +++ b/.github/workflows/python-test.yml @@ -0,0 +1,35 @@ +name: Python Tests + +on: [push, pull_request] + +jobs: + pytest: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set up Python 3.9 + uses: actions/setup-python@v5 + with: + python-version: "3.9" + + - name: Install Dependencies + run: | + pip install --upgrade pip + pip install -r requirements.txt + python setup.py install + + - name: Run All Tests + run: | + pip install -e ."[dev]" + pytest -vx 
tests/test_testable_tasks.py + pytest -vx tests/test_analyzer.py + pytest -vx tests/test_modules.py + pytest -vx tests/test_samplers.py + pytest -vx tests/test_per_sample_gradients.py + pytest -vx tests/factors/test_covariances.py + pytest -vx tests/factors/test_eigens.py + pytest -vx tests/scores/test_pairwise_scores.py + pytest -vx tests/scores/test_self_scores.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2691b61 --- /dev/null +++ b/.gitignore @@ -0,0 +1,166 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+.idea/ + +# Checkpoints and influence outputs +checkpoints/ +analyses/ +data/ +*.pth \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..44300ec --- /dev/null +++ b/README.md @@ -0,0 +1,52 @@ +# Kronfluence + +[![CI](https://github.com/pomonam/kronfluence/actions/workflows/python-test.yml/badge.svg)](https://github.com/pomonam/kronfluence/actions/workflows/python-test.yml) +[![Lint](https://github.com/pomonam/kronfluence/actions/workflows/linting.yml/badge.svg)](https://github.com/pomonam/kronfluence/actions/workflows/linting.yml) +[![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/pomonam/kronfluence/blob/main/LICENSE) +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) + +**kronfluence** is a PyTorch-based library designed to compute [influence functions](https://arxiv.org/abs/1703.04730) using [Kronecker-factored Approximate Curvature (KFAC)](https://arxiv.org/abs/1503.05671) or [Eigenvalue-corrected KFAC (EKFAC)](https://arxiv.org/abs/1806.03884). +For a detailed description of the methodology, see the [**paper**](https://arxiv.org/abs/2308.03296) *Studying Large Language Model Generalization with Influence Functions*. + +> [!NOTE] +> This library is an unofficial community implementation. + +> [!WARNING] +> This library is under active development and has not reached its first stable release. + +## Installation + +> [!IMPORTANT] +> **Requirements:** +> - Python: Version 3.9 or later +> - PyTorch: Version 2.1 or later + +To install the latest version of `kronfluence`, use the following `pip` command: + +```bash +pip install kronfluence +``` + +Alternatively, you can install the library directly from the source: + +```bash +git clone https://github.com/pomonam/kronfluence.git +cd kronfluence +pip install -e . 
+``` + +## Getting Started + +(Placeholder for getting started content) + +## Examples + +(Placeholder for examples) + +## Contributing + +Your contributions are welcome! See the [CONTRIBUTING](https://github.com/pomonam/kronfluence/blob/main/CONTRIBUTING.md) file to get started. For bug fixes, please submit a pull request without prior discussion. For proposing new features, examples, or extensions, kindly initiate a discussion through an issue before proceeding. + +## License + +This software is released under the Apache 2.0 License, as detailed in the [LICENSE](https://github.com/pomonam/kronfluence/blob/main/LICENSE) file.