-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 3828bc8
Showing
269 changed files
with
351,754 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
# Created by https://www.toptal.com/developers/gitignore/api/linux,python | ||
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,python | ||
|
||
### Linux ### | ||
*~ | ||
|
||
# temporary files which can be created if a process still has a handle open of a deleted file | ||
.fuse_hidden* | ||
|
||
# KDE directory preferences | ||
.directory | ||
|
||
# Linux trash folder which might appear on any partition or disk | ||
.Trash-* | ||
|
||
# .nfs files are created when an open file is removed but is still being accessed | ||
.nfs* | ||
|
||
### Python ### | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
cover/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
.pybuilder/ | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
# For a library or package, you might want to ignore these files since the code is | ||
# intended to run in multiple environments; otherwise, check them in: | ||
# .python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# poetry | ||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | ||
# This is especially recommended for binary packages to ensure reproducibility, and is more | ||
# commonly ignored for libraries. | ||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | ||
#poetry.lock | ||
|
||
# pdm | ||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | ||
#pdm.lock | ||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | ||
# in version control. | ||
# https://pdm.fming.dev/#use-with-ide | ||
.pdm.toml | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# pytype static type analyzer | ||
.pytype/ | ||
|
||
# Cython debug symbols | ||
cython_debug/ | ||
|
||
# PyCharm | ||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | ||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | ||
# and can be added to the global gitignore or merged into this file. For a more nuclear | ||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | ||
#.idea/ | ||
|
||
### Python Patch ### | ||
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration | ||
poetry.toml | ||
|
||
# ruff | ||
.ruff_cache/ | ||
|
||
# LSP config files | ||
pyrightconfig.json | ||
|
||
# End of https://www.toptal.com/developers/gitignore/api/linux,python | ||
wandb/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{ | ||
// Use IntelliSense to learn about possible attributes. | ||
// Hover to view descriptions of existing attributes. | ||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 | ||
"version": "0.2.0", | ||
"configurations": [ | ||
{ | ||
"name": "Python: Current File", | ||
"type": "python", | ||
"request": "launch", | ||
"program": "${file}", | ||
"console": "integratedTerminal", | ||
"justMyCode": true | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Demo script: generate prompts from the Enron data and run the Jailbreak
# attack against a TogetherAI-hosted model, then print the raw results.
from attacks.DataExtraction.enron import EnronDataExtraction
from models.togetherai import TogetherAIModels
from attacks.Jailbreak.jailbreak import Jailbreak

# Prompt source backed by the local Enron data directory.
enron_source = EnronDataExtraction(data_path="data/enron")
# generate_prompts returns a pair; only the prompts are used here.
attack_prompts, _ = enron_source.generate_prompts(format="prefix-50")

# Replace api_key with your own API key
target_llm = TogetherAIModels(
    model="togethercomputer/llama-2-7b-chat",
    api_key="",
)

jailbreak = Jailbreak()
outcome = jailbreak.execute_attack(attack_prompts, target_llm)
print("results:", outcome)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
[Documentation](https://llm-pbe.github.io/document) | ||
|
||
# Overview | ||
|
||
**LLM-PBE** is a toolkit to assess the data privacy of LLMs. The components of LLM-PBE are shown below. | ||
![Components of LLM-PBE](docs/images/components.png) | ||
|
||
# Getting Started | ||
You can refer to our primary documentation [here](https://llm-pbe.readthedocs.io/en/latest/index.html). | ||
|
||
|
||
## Environment Setup | ||
|
||
```shell | ||
conda create -n llm-pbe python=3.10 -y | ||
conda activate llm-pbe | ||
# If torch fails on the GPU with a 'kernel image' error, install a torch build that matches your CUDA version, for example: | ||
pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116 | ||
pip install git+https://github.com/microsoft/analysing_pii_leakage.git | ||
pip install wandb accelerate | ||
pip install -r requirements.txt | ||
``` | ||
|
||
|
||
|
||
|
||
## Attack Demo | ||
The attack demo is shown below; see also `AttackDemo.py`. | ||
```python | ||
from data import JailbreakQueries | ||
from models import TogetherAIModels | ||
from attacks import Jailbreak | ||
from metrics import JailbreakRate | ||
|
||
data = JailbreakQueries() | ||
llm = TogetherAIModels(model="togethercomputer/llama-2-7b-chat", api_key="xxx") | ||
attack = Jailbreak() | ||
results = attack.execute_attack(data, llm) | ||
rate = JailbreakRate(results).compute_metric() | ||
print("rate:", rate) | ||
``` | ||
|
||
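The fragment below evaluates attack metrics for a DP model; it assumes that `metrics`, `attack_dp_metrics`, `dataset`, and `evaluation` are defined by earlier code that is not shown here. | ||
```python | ||
# Evaluate DP model metrics | ||
dp_evaluation = metrics.Evaluate(attack_dp_metrics, ground_truths=dataset.labels) | ||
|
||
# Output results | ||
print(f"Attack metrics on regular model: {evaluation}") | ||
print(f"Attack metrics on DP model: {dp_evaluation}") | ||
``` |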
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
class AttackBase:
    """Base class for attacks on language models.

    Holds the four inputs of an attack (model, data, prompt, metric) as
    instance attributes. ``execute`` must be overridden by subclasses;
    ``evaluate`` is a placeholder hook that currently returns ``None``.
    """

    def __init__(self, model, data, prompt, metric):
        """
        Initialize the base class for attacks on language models.

        Parameters:
        - model (object): The language model object to be attacked.
        - data (list/dataset): The data that was used to train/fine-tune the model.
        - prompt (str): The prompt that was injected into the model.
        - metric (str/function): The metric used to evaluate the success of the attack.
        """
        self.model = model    # Language model to attack
        self.data = data      # Data for performing the attack
        self.prompt = prompt  # Prompt for the attack
        self.metric = metric  # Metric to evaluate the attack

    def execute(self):
        """
        Execute the attack. This method should be overridden by specific attack implementations.

        Returns:
        - dict: Metrics to evaluate the success or failure of the attack.

        Raises:
        - NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("This method should be overridden by subclass")

    def evaluate(self, results):
        """
        Evaluate the attack based on the metric.

        Parameters:
        - results (list/dict): The raw results of the attack execution.

        Returns:
        - float/dict: Evaluation score or metrics based on the specified metric.
          The base implementation performs no evaluation and returns ``None``.
        """
        # Placeholder: metric evaluation logic is not implemented here.
        # It can be a custom function or a string-based switch case for
        # known metrics, supplied by a subclass.
        return None
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Demo script: run the PromptExtraction attack against a TogetherAI-hosted
# model once per Enron prompt format and print the raw results of each run.
from attacks.DataExtraction.enron import EnronDataExtraction
from models.togetherai import TogetherAIModels
from attacks.DataExtraction.prompt_extract import PromptExtraction

# Prompt formats to run, in order.
PROMPT_FORMATS = [
    'prefix-50',
    '0-shot-known-domain-b',
    '0-shot-unknown-domain-c',
    '3-shot-known-domain-c',
    '5-shot-unknown-domain-b',
]

enron = EnronDataExtraction(data_path="data/enron")

# Replace api_key with your own API key
# The client is built once: its arguments are identical for every format.
# NOTE(review): assumes the client keeps no per-attack state - confirm.
llm = TogetherAIModels(model="togethercomputer/llama-2-7b-chat", api_key="xxx")

# NOTE(review): the extracted source lost its indentation; every statement
# after the `for` line is assumed to belong to the loop body.
# The loop variable is named `prompt_format` so it does not shadow the
# builtin `format`.
for prompt_format in PROMPT_FORMATS:
    prompts, _ = enron.generate_prompts(format=prompt_format)
    attack = PromptExtraction()  # fresh attack object per format, as before
    results = attack.execute_attack(prompts, llm)
    print("results:", results)
|
Oops, something went wrong.