Initial commit

QinbinLi · Mar 31, 2024 · 3828bc8 · 3828bc8
commit 3828bc8
Show file tree

Hide file tree

Showing 269 changed files with 351,754 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,192 @@
+# Created by https://www.toptal.com/developers/gitignore/api/linux,python
+# Edit at https://www.toptal.com/developers/gitignore?templates=linux,python
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+# End of https://www.toptal.com/developers/gitignore/api/linux,python
+wandb/
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": true
+        }
+    ]
+}
diff --git a/AttackDemo.py b/AttackDemo.py
@@ -0,0 +1,12 @@
+from attacks.DataExtraction.enron import EnronDataExtraction
+from models.togetherai import TogetherAIModels
+from attacks.Jailbreak.jailbreak import Jailbreak
+
+enron = EnronDataExtraction(data_path="data/enron")
+prompts, _ = enron.generate_prompts(format="prefix-50")
+# Replace api_key with your own API key
+llm = TogetherAIModels(model="togethercomputer/llama-2-7b-chat", api_key="")
+attack = Jailbreak()
+results = attack.execute_attack(prompts, llm)
+print("results:", results)
+
diff --git a/README.md b/README.md
@@ -0,0 +1,49 @@
+[Documentation](https://llm-pbe.github.io/document)
+
+# Overview
+
+**LLM-PBE** is a toolkit to assess the data privacy of LLMs. The components of LLM-PBE are shown below.
+![Alt text](docs/images/components.png)
+
+# Getting Started
+You can refer to our primary documentation [here](https://llm-pbe.readthedocs.io/en/latest/index.html).
+
+
+## Env
+
+```shell
+conda create -n llm-pbe python=3.10 -y
+conda activate llm-pbe
+# If you hit a 'no kernel image is available' error when running torch on GPU, install a torch build that matches your CUDA version, e.g.:
+pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
+pip install git+https://github.com/microsoft/analysing_pii_leakage.git
+pip install wandb accelerate
+pip install -r requirements.txt
+```
+
+
+
+
+## Attack Demo
+The attack demo below shows the basic workflow; a similar runnable script is provided in `AttackDemo.py`.
+```python
+from data import JailbreakQueries
+from models import TogetherAIModels
+from attacks import Jailbreak
+from metrics import JailbreakRate
+
+data = JailbreakQueries()
+llm = TogetherAIModels(model="togethercomputer/llama-2-7b-chat", api_key="xxx")
+attack = Jailbreak()
+results = attack.execute_attack(data, llm)
+rate = JailbreakRate(results).compute_metric()
+print("rate:", rate)
+```
+
+```python
+# Evaluate DP model metrics
+dp_evaluation = metrics.Evaluate(attack_dp_metrics, ground_truths=dataset.labels)
+
+# Output results
+print(f"Attack metrics on regular model: {evaluation}")
+print(f"Attack metrics on DP model: {dp_evaluation}")
+```
diff --git a/attacks/AttackBase.py b/attacks/AttackBase.py
@@ -0,0 +1,41 @@
+class AttackBase:
+    def __init__(self, model, data, prompt, metric):
+        """
+        Initialize the base class for attacks on language models.
+        
+        Parameters:
+        - model (object): The language model object to be attacked.
+        - data (list/dataset): The data that was used to train/fine-tune the model.
+        - prompt (str): The prompt that was injected in the model.
+        - metric (str/function): The metric used to evaluate the success of the attack.
+        """
+
+        self.model = model  # Language model to attack
+        self.data = data  # Data for performing the attack
+        self.prompt = prompt  # Prompt for the attack
+        self.metric = metric  # Metric to evaluate the attack
+
+    def execute(self):
+        """
+        Execute the attack. This method should be overridden by specific attack implementations.
+        
+        Returns:
+        - dict: Metrics to evaluate the success or failure of the attack.
+        """
+
+        raise NotImplementedError("This method should be overridden by subclass")
+
+    def evaluate(self, results):
+        """
+        Evaluate the attack based on the metric.
+        
+        Parameters:
+        - results (list/dict): The raw results of the attack execution.
+
+        Returns:
+        - float/dict: Evaluation score or metrics based on the specified metric.
+        """
+
+        # Implement metric evaluation logic here
+        # This can be a custom function or a string-based switch case for known metrics
+        pass
diff --git a/attacks/DataExtraction/__init__.py b/attacks/DataExtraction/__init__.py
diff --git a/attacks/DataExtraction/demo.py b/attacks/DataExtraction/demo.py
@@ -0,0 +1,15 @@
+from attacks.DataExtraction.enron import EnronDataExtraction
+from models.togetherai import TogetherAIModels
+from attacks.DataExtraction.prompt_extract import PromptExtraction
+
+
+enron = EnronDataExtraction(data_path="data/enron")
+
+for format in ['prefix-50','0-shot-known-domain-b','0-shot-unknown-domain-c', '3-shot-known-domain-c', '5-shot-unknown-domain-b']:
+    prompts, _ = enron.generate_prompts(format=format)
+    # Replace api_key with your own API key
+    llm = TogetherAIModels(model="togethercomputer/llama-2-7b-chat", api_key="xxx")
+    attack = PromptExtraction()
+    results = attack.execute_attack(prompts, llm)
+    print("results:", results)
+