diff --git a/README.md b/README.md index 8fd5a2a..10ea02b 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,12 @@ bash build.sh Once the build is successful, go to the folder you want to perform your experiment, create a folder named `raw_code` and create every functions in separate C files. We followed the custom to file names `_.c`, wehre the `` is the Vulnerability identifier of the function (0 for benign, 1 for vulnerable). -First, you have to extract the slices from the parsed code. Modify the [data_processing/extract_slices.ipynb](data_processing/extract_slices.ipynb) for extracting slice. +1. You have to extract the slices from the parsed code. Modify the [data_processing/extract_slices.ipynb](data_processing/extract_slices.ipynb) for extracting slice. This will generate a file `_full_data_with_slices.json` in your data directory. -Finally, run [data_processing/create_ggnn_data.py](data_processing/create_ggnn_data.py) for formatting data into different formats. +2. Run [data_processing/create_ggnn_data.py](data_processing/create_ggnn_data.py) for formatting data into different formats. + +3. Update [data_processing/full_data_prep_script.ipynb](data_processing/full_data_prep_script.ipynb) to input to the GGNN. ### Running GGNN. diff --git a/data/get_data.sh b/data/get_data.sh index 542801c..81df592 100755 --- a/data/get_data.sh +++ b/data/get_data.sh @@ -3,8 +3,8 @@ FILE=replication.zip if [[ -f "$FILE" ]]; then echo "$FILE exists, skipping download" else - # https://drive.google.com/open?id=1PXfHY9OxO3NnbHqCUq3CJoTNm42G7sCZ - fileid="1PXfHY9OxO3NnbHqCUq3CJoTNm42G7sCZ" + # https://drive.google.com/file/d/1Mn0jLaZWiPFQ8ejzlz_zXnx_TcSzbwu1 + fileid="1Mn0jLaZWiPFQ8ejzlz_zXnx_TcSzbwu1" curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${FILE} rm ./cookie diff --git a/data_processing/Untitled.ipynb b/data_processing/Untitled.ipynb deleted file mode 100644 index b76c428..0000000 --- a/data_processing/Untitled.ipynb +++ /dev/null @@ -1,32 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/data_processing/data_for_all.ipynb b/data_processing/data_for_all.ipynb deleted file mode 100644 index a9221f3..0000000 --- a/data_processing/data_for_all.ipynb +++ /dev/null @@ -1,77 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import gensim\n", - "import os\n", - "import argparse\n", - "import subprocess\n", - "import numpy as np\n", - "from gensim.models import Word2Vec\n", - "import nltk\n", - "import csv\n", - "import json\n", - "\n", - "type_map = {\n", - " 'CallExpression': 1, 'InclusiveOrExpression': 2, 'ShiftExpression': 3, 'IdentifierDeclStatement': 4,\n", - " 'CompoundStatement': 5, 'IdentifierDecl': 6, 'Condition': 7, 'ArgumentList': 8, 'Sizeof': 9,\n", - " 'AdditiveExpression': 10, 'BitAndExpression': 11, 'CFGExitNode': 12, 'SizeofOperand': 13, 'AndExpression': 14,\n", - " 'Decl': 15, 'Label': 16, 'Argument': 17, 'Function': 18, 'CastExpression': 19, 'IdentifierDeclType': 20,\n", - " 'PtrMemberAccess': 21, 'PostIncDecOperationExpression': 22, 'Identifier': 23, 'GotoStatement': 24,\n", - " 'UnaryExpression': 25, 'DeclStmt': 26, 'ClassDefStatement': 27, 'FunctionDef': 28, 'File': 29,\n", - " 'ParameterType': 30, 'RelationalExpression': 31, 'SwitchStatement': 32, 'ExpressionStatement': 33,\n", - " 'CFGEntryNode': 34, 'MultiplicativeExpression': 35, 'Expression': 36, 'ConditionalExpression': 37,\n", - " 'MemberAccess': 38, 'ReturnType': 39, 'UnaryOperator': 40, 'BreakStatement': 41, 'CastTarget': 42,\n", - " 'ElseStatement': 43, 'SizeofExpression': 44, 'ClassDef': 45, 'DoStatement': 46, 'Symbol': 47,\n", - " 'ExclusiveOrExpression': 48, 'Callee': 49, 'ForStatement': 50, 'InitializerList': 51, 'WhileStatement': 52,\n", - " 'Statement': 53, 'ContinueStatement': 54, 'PrimaryExpression': 55, 'ParameterList': 56, 'EqualityExpression': 57,\n", - " 'Parameter': 58, 'InfiniteForNode': 59, 'IncDec': 60, 'ArrayIndexing': 61, 'CFGErrorNode': 62, 'IfStatement': 63,\n", - " 'ForInit': 64, 'UnaryOperationExpression': 65, 'AssignmentExpression': 66, 'ReturnStatement': 67,\n", - " 'OrExpression': 68\n", - "}\n", - "\n", - "type_one_hot = np.eye(len(type_map))\n", - "\n", - "edgeType = {\n", - " # 'IS_AST_PARENT': 1,\n", - " # 'IS_CLASS_OF': 2,\n", - " # 'DEF': 4, # Data Flow\n", - " # 'USE': 5, # Data Flow\n", - " # 'REACHES': 6, # Data Flow\n", - " 'FLOWS_TO': 3, # Control Flow\n", - " 'CONTROLS': 7, # Control Dependency edge\n", - " # 'DECLARES': 8,\n", - " # 'DOM': 9,\n", - " # 'POST_DOM': 10,\n", - " # 'IS_FUNCTION_OF_AST': 11,\n", - " # 'IS_FUNCTION_OF_CFG': 12\n", - "}\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/models/get_models.sh b/models/get_models.sh index 7ac6723..3a43baa 100755 --- a/models/get_models.sh +++ b/models/get_models.sh @@ -3,8 +3,8 @@ FILE=models.zip if [[ -f "$FILE" ]]; then echo "$FILE exists, skipping download" else - # https://drive.google.com/open?id=1plKRZ_tJZJQR7REr0QnTbBOw49Rh2ZNQ - fileid="1NT3xs7jBeKrHX4nMoLOATOxKy_zINp-V" + # https://drive.google.com/file/d/1gTgpgXGzSBlixNcUS-OaoXe8HxQXzaf0 + fileid="1gTgpgXGzSBlixNcUS-OaoXe8HxQXzaf0" curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${FILE} rm ./cookie