diff --git a/notebooks/main.ipynb b/notebooks/main.ipynb index b9db6ae..7c0ca89 100644 --- a/notebooks/main.ipynb +++ b/notebooks/main.ipynb @@ -1,1178 +1,1513 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "metadata": {} - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "# Imports\n", - "import tensorflow as tf\n", - "import tie.recommender\n", - "from tie.recommender import Recommender, FactorizationRecommender, BPRRecommender, ImplicitBPRRecommender, WalsRecommender, ImplicitWalsRecommender, TopItemsRecommender\n", - "from tie.matrix_builder import ReportTechniqueMatrixBuilder\n", - "from tie.engine import TechniqueInferenceEngine\n", - "from tie.constants import PredictionMethod\n", - "import random\n", - "import math\n", - "import importlib\n", - "import pandas as pd\n", - "import numpy as np\n", - "import sklearn.manifold\n", - "import matplotlib.pyplot as plt\n", - "import json\n", - "\n", - "tf.config.run_functions_eagerly(True)\n", - "\n", - "assert tf.executing_eagerly()\n", - "\n", - "importlib.reload(tie.recommender)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Num training interactions 33311.0\n", - "Num test interactions 9517.0\n", - "Num validation interactions 4758.0\n" - ] - } - ], - "source": [ - "validation_ratio = 0.1\n", - "test_ratio = 0.2\n", - "\n", - "# data locations\n", - "dataset_filepath = \"../data/combined_dataset_full_frequency.json\"\n", - "enterprise_attack_filepath = \"../data/stix/enterprise-attack.json\"\n", - "\n", - "# make 
data\n", - "data_builder = ReportTechniqueMatrixBuilder(\n", - " combined_dataset_filepath=dataset_filepath,\n", - " enterprise_attack_filepath=enterprise_attack_filepath,\n", - ")\n", - "training_data, test_data, validation_data = data_builder.build_train_test_validation(test_ratio, validation_ratio)\n", - "\n", - "print(\"Num training interactions\", training_data.to_numpy().sum())\n", - "print(\"Num test interactions\", test_data.to_numpy().sum())\n", - "print(\"Num validation interactions\", validation_data.to_numpy().sum())" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "oilrig_techniques = {\n", - " \"T1047\", \"T1059.005\", \"T1124\", \"T1082\",\n", - " \"T1497.001\", \"T1053.005\", \"T1027\", \"T1105\",\n", - " \"T1070.004\", \"T1059.003\", \"T1071.001\"\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def test_multiple_embeding_dimensions(model_class: Recommender, method: PredictionMethod, out_file: str, **kwargs):\n", - " \"\"\"Runs model_class at multiple embedding dimensions and saves results.\n", - "\n", - " Runs each model for embedding dimensions 4, 8, 10, 16, 32, and 64.\n", - "\n", - " Args:\n", - " model_class: A model on which to train at multiple embedding dimensions.\n", - " out_file: filename for saving the results file. 
Requires len(out_file) > 0\n", - " and out_file is a valid csv filename.\n", - " kwargs: Parameters mapped to values.\n", - "\n", - " Mutates:\n", - " Saves model results, including embedding_dimension, hyperparameters,\n", - " and precision, recall, and ndcg at 10, 20, 50, and 100 to out_file.\n", - " \"\"\"\n", - " assert len(out_file) > 0\n", - "\n", - " results = []\n", - "\n", - " embedding_dimensions = (4,8,10,16,32,64)\n", - " # for every embedding\n", - " for embedding_dimension in embedding_dimensions:\n", - "\n", - " # make model\n", - " model = model_class(\n", - " m=training_data.m,\n", - " n=training_data.n,\n", - " k=embedding_dimension,\n", - " )\n", - "\n", - " # make tie\n", - " tie = TechniqueInferenceEngine(\n", - " training_data=training_data,\n", - " validation_data=validation_data,\n", - " test_data=test_data,\n", - " model=model,\n", - " prediction_method=method,\n", - " enterprise_attack_filepath=enterprise_attack_filepath,\n", - " )\n", - "\n", - " # fit hyperparameters\n", - " best_hyperparameters = tie.fit_with_validation(**kwargs)\n", - "\n", - " # calculate precision, recall, ndcg\n", - " run_stats = {\n", - " \"embedding_dimension\": embedding_dimension,\n", - " **best_hyperparameters\n", - " }\n", - " k_values = (10, 20, 50, 100)\n", - " for k in k_values:\n", - " run_stats[f\"precision_at_{k}\"] = tie.precision(k=k)\n", - " run_stats[f\"recall_at_{k}\"] = tie.recall(k=k)\n", - " run_stats[f\"ndcg_at_{k}\"] = tie.normalized_discounted_cumulative_gain(k=k)\n", - "\n", - " print(run_stats)\n", - " results.append(run_stats)\n", - "\n", - "\n", - " # save as csv\n", - " results_dataframe = pd.DataFrame(results)\n", - " results_dataframe.to_csv(out_file)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtest_multiple_embeding_dimensions\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mWalsRecommender\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPredictionMethod\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDOT\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mout_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwals_model_results_training_data_correction_dot.csv\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m25\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0.0001\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.001\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.005\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.05\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m0.7\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mregularization_coefficient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0.0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.00001\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.0001\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.001\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m)\u001b[49m\n", - "Cell \u001b[0;32mIn[12], line 42\u001b[0m, in \u001b[0;36mtest_multiple_embeding_dimensions\u001b[0;34m(model_class, method, out_file, **kwargs)\u001b[0m\n\u001b[1;32m 32\u001b[0m tie \u001b[38;5;241m=\u001b[39m TechniqueInferenceEngine(\n\u001b[1;32m 33\u001b[0m training_data\u001b[38;5;241m=\u001b[39mtraining_data,\n\u001b[1;32m 34\u001b[0m validation_data\u001b[38;5;241m=\u001b[39mvalidation_data,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 38\u001b[0m enterprise_attack_filepath\u001b[38;5;241m=\u001b[39menterprise_attack_filepath,\n\u001b[1;32m 39\u001b[0m )\n\u001b[1;32m 41\u001b[0m \u001b[38;5;66;03m# fit hyperparameters\u001b[39;00m\n\u001b[0;32m---> 42\u001b[0m best_hyperparameters \u001b[38;5;241m=\u001b[39m \u001b[43mtie\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_with_cross_validation\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;66;03m# calculate precision, recall, ndcg\u001b[39;00m\n\u001b[1;32m 45\u001b[0m run_stats \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 46\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124membedding_dimension\u001b[39m\u001b[38;5;124m\"\u001b[39m: embedding_dimension,\n\u001b[1;32m 47\u001b[0m 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mbest_hyperparameters\n\u001b[1;32m 48\u001b[0m }\n", - "File \u001b[0;32m~/Desktop/CTID/technique-inference-engine/src/tie/engine.py:188\u001b[0m, in \u001b[0;36mTechniqueInferenceEngine.fit_with_cross_validation\u001b[0;34m(self, method, **kwargs)\u001b[0m\n\u001b[1;32m 183\u001b[0m variable_values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtuple\u001b[39m(kwargs\u001b[38;5;241m.\u001b[39mget(key) \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m variable_names)\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m hyperparameters \u001b[38;5;129;01min\u001b[39;00m parameter_cartesian_product(\n\u001b[1;32m 186\u001b[0m variable_names, variable_values\n\u001b[1;32m 187\u001b[0m ):\n\u001b[0;32m--> 188\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhyperparameters\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 189\u001b[0m score \u001b[38;5;241m=\u001b[39m recall_at_k(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpredict(), \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validation_data\u001b[38;5;241m.\u001b[39mto_pandas(), k\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m)\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m score \u001b[38;5;241m>\u001b[39m best_score:\n", - "File \u001b[0;32m~/Desktop/CTID/technique-inference-engine/src/tie/engine.py:124\u001b[0m, in \u001b[0;36mTechniqueInferenceEngine.fit\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Fit the model to the data.\u001b[39;00m\n\u001b[1;32m 107\u001b[0m \n\u001b[1;32m 108\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;124;03m The MSE of the prediction matrix, as determined by the test 
set.\u001b[39;00m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;66;03m# train\u001b[39;00m\n\u001b[0;32m--> 124\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_training_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_sparse_tensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 126\u001b[0m mean_squared_error \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_model\u001b[38;5;241m.\u001b[39mevaluate(\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_test_data\u001b[38;5;241m.\u001b[39mto_sparse_tensor(), method\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prediction_method\n\u001b[1;32m 128\u001b[0m )\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_checkrep()\n", - "File \u001b[0;32m~/Desktop/CTID/technique-inference-engine/src/tie/recommender/wals_recommender.py:216\u001b[0m, in \u001b[0;36mWalsRecommender.fit\u001b[0;34m(self, data, num_iterations, c, regularization_coefficient)\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_U \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_factor(\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V, P\u001b[38;5;241m.\u001b[39mT, alpha, regularization_coefficient\n\u001b[1;32m 213\u001b[0m )\n\u001b[1;32m 215\u001b[0m \u001b[38;5;66;03m# step 2: update V\u001b[39;00m\n\u001b[0;32m--> 216\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_update_factor\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_U\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mP\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43malpha\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregularization_coefficient\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_checkrep()\n", - "File \u001b[0;32m~/Desktop/CTID/technique-inference-engine/src/tie/recommender/wals_recommender.py:162\u001b[0m, in \u001b[0;36mWalsRecommender._update_factor\u001b[0;34m(self, opposing_factors, data, alpha, regularization_coefficient)\u001b[0m\n\u001b[1;32m 159\u001b[0m C_u \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mwhere(P_u \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m, alpha \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m C_u\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m==\u001b[39m (p,)\n\u001b[0;32m--> 162\u001b[0m confidence_scaled_v_transpose_v \u001b[38;5;241m=\u001b[39m \u001b[43mV_T_C_I_V\u001b[49m\u001b[43m(\u001b[49m\u001b[43mV\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mC_u\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;66;03m# X = (V^T CV + \\lambda I)^{-1} V^T CP\u001b[39;00m\n\u001b[1;32m 165\u001b[0m inv \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mlinalg\u001b[38;5;241m.\u001b[39minv(\n\u001b[1;32m 166\u001b[0m V_T_V\n\u001b[1;32m 167\u001b[0m \u001b[38;5;241m+\u001b[39m confidence_scaled_v_transpose_v\n\u001b[1;32m 168\u001b[0m \u001b[38;5;241m+\u001b[39m regularization_coefficient \u001b[38;5;241m*\u001b[39m 
np\u001b[38;5;241m.\u001b[39midentity(k)\n\u001b[1;32m 169\u001b[0m )\n", - "File \u001b[0;32m~/Desktop/CTID/technique-inference-engine/src/tie/recommender/wals_recommender.py:140\u001b[0m, in \u001b[0;36mWalsRecommender._update_factor..V_T_C_I_V\u001b[0;34m(V, c_array)\u001b[0m\n\u001b[1;32m 137\u001b[0m square_addition \u001b[38;5;241m=\u001b[39m v_i \u001b[38;5;241m@\u001b[39m v_i\u001b[38;5;241m.\u001b[39mT\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m square_addition\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m==\u001b[39m (k, k)\n\u001b[0;32m--> 140\u001b[0m product \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m square_addition\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m product\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "test_multiple_embeding_dimensions(\n", - " model_class=WalsRecommender,\n", - " method=PredictionMethod.DOT,\n", - " out_file=\"wals_model_results_training_data_correction_dot.csv\",\n", - " epochs=[25],\n", - " c=[0.0001, 0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.7],\n", - " regularization_coefficient=[0.0, 0.00001, 0.0001, 0.001, 0.01]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value 
encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'embedding_dimension': 4, 'regularization': 0.0001, 'learning_rate': 0.0001, 'num_iterations': 76521640, 'precision_at_10': 0.01560204407537528, 'recall_at_10': 0.06050501513663635, 'ndcg_at_10': 0.07683413537968624, 'precision_at_20': 0.011992973490897476, 'recall_at_20': 0.09067910772439934, 'ndcg_at_20': 0.09634989967193944, 'precision_at_50': 0.0079112104758863, 'recall_at_50': 0.14733022592714648, 'ndcg_at_50': 0.12541038111253533, 'precision_at_100': 0.00527946343021399, 'recall_at_100': 0.1978219117723097, 'ndcg_at_100': 0.1456379374612586}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / 
np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'embedding_dimension': 8, 'regularization': 0.001, 'learning_rate': 0.0001, 'num_iterations': 76521640, 'precision_at_10': 0.012791440434366017, 'recall_at_10': 0.048452527172814557, 'ndcg_at_10': 0.06212604210714955, 'precision_at_20': 0.010012775471095497, 'recall_at_20': 0.07689506441974149, 'ndcg_at_20': 0.07903656541040587, 'precision_at_50': 0.006547428936442032, 'recall_at_50': 0.12561008986844754, 'ndcg_at_50': 0.10273975134585815, 'precision_at_100': 0.004500159693388694, 'recall_at_100': 0.17281461717597588, 'ndcg_at_100': 0.12142186359861892}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value 
encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'embedding_dimension': 10, 'regularization': 0.001, 'learning_rate': 0.0001, 'num_iterations': 76521640, 'precision_at_10': 0.011992973490897476, 'recall_at_10': 0.04644692238371199, 'ndcg_at_10': 0.05802910032592288, 'precision_at_20': 0.009374001916320665, 'recall_at_20': 0.0721340954731668, 'ndcg_at_20': 0.07385875110489287, 'precision_at_50': 0.006055573299265411, 
'recall_at_50': 0.11468008558455581, 'ndcg_at_50': 0.09530259678856781, 'precision_at_100': 0.004227083998722453, 'recall_at_100': 0.16112090162064058, 'ndcg_at_100': 0.1135609038916972}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'embedding_dimension': 16, 'regularization': 0.0001, 
'learning_rate': 0.0001, 'num_iterations': 76521640, 'precision_at_10': 0.010923027786649633, 'recall_at_10': 0.04140103153989063, 'ndcg_at_10': 0.05513784313726271, 'precision_at_20': 0.008312040881507506, 'recall_at_20': 0.0647656160113604, 'ndcg_at_20': 0.06839756124757733, 'precision_at_50': 0.005416799744490578, 'recall_at_50': 0.10646787811167806, 'ndcg_at_50': 0.08773154884648639, 'precision_at_100': 0.0039603960396039604, 'recall_at_100': 0.15611050618282554, 'ndcg_at_100': 0.10676608984315174}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / 
np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n", - "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", - " data / np.expand_dims(num_items_per_user, axis=1)\n" - ] - } - ], - "source": [ - "test_multiple_embeding_dimensions(\n", - " BPRRecommender,\n", - " out_file=\"bpr_model_results.csv\",\n", - " epochs=[20*training_data.m*training_data.n],\n", - " learning_rate=[0.00001, 0.00005, 0.0001, 0.001],\n", - " regularization=[0., 0.0001, 0.001, 0.01],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean Squared Error 0.04852325970978819\n", - "Precision 0.02400990099009901\n", - "Recall 0.1905249816378097\n", - "Normalized Discounted Cumulative Gain 0.1841149999164702\n" - ] - } - ], - "source": [ - "embedding_dimension = 10\n", - "k = 20\n", - "best_hyperparameters = {'gravity_coefficient': 0.001, 'regularization_coefficient': 0.5, 'epochs': 1000, 'learning_rate': 100.0}\n", - "\n", - "model = TopItemsRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", - "\n", - "tie = TechniqueInferenceEngine(\n", - " training_data=training_data,\n", - " validation_data=validation_data,\n", - " test_data=test_data,\n", - " 
model=model,\n", - " prediction_method=PredictionMethod.DOT,\n", - " enterprise_attack_filepath=enterprise_attack_filepath,\n", - ")\n", - "mse = tie.fit()\n", - "print(\"Mean Squared Error\", mse)\n", - "precision = tie.precision(k=k)\n", - "print(\"Precision\", precision)\n", - "recall = tie.recall(k=k)\n", - "print(\"Recall\", recall)\n", - "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", - "print(\"Normalized Discounted Cumulative Gain\", ndcg)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " predictions training_data test_data \\\n", - "T1592.004 0.000000 0.0 0.0 \n", - "T1557.001 0.586885 0.0 0.0 \n", - "T1600 0.291803 0.0 0.0 \n", - "T1647 0.293443 0.0 0.0 \n", - "T1068 0.916393 0.0 0.0 \n", - "... ... ... ... \n", - "T1656 0.149180 0.0 0.0 \n", - "T1557.003 0.147541 0.0 0.0 \n", - "T1499.001 0.145902 0.0 0.0 \n", - "T1027.005 0.708197 0.0 0.0 \n", - "T1059.007 0.896721 0.0 0.0 \n", - "\n", - " technique_name \n", - "T1592.004 Client Configurations \n", - "T1557.001 LLMNR/NBT-NS Poisoning and SMB Relay \n", - "T1600 Weaken Encryption \n", - "T1647 Plist File Modification \n", - "T1068 Exploitation for Privilege Escalation \n", - "... ... 
\n", - "T1656 Impersonation \n", - "T1557.003 DHCP Spoofing \n", - "T1499.001 OS Exhaustion Flood \n", - "T1027.005 Indicator Removal from Tools \n", - "T1059.007 JavaScript \n", - "\n", - "[611 rows x 4 columns]\n" - ] - } - ], - "source": [ - "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", - "print(new_report_predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "metadata": {} - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean Squared Error 8.200210708280245\n", - "Precision 0.0014993585631815267\n", - "Recall 0.018532725776008053\n", - "Normalized Discounted Cumulative Gain 0.00973503731752359\n" - ] - } - ], - "source": [ - "embedding_dimension = 10\n", - "k = 20\n", - "best_hyperparameters = {'gravity_coefficient': 0.001, 'regularization_coefficient': 0.001, 'epochs': 10, 'learning_rate': 1.0}\n", - "\n", - "model = FactorizationRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", - "\n", - "tie = TechniqueInferenceEngine(\n", - " training_data=training_data,\n", - " validation_data=validation_data,\n", - " test_data=test_data,\n", - " model=model,\n", - " prediction_method=PredictionMethod.DOT,\n", - " enterprise_attack_filepath=enterprise_attack_filepath,\n", - ")\n", - "mse = tie.fit(**best_hyperparameters)\n", - "# mse = tie.fit_with_validation(\n", - "# learning_rate=[0.001, 0.01, 0.1, 1.0, 10., 20., 50., 100.],\n", - "# epochs=[1000],\n", - "# regularization_coefficient=[0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5],\n", - "# gravity_coefficient=[0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5],\n", - "# )\n", - "print(\"Mean Squared Error\", mse)\n", - "precision = tie.precision(k=k)\n", - "print(\"Precision\", precision)\n", - "recall = tie.recall(k=k)\n", - "print(\"Recall\", recall)\n", - "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", - "print(\"Normalized Discounted Cumulative 
Gain\", ndcg)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " predictions training_data test_data \\\n", - "T1204.002 -44127.687500 0.0 0.0 \n", - "T1592.001 -18382.687500 0.0 0.0 \n", - "T1547.012 43962.042969 0.0 0.0 \n", - "T1561.002 46403.843750 0.0 0.0 \n", - "T1110.004 67053.593750 0.0 0.0 \n", - "... ... ... ... \n", - "T1612 34722.996094 0.0 0.0 \n", - "T1588.006 -39228.710938 0.0 0.0 \n", - "T1003 -42823.429688 0.0 0.0 \n", - "T1069.002 -36731.289062 0.0 0.0 \n", - "T1070.005 12406.809570 0.0 0.0 \n", - "\n", - " technique_name \n", - "T1204.002 Malicious File \n", - "T1592.001 Hardware \n", - "T1547.012 Print Processors \n", - "T1561.002 Disk Structure Wipe \n", - "T1110.004 Credential Stuffing \n", - "... ... \n", - "T1612 Build Image on Host \n", - "T1588.006 Vulnerabilities \n", - "T1003 OS Credential Dumping \n", - "T1069.002 Domain Groups \n", - "T1070.005 Network Share Connection Removal \n", - "\n", - "[611 rows x 4 columns]\n" - ] - } - ], - "source": [ - "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", - "print(new_report_predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "metadata": {} - }, - "outputs": [ - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[20], line 22\u001b[0m\n\u001b[1;32m 9\u001b[0m tie \u001b[38;5;241m=\u001b[39m TechniqueInferenceEngine(\n\u001b[1;32m 10\u001b[0m training_data\u001b[38;5;241m=\u001b[39mtraining_data,\n\u001b[1;32m 11\u001b[0m validation_data\u001b[38;5;241m=\u001b[39mvalidation_data,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m 
enterprise_attack_filepath\u001b[38;5;241m=\u001b[39menterprise_attack_filepath,\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 17\u001b[0m \u001b[38;5;66;03m# mse = tie.fit_with_validation(\u001b[39;00m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;66;03m# learning_rate=[0.001, 0.005, 0.01, 0.02, 0.05],\u001b[39;00m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;66;03m# num_iterations=[500 * 512],\u001b[39;00m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# regularization_coefficient=[0, 0.0001, 0.001, 0.01],\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[0;32m---> 22\u001b[0m mse \u001b[38;5;241m=\u001b[39m \u001b[43mtie\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mbest_hyperparameters\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMean Squared Error\u001b[39m\u001b[38;5;124m\"\u001b[39m, mse)\n\u001b[1;32m 24\u001b[0m precision \u001b[38;5;241m=\u001b[39m tie\u001b[38;5;241m.\u001b[39mprecision(k\u001b[38;5;241m=\u001b[39mk)\n", - "File \u001b[0;32m~/code/technique-inference-engine/models/tie.py:122\u001b[0m, in \u001b[0;36mTechniqueInferenceEngine.fit\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Fit the model to the data.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;124;03m The MSE of the prediction matrix, as determined by the test set.\u001b[39;00m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# train\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_training_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_sparse_tensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 124\u001b[0m mean_squared_error \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_model\u001b[38;5;241m.\u001b[39mevaluate(\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_test_data\u001b[38;5;241m.\u001b[39mto_sparse_tensor(), method\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prediction_method\n\u001b[1;32m 126\u001b[0m )\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_checkrep()\n", - "File \u001b[0;32m~/code/technique-inference-engine/models/recommender/bpr_recommender.py:244\u001b[0m, in \u001b[0;36mBPRRecommender.fit\u001b[0;34m(self, data, learning_rate, epochs, regularization_coefficient)\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_U[u, :] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m learning_rate \u001b[38;5;241m*\u001b[39m (\n\u001b[1;32m 239\u001b[0m sigmoid_derivative \u001b[38;5;241m*\u001b[39m d_w \u001b[38;5;241m-\u001b[39m (regularization_coefficient \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_U[u, :])\n\u001b[1;32m 240\u001b[0m )\n\u001b[1;32m 241\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V[i, :] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m learning_rate \u001b[38;5;241m*\u001b[39m (\n\u001b[1;32m 242\u001b[0m sigmoid_derivative 
\u001b[38;5;241m*\u001b[39m d_hi \u001b[38;5;241m-\u001b[39m (regularization_coefficient \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V[i, :])\n\u001b[1;32m 243\u001b[0m )\n\u001b[0;32m--> 244\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V[j, :] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m learning_rate \u001b[38;5;241m*\u001b[39m (\n\u001b[1;32m 245\u001b[0m sigmoid_derivative \u001b[38;5;241m*\u001b[39m d_hj \u001b[38;5;241m-\u001b[39m (regularization_coefficient \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V[j, :])\n\u001b[1;32m 246\u001b[0m )\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "# hyperparameters\n", - "embedding_dimension = 4\n", - "k = 20\n", - "best_hyperparameters = {'regularization_coefficient': 0.0001, 'epochs': 2, 'learning_rate': 0.0001}\n", - "# best_hyperparameters[\"epochs\"] = 20*training_data.m*training_data.n\n", - "\n", - "model = BPRRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", - "\n", - "tie = TechniqueInferenceEngine(\n", - " training_data=training_data,\n", - " validation_data=validation_data,\n", - " test_data=test_data,\n", - " model=model,\n", - " prediction_method=PredictionMethod.COSINE,\n", - " enterprise_attack_filepath=enterprise_attack_filepath,\n", - ")\n", - "# mse = tie.fit_with_validation(\n", - "# learning_rate=[0.001, 0.005, 0.01, 0.02, 0.05],\n", - "# epochs=[500 * 512],\n", - "# regularization_coefficient=[0, 0.0001, 0.001, 0.01],\n", - "# )\n", - "mse = tie.fit(**best_hyperparameters)\n", - "print(\"Mean Squared Error\", mse)\n", - "precision = tie.precision(k=k)\n", - "print(\"Precision\", precision)\n", - "recall = tie.recall(k=20)\n", - "print(\"Recall\", recall)\n", - "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", - "print(\"Normalized Discounted Cumulative Gain\", ndcg)" - ] - }, - { - "cell_type": "code", - "execution_count": 
17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " predictions training_data test_data \\\n", - "T1558.002 0.555484 0.0 0.0 \n", - "T1132.002 0.494036 0.0 0.0 \n", - "T1211 -0.492932 0.0 0.0 \n", - "T1601.002 -0.185998 0.0 0.0 \n", - "T1596 -0.025210 0.0 0.0 \n", - "... ... ... ... \n", - "T1546.011 -0.217676 0.0 0.0 \n", - "T1535 0.464719 0.0 0.0 \n", - "T1071 0.199836 0.0 0.0 \n", - "T1587 0.658772 0.0 0.0 \n", - "T1499.002 0.464182 0.0 0.0 \n", - "\n", - " technique_name \n", - "T1558.002 Silver Ticket \n", - "T1132.002 Non-Standard Encoding \n", - "T1211 Exploitation for Defense Evasion \n", - "T1601.002 Downgrade System Image \n", - "T1596 Search Open Technical Databases \n", - "... ... \n", - "T1546.011 Application Shimming \n", - "T1535 Unused/Unsupported Cloud Regions \n", - "T1071 Application Layer Protocol \n", - "T1587 Develop Capabilities \n", - "T1499.002 Service Exhaustion Flood \n", - "\n", - "[611 rows x 4 columns]\n" - ] - } - ], - "source": [ - "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", - "print(new_report_predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 20/20 [00:00<00:00, 261.94it/s, train_auc=51.95%, skipped=9.34%]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean Squared Error 0.8416396221072098\n", - "Precision 0.008555163566388711\n", - "Recall 0.1188407602595878\n", - "Normalized Discounted Cumulative Gain 0.05200668172568438\n" - ] - } - ], - "source": [ - "# hyperparameters\n", - "embedding_dimension = 10\n", - "k = 20\n", - "best_hyperparameters = {'regularization_coefficient': 0.0001, \"epochs\": 20, 'learning_rate': 0.005}\n", - "\n", - "model = ImplicitBPRRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", - "\n", - "tie = 
TechniqueInferenceEngine(\n", - " training_data=training_data,\n", - " validation_data=validation_data,\n", - " test_data=test_data,\n", - " model=model,\n", - " prediction_method=PredictionMethod.COSINE,\n", - " enterprise_attack_filepath=enterprise_attack_filepath,\n", - ")\n", - "# mse = tie.fit_with_validation(\n", - "# learning_rate=[0.001, 0.005, 0.01, 0.02, 0.05],\n", - "# epochs=[math.floor(500 * 512 / training_data.to_numpy().sum())],\n", - "# regularization=[0, 0.0001, 0.001, 0.01],\n", - "# )\n", - "mse = tie.fit(**best_hyperparameters)\n", - "print(\"Mean Squared Error\", mse)\n", - "precision = tie.precision(k=k)\n", - "print(\"Precision\", precision)\n", - "recall = tie.recall(k=k)\n", - "print(\"Recall\", recall)\n", - "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", - "print(\"Normalized Discounted Cumulative Gain\", ndcg)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 20/20 [00:00<00:00, 51.99it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean Squared Error 0.5028757316475128\n", - "Precision 0.008659397049390635\n", - "Recall 0.11683780275644119\n", - "Normalized Discounted Cumulative Gain 0.06099917717388372\n" - ] - } - ], - "source": [ - "# hyperparameters\n", - "embedding_dimension = 10\n", - "k = 20\n", - "\n", - "best_hyperparameters = {'regularization_coefficient': 0.05, 'c': 0.5, 'epochs': 20}\n", - "\n", - "model = ImplicitWalsRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", - "\n", - "tie = TechniqueInferenceEngine(\n", - " training_data=training_data,\n", - " validation_data=validation_data,\n", - " test_data=test_data,\n", - " model=model,\n", - " prediction_method=PredictionMethod.COSINE,\n", - " enterprise_attack_filepath=enterprise_attack_filepath,\n", - ")\n", - "mse = tie.fit(**best_hyperparameters)\n", - "# mse = 
tie.fit_with_validation(\n", - "# epochs=[20],\n", - "# c=[0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.7],\n", - "# regularization_coefficient=[0.001, 0.005, 0.01, 0.02, 0.05]\n", - "# )\n", - "print(\"Mean Squared Error\", mse)\n", - "precision = tie.precision(k=k)\n", - "print(\"Precision\", precision)\n", - "recall = tie.recall(k=k)\n", - "print(\"Recall\", recall)\n", - "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", - "print(\"Normalized Discounted Cumulative Gain\", ndcg)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " predictions training_data test_data \\\n", - "T1204.002 0.190945 0.0 0.0 \n", - "T1592.001 -0.088588 0.0 0.0 \n", - "T1547.012 0.595495 0.0 0.0 \n", - "T1561.002 0.591824 0.0 0.0 \n", - "T1110.004 -0.004705 0.0 0.0 \n", - "... ... ... ... \n", - "T1612 0.140598 0.0 0.0 \n", - "T1588.006 0.665867 0.0 0.0 \n", - "T1003 -0.068861 0.0 0.0 \n", - "T1069.002 0.364830 0.0 0.0 \n", - "T1070.005 0.198465 0.0 0.0 \n", - "\n", - " technique_name \n", - "T1204.002 Malicious File \n", - "T1592.001 Hardware \n", - "T1547.012 Print Processors \n", - "T1561.002 Disk Structure Wipe \n", - "T1110.004 Credential Stuffing \n", - "... ... 
\n", - "T1612 Build Image on Host \n", - "T1588.006 Vulnerabilities \n", - "T1003 OS Credential Dumping \n", - "T1069.002 Domain Groups \n", - "T1070.005 Network Share Connection Removal \n", - "\n", - "[611 rows x 4 columns]\n" - ] - } - ], - "source": [ - "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", - "print(new_report_predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean Squared Error 0.22009765631062408\n", - "Precision 0.0081783194355356\n", - "Recall 0.11611657887235578\n", - "Normalized Discounted Cumulative Gain 0.05730162211191529\n" - ] - } - ], - "source": [ - "# hyperparameters\n", - "embedding_dimension = 4\n", - "k = 20\n", - "\n", - "# best_hyperparameters = {'regularization_coefficient': 0.1, 'c': 0.5, 'epochs': 20}\n", - "# best_hyperparameters = {'regularization_coefficient': 0.0001, 'c': 0.3, 'epochs': 100}\n", - "best_hyperparameters = {'regularization_coefficient': 0.001, 'c': 0.1, \"epochs\": 20}\n", - "model = WalsRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", - "\n", - "tie = TechniqueInferenceEngine(\n", - " training_data=training_data,\n", - " validation_data=validation_data,\n", - " test_data=test_data,\n", - " model=model,\n", - " prediction_method=PredictionMethod.COSINE,\n", - " enterprise_attack_filepath=enterprise_attack_filepath,\n", - ")\n", - "mse = tie.fit(**best_hyperparameters)\n", - "# mse = tie.fit_with_validation(\n", - "# epochs=[20],\n", - "# c=[0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.7],\n", - "# regularization_coefficient=[0.001, 0.005, 0.01, 0.02, 0.05]\n", - "# )\n", - "print(\"Mean Squared Error\", mse)\n", - "precision = tie.precision(k=k)\n", - "print(\"Precision\", precision)\n", - "recall = tie.recall(k=k)\n", - "print(\"Recall\", recall)\n", - "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", - 
"print(\"Normalized Discounted Cumulative Gain\", ndcg)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " predictions training_data test_data \\\n", - "T1204.002 0.746440 0.0 0.0 \n", - "T1592.001 -0.030477 0.0 0.0 \n", - "T1547.012 0.976308 0.0 0.0 \n", - "T1561.002 0.676964 0.0 0.0 \n", - "T1110.004 0.389903 0.0 0.0 \n", - "... ... ... ... \n", - "T1612 0.093252 0.0 0.0 \n", - "T1588.006 0.731710 0.0 0.0 \n", - "T1003 -0.059756 0.0 0.0 \n", - "T1069.002 0.502788 0.0 0.0 \n", - "T1070.005 0.731683 0.0 0.0 \n", - "\n", - " technique_name \n", - "T1204.002 Malicious File \n", - "T1592.001 Hardware \n", - "T1547.012 Print Processors \n", - "T1561.002 Disk Structure Wipe \n", - "T1110.004 Credential Stuffing \n", - "... ... \n", - "T1612 Build Image on Host \n", - "T1588.006 Vulnerabilities \n", - "T1003 OS Credential Dumping \n", - "T1069.002 Domain Groups \n", - "T1070.005 Network Share Connection Removal \n", - "\n", - "[611 rows x 4 columns]\n" - ] - } - ], - "source": [ - "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", - "print(new_report_predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 0.00000000e+00 3.10272485e-01 -5.06079257e-01 ... -2.96728946e-02\n", - " 1.03696346e-01 -4.59698914e-03]\n", - " [ 1.00000000e+00 2.34661356e-01 -3.45782340e-01 ... -8.81108269e-02\n", - " 7.32592419e-02 2.15996355e-01]\n", - " [ 2.00000000e+00 4.81936446e-08 6.88791080e-10 ... 3.42836657e-08\n", - " -7.81015075e-09 -4.81467985e-08]\n", - " ...\n", - " [ 6.25900000e+03 3.53572398e-01 -4.88738894e-01 ... 2.14364976e-02\n", - " 1.39371127e-01 1.01979606e-01]\n", - " [ 6.26000000e+03 -5.61978075e-10 -3.23640350e-08 ... 
-7.43976116e-08\n", - " 8.64229861e-08 8.77083117e-09]\n", - " [ 6.26100000e+03 -6.00948269e-08 -1.48262300e-08 ... 3.57593208e-08\n", - " -9.07301079e-09 2.34565452e-08]]\n", - "(6262, 11)\n", - "(611, 11)\n" - ] - } - ], - "source": [ - "# TEMPORARY - GET EMBEDDINGS FOR FE\n", - "U = tie.get_U() # entity (report) ids\n", - "V = tie.get_V() # item (technique) embeddings\n", - "\n", - "U_with_index = np.hstack((np.expand_dims(training_data.report_ids, axis=1), U))\n", - "V_with_index = np.hstack((np.expand_dims(training_data.technique_ids, axis=1), V))\n", - "\n", - "print(U_with_index.shape)\n", - "print(V_with_index.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "recalls [0.3087573406401257, 0.3257859760812914, 0.3458226653670225, 0.3745375233589626, 0.3949941633991787, 0.4110964558541681, 0.4219309973671012, 0.4317147374599419, 0.44042389049516534, 0.44770298702149547, 0.45387773687027355, 0.45797033624322875, 0.46304119765720547, 0.4679221448741617, 0.47182791494477766, 0.47617333685846847, 0.4791363202775644, 0.48264413250862803, 0.48577394889957126, 0.4883141430997783, 0.4904385260449852]\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "k_values = [1, 5] + list(range(10, 200, 10))\n", - "recalls = []\n", - "ndcgs = []\n", - "for k in k_values:\n", - " # print(\"Mean Squared Error\", mse)\n", - " precision = tie.precision(k=k)\n", - " # print(\"Precision\", precision)\n", - " recall = tie.recall(k=k)\n", - " recalls.append(recall)\n", - " # print(\"Recall\", recall)\n", - " ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", - " ndcgs.append(ndcg)\n", - " # print(\"Normalized Discounted Cumulative Gain\", ndcg)\n", - "\n", - "print(\"recalls\", recalls)\n", - "\n", - "plt.xlabel(\"k\")\n", - "plt.ylabel(\"Normalized discounted cumulative gain (NDCG)\")\n", - "plt.title(\"NDCG@k for various values of k\")\n", - "plt.plot(k_values, ndcgs)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "FactorizationRecommender.predict_new_entity() got an unexpected keyword argument 'c'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[6], line 6\u001b[0m\n\u001b[1;32m 1\u001b[0m oilrig_techniques \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1047\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1059.005\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1124\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1082\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1497.001\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1053.005\u001b[39m\u001b[38;5;124m\"\u001b[39m, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1027\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1105\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1070.004\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1059.003\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1071.001\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5\u001b[0m }\n\u001b[0;32m----> 6\u001b[0m new_report_predictions \u001b[38;5;241m=\u001b[39m \u001b[43mtie\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict_for_new_report\u001b[49m\u001b[43m(\u001b[49m\u001b[43moilrig_techniques\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregularization_coefficient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.05\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlearning_rate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(new_report_predictions\u001b[38;5;241m.\u001b[39msort_values(by\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredictions\u001b[39m\u001b[38;5;124m\"\u001b[39m, ascending\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m10\u001b[39m))\n", - "File \u001b[0;32m~/code/technique-inference-engine/models/tie.py:338\u001b[0m, in \u001b[0;36mTechniqueInferenceEngine.predict_for_new_report\u001b[0;34m(self, techniques, **kwargs)\u001b[0m\n\u001b[1;32m 332\u001b[0m n \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_training_data\u001b[38;5;241m.\u001b[39mn\n\u001b[1;32m 334\u001b[0m technique_tensor \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mSparseTensor(\n\u001b[1;32m 335\u001b[0m indices\u001b[38;5;241m=\u001b[39mtechnique_indices_2d, values\u001b[38;5;241m=\u001b[39mvalues, dense_shape\u001b[38;5;241m=\u001b[39m(n,)\n\u001b[1;32m 336\u001b[0m )\n\u001b[0;32m--> 338\u001b[0m predictions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict_new_entity\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtechnique_tensor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prediction_method\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 340\u001b[0m training_indices_dense \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mzeros(\u001b[38;5;28mlen\u001b[39m(predictions))\n\u001b[1;32m 341\u001b[0m training_indices_dense[technique_indices] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n", - "\u001b[0;31mTypeError\u001b[0m: FactorizationRecommender.predict_new_entity() got an unexpected keyword argument 'c'" - ] - } - ], - "source": [ - "oilrig_techniques = {\n", - " \"T1047\", \"T1059.005\", \"T1124\", \"T1082\",\n", - " \"T1497.001\", \"T1053.005\", \"T1027\", \"T1105\",\n", - " \"T1070.004\", \"T1059.003\", \"T1071.001\"\n", - "}\n", - "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, c=0.5, regularization_coefficient=0.05, learning_rate=0.01, epochs=100)\n", - "\n", - "print(new_report_predictions.sort_values(by=\"predictions\", ascending=False).head(10))" - ] - }, - { - "cell_type": "code", - 
"execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[t-SNE] Computing 46 nearest neighbors...\n", - "[t-SNE] Indexed 6262 samples in 0.003s...\n", - "[t-SNE] Computed neighbors for 6262 samples in 0.226s...\n", - "[t-SNE] Computed conditional probabilities for sample 1000 / 6262\n", - "[t-SNE] Computed conditional probabilities for sample 2000 / 6262\n", - "[t-SNE] Computed conditional probabilities for sample 3000 / 6262\n", - "[t-SNE] Computed conditional probabilities for sample 4000 / 6262\n", - "[t-SNE] Computed conditional probabilities for sample 5000 / 6262\n", - "[t-SNE] Computed conditional probabilities for sample 6000 / 6262\n", - "[t-SNE] Computed conditional probabilities for sample 6262 / 6262\n", - "[t-SNE] Mean sigma: 0.000000\n", - "[t-SNE] KL divergence after 250 iterations with early exaggeration: 95.187531\n", - "[t-SNE] KL divergence after 10000 iterations: 0.847868\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def make_tsne_embeddings(embeddings: np.ndarray) -> tuple[np.array, np.array]:\n", - " \"\"\"Create 2D representation of embeddings using t-SNE.\n", - "\n", - " Args:\n", - " embeddings: an mxk array of m embeddings in k-dimensional space.\n", - "\n", - " Returns:\n", - " A tuple of the form (x_1, x_2) where x_1 and x_2 are length m\n", - " such that (x_1[i], x_2[i]) is the 2-dimensional point cotnaining the 2-dimensional\n", - " repsresentation for embeddings[i, :].\n", - " \"\"\"\n", - " tsne = sklearn.manifold.TSNE(\n", - " n_components=2,\n", - " perplexity=15,\n", - " learning_rate=\"auto\",\n", - " # metric='cosine',\n", - " # early_exaggeration=10.0,\n", - " init='pca',\n", - " verbose=True,\n", - " n_iter=10000,\n", - " )\n", - "\n", - " V_proj = tsne.fit_transform(embeddings)\n", - " x = V_proj[:, 0]\n", - " y = V_proj[:, 1]\n", - "\n", - " return x, y\n", - "\n", - "U = tie.get_U()\n", - "x_1, x_2 = make_tsne_embeddings(U)\n", - "\n", - "plt.scatter(x_1, x_2, s=0.5)\n", - "plt.show()" - ] - } + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "metadata": {} + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-18 11:00:01.252369: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-07-18 11:00:01.252701: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-07-18 11:00:01.255211: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-07-18 11:00:01.287622: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-07-18 11:00:01.867428: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "# Imports\n", + "import tensorflow as tf\n", + "import tie.recommender\n", + "from tie.recommender import Recommender, FactorizationRecommender, BPRRecommender, ImplicitBPRRecommender, WalsRecommender, ImplicitWalsRecommender, TopItemsRecommender\n", + "from tie.matrix_builder import ReportTechniqueMatrixBuilder\n", + "from tie.engine import TechniqueInferenceEngine\n", + "from tie.constants import PredictionMethod\n", + "import random\n", + "import math\n", + "import importlib\n", + "import pandas as pd\n", + "import numpy as np\n", + "import sklearn.manifold\n", + "import matplotlib.pyplot as plt\n", + "import json\n", + "\n", + "tf.config.run_functions_eagerly(True)\n", + "\n", + "assert tf.executing_eagerly()\n", + "\n", + "importlib.reload(tie.recommender)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Num training interactions 33311.0\n", + "Num test interactions 9517.0\n", + "Num validation interactions 4758.0\n" + ] + } + ], + "source": [ + "validation_ratio = 0.1\n", + "test_ratio = 0.2\n", + "\n", + "# data locations\n", + "dataset_filepath = \"../data/combined_dataset_full_frequency.json\"\n", + "enterprise_attack_filepath = \"../data/stix/enterprise-attack.json\"\n", + "\n", + "# make data\n", + "data_builder = ReportTechniqueMatrixBuilder(\n", + " combined_dataset_filepath=dataset_filepath,\n", + " enterprise_attack_filepath=enterprise_attack_filepath,\n", + ")\n", + "training_data, test_data, validation_data = data_builder.build_train_test_validation(test_ratio, validation_ratio)\n", + "\n", + "print(\"Num training interactions\", training_data.to_numpy().sum())\n", + "print(\"Num test interactions\", test_data.to_numpy().sum())\n", + "print(\"Num validation interactions\", validation_data.to_numpy().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "oilrig_techniques = {\n", + " \"T1047\", \"T1059.005\", \"T1124\", \"T1082\",\n", + " \"T1497.001\", \"T1053.005\", \"T1027\", \"T1105\",\n", + " \"T1070.004\", \"T1059.003\", \"T1071.001\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def test_multiple_embeding_dimensions(model_class: Recommender, method: PredictionMethod, out_file: str, **kwargs):\n", + " \"\"\"Runs model_class at multiple embedding dimensions and saves results.\n", + "\n", + " Runs each model for embedding dimensions 4, 8, 10, 16, 32, and 64.\n", + "\n", + " Args:\n", + " model_class: A model on which to train at multiple embedding dimensions.\n", + " out_file: filename for saving the results file. 
Requires len(out_file) > 0\n", + " and out_file is a valid csv filename.\n", + " kwargs: Parameters mapped to values.\n", + "\n", + " Mutates:\n", + " Saves model results, including embedding_dimension, hyperparameters,\n", + " and precision, recall, and ndcg at 10, 20, 50, and 100 to out_file.\n", + " \"\"\"\n", + " assert len(out_file) > 0\n", + "\n", + " results = []\n", + "\n", + " embedding_dimensions = (4,8,10,16,32,64)\n", + " # for every embedding\n", + " for embedding_dimension in embedding_dimensions:\n", + "\n", + " # make model\n", + " model = model_class(\n", + " m=training_data.m,\n", + " n=training_data.n,\n", + " k=embedding_dimension,\n", + " )\n", + "\n", + " # make tie\n", + " tie = TechniqueInferenceEngine(\n", + " training_data=training_data,\n", + " validation_data=validation_data,\n", + " test_data=test_data,\n", + " model=model,\n", + " prediction_method=method,\n", + " enterprise_attack_filepath=enterprise_attack_filepath,\n", + " )\n", + "\n", + " # fit hyperparameters\n", + " best_hyperparameters = tie.fit_with_validation(**kwargs)\n", + "\n", + " # calculate precision, recall, ndcg\n", + " run_stats = {\n", + " \"embedding_dimension\": embedding_dimension,\n", + " **best_hyperparameters\n", + " }\n", + " k_values = (10, 20, 50, 100)\n", + " for k in k_values:\n", + " run_stats[f\"precision_at_{k}\"] = tie.precision(k=k)\n", + " run_stats[f\"recall_at_{k}\"] = tie.recall(k=k)\n", + " run_stats[f\"ndcg_at_{k}\"] = tie.normalized_discounted_cumulative_gain(k=k)\n", + "\n", + " print(run_stats)\n", + " results.append(run_stats)\n", + "\n", + "\n", + " # save as csv\n", + " results_dataframe = pd.DataFrame(results)\n", + " results_dataframe.to_csv(out_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-18 11:00:56.954876: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. 
Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n", + "Skipping registering GPU devices...\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjEAAAGhCAYAAACQ4eUqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA7CklEQVR4nO3de3yU9Z33//fMJJPJGUJOJkQSUKEKAnKIgKLW/EDpnVut7aK4EtJWl22wSnbrAnLwSNq6NxurKLa7Hm4tldqidqvL3jYKloJAg65FTTgEhAZzAklIQk4z1++PJBNGAmTCzFwzyev5eMxjkmu+18xnxtG8/V7fg8UwDEMAAAAhxmp2AQAAAP1BiAEAACGJEAMAAEISIQYAAIQkQgwAAAhJhBgAABCSCDEAACAkEWIAAEBIIsQAAICQRIgBAAAhqV8hZu3atcrMzJTD4VB2drZ27tx51rbt7e169NFHNWrUKDkcDo0fP16bNm06a/uf/OQnslgseuCBB/pTGgAAGCS8DjEbNmxQYWGhVq1apd27d2v8+PGaPXu2ampqem2/fPlyPf/883r66af12WefaeHChbrtttv00UcfndF2165dev7553XllVd6/04AAMCgYvF2A8js7GxNmTJFzzzzjCTJ5XIpIyND9913n5YsWXJG+7S0ND300EMqKChwH7v99tsVGRmpV1991X2ssbFRV111lZ599lk9/vjjmjBhgoqLi/tcl8vl0tGjRxUbGyuLxeLNWwIAACYxDEMnT55UWlqarFbv+lbCvGnc1tam0tJSLV261H3MarUqJydH27dv7/Wc1tZWORwOj2ORkZHaunWrx7GCggJ961vfUk5Ojh5//PHz1tLa2qrW1lb375WVlbr88su9eTsAACBIHDlyRMOHD/fqHK9CTF1dnZxOp1JSUjyOp6SkqKysrNdzZs+erTVr1mjmzJkaNWqUSkpKtHHjRjmdTneb1157Tbt379auXbv6XEtRUZEeeeSRM44fOXJEcXFxfX4eAABgnoaGBmVkZCg2Ntbrc70KMf3x1FNP6Z577tGYMWNksVg0atQo5efn64UXXpDUGTruv/9+vfvuu2f02JzL0qVLVVhY6P69+0OIi4sjxAAAEGL6MxTEq4tPiYmJstlsqq6u9jheXV2t1NTUXs9JSkrSm2++qaamJn3xxRcqKytTTEyMRo4cKUkqLS1VTU2NrrrqKoWFhSksLExbtmzRz3/+c4WFhXn02JwuIiLCHVgILgAADD5ehRi73a5JkyappKTEfczlcqmkpETTpk0757kOh0Pp6enq6OjQ7373O91yyy2SpBtvvFF//etf9fHHH7tvkydP1l133aWPP/5YNputH28LAAAMdF5fTiosLFReXp4mT56sqVOnqri4WE1NTcrPz5ckzZ8/X+np6SoqKpIk7dixQ5WVlZowYYIqKyv18MMPy+Vy6cEHH5QkxcbGauzYsR6vER0drWHDhp1xHAAAoJvXIWbu3Lmqra3VypUrVVVVpQkTJmjTpk3uwb6HDx/2mCLV0tKi5cuXq6KiQjExMZozZ45eeeUVDRkyxGdvAgAADD5erxMTrBoaGhQfH6/6+nrGxwAAECIu5O83eycBAICQRIgBAAAhiRADAABCEiEGAACEJEIMAAAISYQYAAAQkggxAAAgJBFizuPf3t2rpRv/quNNbWaXAgAATkOIOY/1Ow/r1zsP68v6U2aXAgAATkOIOY+EKLsk6aumdpMrAQAApyPEnMfQ6HBJ0vFmLicBABBMCDHnMSw6QpL0FWNiAAAIKoSY83D3xBBiAAAIKoSY83CPieFyEgAAQYUQcx5DoztDDD0xAAAEF0LMeSQQYgAACEqEmPMYGkWIAQAgGBFizqO7J4YxMQAABBdCzHm4Q0
xTuwzDMLkaAADQjRBzHt2Xk9qcLjW1OU2uBgAAdCPEnEek3abIcJskFrwDACCYEGL6gBlKAAAEH0JMH7BqLwAAwYcQ0wdMswYAIPgQYvqAadYAAAQfQkwf0BMDAEDwIcT0wTB6YgAACDqEmD5gE0gAAIIPIaYPTl+1FwAABAdCTB+4x8RwOQkAgKBBiOkDFrsDACD4EGL6oHuxuxPNbXK62AQSAIBgQIjpg+7LSS5DajjFuBgAAIIBIaYPwm1WxTnCJDEuBgCAYEGI6aOeGUqEGAAAggEhpo9YKwYAgOBCiOmjhChW7QUAIJgQYvqouyfmGD0xAAAEBUJMHzEmBgCA4EKI6aOenayZYg0AQDAgxPRRQteCd4yJAQAgOBBi+ighOkISs5MAAAgW/Qoxa9euVWZmphwOh7Kzs7Vz586ztm1vb9ejjz6qUaNGyeFwaPz48dq0aZNHm6KiIk2ZMkWxsbFKTk7WrbfeqvLy8v6U5jf0xAAAEFy8DjEbNmxQYWGhVq1apd27d2v8+PGaPXu2ampqem2/fPlyPf/883r66af12WefaeHChbrtttv00Ucfudts2bJFBQUF+vDDD/Xuu++qvb1ds2bNUlNTU//fmY/1jIkhxAAAEAwshmF4taNhdna2pkyZomeeeUaS5HK5lJGRofvuu09Lliw5o31aWpoeeughFRQUuI/dfvvtioyM1Kuvvtrra9TW1io5OVlbtmzRzJkz+1RXQ0OD4uPjVV9fr7i4OG/eUp+caG7ThEfflSTtffxm2cO4EgcAwIW6kL/fXv0lbmtrU2lpqXJycnqewGpVTk6Otm/f3us5ra2tcjgcHsciIyO1devWs75OfX29JCkhIcGb8vwqzhEuq6Xz5xNcUgIAwHRehZi6ujo5nU6lpKR4HE9JSVFVVVWv58yePVtr1qzRvn375HK59O6772rjxo368ssve23vcrn0wAMPaMaMGRo7duxZa2ltbVVDQ4PHzZ+sVkvPJSVCDAAApvP7NZGnnnpKl156qcaMGSO73a5FixYpPz9fVmvvL11QUKA9e/botddeO+fzFhUVKT4+3n3LyMjwR/ke2D8JAIDg4VWISUxMlM1mU3V1tcfx6upqpaam9npOUlKS3nzzTTU1NemLL75QWVmZYmJiNHLkyDPaLlq0SH/4wx/0/vvva/jw4eesZenSpaqvr3ffjhw54s1b6ZeeVXtZ8A4AALN5FWLsdrsmTZqkkpIS9zGXy6WSkhJNmzbtnOc6HA6lp6ero6NDv/vd73TLLbe4HzMMQ4sWLdIbb7yh9957T1lZWeetJSIiQnFxcR43f0vgchIAAEEjzNsTCgsLlZeXp8mTJ2vq1KkqLi5WU1OT8vPzJUnz589Xenq6ioqKJEk7duxQZWWlJkyYoMrKSj388MNyuVx68MEH3c9ZUFCg9evX66233lJsbKx7fE18fLwiIyN98T59Yij7JwEAEDS8DjFz585VbW2tVq5cqaqqKk2YMEGbNm1yD/Y9fPiwx3iXlpYWLV++XBUVFYqJidGcOXP0yiuvaMiQIe42zz33nCTp+uuv93itF198UQsWLPD+XflJ94J3jIkBAMB8Xq8TE6z8vU6MJP37nyr0+Nuf63+PT9PP75zol9cAAGAwCdg6MYOde2AvY2IAADAdIcYLTLEGACB4EGK80D07iYG9AACYjxDjhe7LSUyxBgDAfIQYL3SHmJZ2l061OU2uBgCAwY0Q44Uou829e/WxplaTqwEAYHAjxHjBYrGcNi6GrQcAADATIcZLQxkXAwBAUCDEeKl71V5mKAEAYC5CjJeGRrFWDAAAwYAQ46VhrNoLAEBQIMR4iVV7AQAIDoQYLyUQYgAACAqEGC8xJgYAgOBAiPESO1kDABAcCDFe6umJYbE7AADMRIjx0uk9MYZhmFwNAACDFyHGS0O7Frtzugw1tHSYXA0AAIMXIcZLEWE2xUSESWLVXgAAzESI6Yfu3phjhBgAAExDiOmHnp
2sCTEAAJiFENMP7GQNAID5CDH9QE8MAADmI8T0Az0xAACYjxDTD+61YuiJAQDANISYfujZBJJVewEAMAshph96th5oNbkSAAAGL0JMP/RsPUBPDAAAZiHE9ENC12J3xxkTAwCAaQgx/dB9Oan+VLs6nC6TqwEAYHAixPRDfGS4LJbOn0+c4pISAABmIMT0Q5jNqvjIzktKTLMGAMAchJh+6plmTYgBAMAMhJh+SogixAAAYCZCTD+x9QAAAOYixPQTm0ACAGAuQkw/DWXrAQAATEWI6afuBe++4nISAACmIMT001AG9gIAYCpCTD/17J9EiAEAwAyEmH5inRgAAMxFiOknQgwAAOYixPRT9+yk5janWtqdJlcDAMDg068Qs3btWmVmZsrhcCg7O1s7d+48a9v29nY9+uijGjVqlBwOh8aPH69NmzZd0HMGg9iIMIVZO3eBZFwMAACB53WI2bBhgwoLC7Vq1Srt3r1b48eP1+zZs1VTU9Nr++XLl+v555/X008/rc8++0wLFy7Ubbfdpo8++qjfzxkMLBbLaWvFEGIAAAg0i2EYhjcnZGdna8qUKXrmmWckSS6XSxkZGbrvvvu0ZMmSM9qnpaXpoYceUkFBgfvY7bffrsjISL366qv9es7eNDQ0KD4+XvX19YqLi/PmLfXb7H/7QOXVJ/Xq97N1zaWJAXlNAAAGkgv5++1VT0xbW5tKS0uVk5PT8wRWq3JycrR9+/Zez2ltbZXD4fA4FhkZqa1bt/b7Obuft6GhweMWaEO7Frxj/yQAAALPqxBTV1cnp9OplJQUj+MpKSmqqqrq9ZzZs2drzZo12rdvn1wul959911t3LhRX375Zb+fU5KKiooUHx/vvmVkZHjzVnxiWHSEJPZPAgDADH6fnfTUU0/p0ksv1ZgxY2S327Vo0SLl5+fLar2wl166dKnq6+vdtyNHjvio4r7r7ok5RogBACDgvEoSiYmJstlsqq6u9jheXV2t1NTUXs9JSkrSm2++qaamJn3xxRcqKytTTEyMRo4c2e/nlKSIiAjFxcV53AKNnawBADCPVyHGbrdr0qRJKikpcR9zuVwqKSnRtGnTznmuw+FQenq6Ojo69Lvf/U633HLLBT+n2dyzkxgTAwBAwIV5e0JhYaHy8vI0efJkTZ06VcXFxWpqalJ+fr4kaf78+UpPT1dRUZEkaceOHaqsrNSECRNUWVmphx9+WC6XSw8++GCfnzNYufdPoicGAICA8zrEzJ07V7W1tVq5cqWqqqo0YcIEbdq0yT0w9/Dhwx7jXVpaWrR8+XJVVFQoJiZGc+bM0SuvvKIhQ4b0+TmDFTtZAwBgHq/XiQlWZqwTs6eyXv/r6a1KiYvQjmU55z8BAAB4CNg6MfA01H05qV0DJAsCABAyCDEXoHt2UpvTpcbWDpOrAQBgcCHEXIBIu02R4TZJnb0xAAAgcAgxFyiBadYAAJiCEHOBulftZZo1AACBRYi5QEyzBgDAHISYC+Re8I7LSQAABBQh5gLREwMAgDkIMRdoWDQhBgAAMxBiLtBQQgwAAKYgxFwgxsQAAGAOQswFYkwMAADmIMRcoJ6eGFbsBQAgkAgxF6h7sbsTzW1yutgEEgCAQCHEXKDuy0kuQ2o4RW8MAACBQoi5QOE2q2IdYZKkY4yLAQAgYAgxPjCMGUoAAAQcIcYHWCsGAIDAI8T4QELXuBh2sgYAIHAIMT7g7onhchIAAAFDiPEB91ox9MQAABAwhBgf6Fm1lynWAAAECiHGBxK6Frw73tRqciUAAAwehBgfSIiOkCQdZ+sBAAAChhDjA909MYyJAQAgcAgxPjCUKdYAAAQcIcYHumcnnWztUFuHy+RqAAAYHAgxPhDnCJfV0vnzCdaKAQAgIAgxPmC1WnqmWRNiAAAICEKMj7hX7W0kxAAAEAiEGB9JoCcGAICAIsT4CFsPAAAQWIQYH3FfTmLrAQAAAoIQ4yPuBe+4nAQAQEAQYnykZxNIQgwAAI
FAiPER95gYemIAAAgIQoyPdI+JOcYUawAAAoIQ4yPdU6zpiQEAIDAIMT6SEN0zJsYwDJOrAQBg4CPE+Eh3iGntcOlUu9PkagAAGPgIMT4SZbfJHtb5cTJDCQAA/yPE+IjFYukZF8OCdwAA+F2/QszatWuVmZkph8Oh7Oxs7dy585zti4uLNXr0aEVGRiojI0OLFy9WS0uL+3Gn06kVK1YoKytLkZGRGjVqlB577LGQG1viXrWXwb0AAPhdmLcnbNiwQYWFhVq3bp2ys7NVXFys2bNnq7y8XMnJyWe0X79+vZYsWaIXXnhB06dP1969e7VgwQJZLBatWbNGkvTTn/5Uzz33nF5++WVdccUV+stf/qL8/HzFx8frRz/60YW/ywDpXrX3eFOryZUAADDwed0Ts2bNGt1zzz3Kz8/X5ZdfrnXr1ikqKkovvPBCr+23bdumGTNmaN68ecrMzNSsWbN05513evTebNu2Tbfccou+9a1vKTMzU9/5znc0a9as8/bwBJueVXu5nAQAgL95FWLa2tpUWlqqnJycniewWpWTk6Pt27f3es706dNVWlrqDiQVFRV65513NGfOHI82JSUl2rt3ryTpf/7nf7R161bdfPPNZ62ltbVVDQ0NHjezsZM1AACB49XlpLq6OjmdTqWkpHgcT0lJUVlZWa/nzJs3T3V1dbrmmmtkGIY6Ojq0cOFCLVu2zN1myZIlamho0JgxY2Sz2eR0OvXEE0/orrvuOmstRUVFeuSRR7wp3+8SGBMDAEDA+H120ubNm7V69Wo9++yz2r17tzZu3Ki3335bjz32mLvNb37zG/3qV7/S+vXrtXv3br388sv613/9V7388stnfd6lS5eqvr7efTty5Ii/38p50RMDAEDgeNUTk5iYKJvNpurqao/j1dXVSk1N7fWcFStW6O6779YPfvADSdK4cePU1NSke++9Vw899JCsVqt+/OMfa8mSJbrjjjvcbb744gsVFRUpLy+v1+eNiIhQRESEN+X7HTtZAwAQOF71xNjtdk2aNEklJSXuYy6XSyUlJZo2bVqv5zQ3N8tq9XwZm80mSe4p1Gdr43K5vCnPdOxkDQBA4Hg9xbqwsFB5eXmaPHmypk6dquLiYjU1NSk/P1+SNH/+fKWnp6uoqEiSlJubqzVr1mjixInKzs7W/v37tWLFCuXm5rrDTG5urp544gldfPHFuuKKK/TRRx9pzZo1+t73vufDt+p/zE4CACBwvA4xc+fOVW1trVauXKmqqipNmDBBmzZtcg/2PXz4sEevyvLly2WxWLR8+XJVVlYqKSnJHVq6Pf3001qxYoV++MMfqqamRmlpafqHf/gHrVy50gdvMXBO74lxuQxZrRaTKwIAYOCyGKG2LO5ZNDQ0KD4+XvX19YqLizOlhtYOp0Yv3yRJ+p+VsxQfFW5KHQAAhIoL+fvN3kk+FBFmU0xEZ+cW06wBAPAvQoyPDXVvPUCIAQDAnwgxPtazkzUhBgAAfyLE+Bg7WQMAEBiEGB+jJwYAgMAgxPiYuyeGEAMAgF8RYnwsgRADAEBAEGJ8jK0HAAAIDEKMj7EJJAAAgUGI8bGenhj2TwIAwJ8IMT6WwGJ3AAAEBCHGx7ovJ9WfaleH02VyNQAADFyEGB+LjwyXpWvzai4pAQDgP4QYHwuzWRUf2XlJiRlKAAD4DyHGDxKYoQQAgN8RYvzAPUOJEAMAgN8QYvyATSABAPA/QowfsAkkAAD+R4jxg55NIJmdBACAvxBi/KBnwbtWkysBAGDgIsT4gXv/JNaJAQDAbwgxfsDsJAAA/I8Q4wcJ0awTAwCAvxFi/KBnJ2tCDAAA/kKI8YPu2UnNbU61tDtNrgYAgIGJEOMHsRFhCrN27gJJbwwAAP5BiPEDi8Xi7o051kiIAQDAHwgxfuJetZeeGAAA/IIQ4ydD3QveEWIAAPAHQoyfsFYMAAD+RYjxE/daMazaCwCAXxBi/ISdrAEA8C9CjJ+4d7JmYC8AAH
5BiPET9+UkplgDAOAXhBg/GcoUawAA/IoQ4ydsAgkAgH8RYvxk6GmbQBqGYXI1AAAMPIQYP+mendTuNNTY2mFyNQAADDyEGD+JtNsUGW6TJH3VxFoxAAD4GiHGjxKYZg0AgN8QYvyoZ/+kVpMrAQBg4CHE+FH3NOvjXE4CAMDn+hVi1q5dq8zMTDkcDmVnZ2vnzp3nbF9cXKzRo0crMjJSGRkZWrx4sVpaWjzaVFZW6u///u81bNgwRUZGaty4cfrLX/7Sn/KCBptAAgDgP2HenrBhwwYVFhZq3bp1ys7OVnFxsWbPnq3y8nIlJyef0X79+vVasmSJXnjhBU2fPl179+7VggULZLFYtGbNGknSV199pRkzZuiGG27Qf/3XfykpKUn79u3T0KFDL/wdmsjdE8OYGAAAfM7rELNmzRrdc889ys/PlyStW7dOb7/9tl544QUtWbLkjPbbtm3TjBkzNG/ePElSZmam7rzzTu3YscPd5qc//akyMjL04osvuo9lZWV5/WaCDT0xAAD4j1eXk9ra2lRaWqqcnJyeJ7BalZOTo+3bt/d6zvTp01VaWuq+5FRRUaF33nlHc+bMcbf5/e9/r8mTJ+u73/2ukpOTNXHiRP3yl7/sz/sJKqzaCwCA/3jVE1NXVyen06mUlBSP4ykpKSorK+v1nHnz5qmurk7XXHONDMNQR0eHFi5cqGXLlrnbVFRU6LnnnlNhYaGWLVumXbt26Uc/+pHsdrvy8vJ6fd7W1la1tvbM+mloaPDmrQREQjT7JwEA4C9+n520efNmrV69Ws8++6x2796tjRs36u2339Zjjz3mbuNyuXTVVVdp9erVmjhxou69917dc889Wrdu3Vmft6ioSPHx8e5bRkaGv9+K17rHxByjJwYAAJ/zKsQkJibKZrOpurra43h1dbVSU1N7PWfFihW6++679YMf/EDjxo3TbbfdptWrV6uoqEgul0uSdNFFF+nyyy/3OO8b3/iGDh8+fNZali5dqvr6evftyJEj3ryVgGBMDAAA/uNViLHb7Zo0aZJKSkrcx1wul0pKSjRt2rRez2lubpbV6vkyNlvncvzdGyPOmDFD5eXlHm327t2rESNGnLWWiIgIxcXFedyCTfdidydOtcvpYhNIAAB8yevZSYWFhcrLy9PkyZM1depUFRcXq6mpyT1baf78+UpPT1dRUZEkKTc3V2vWrNHEiROVnZ2t/fv3a8WKFcrNzXWHmcWLF2v69OlavXq1/u7v/k47d+7UL37xC/3iF7/w4VsNvO7LSYYh1Z9qd/fMAACAC+d1iJk7d65qa2u1cuVKVVVVacKECdq0aZN7sO/hw4c9el6WL18ui8Wi5cuXq7KyUklJScrNzdUTTzzhbjNlyhS98cYbWrp0qR599FFlZWWpuLhYd911lw/eonnCbVbFOsJ0sqVDx5vaCDEAAPiQxei+phPiGhoaFB8fr/r6+qC6tHT9k+/r0LFmvb5wmqZkJphdDgAAQeVC/n6zd5KfDWWtGAAA/IIQ42cJUYQYAAD8gRDjZ/TEAADgH4QYP2OtGAAA/IMQ42fsZA0AgH8QYvwsoWvBO3piAADwLUKMn/X0xLSbXAkAAAMLIcbPhsUwJgYAAH8gxPhZd08MIQYAAN8ixPhZ9+ykk60dau1wmlwNAAADByHGz+Ic4bJaOn8+wbgYAAB8hhDjZ1arpWdwL5eUAADwGUJMAAxlwTsAAHyOEBMACSx4BwCAzxFiAoCtBwAA8D1CTAD0bALJwF4AAHyFEBMA3VsPHG9qNbkSAAAGDkJMALD1AAAAvkeICQDGxAAA4HuEmADoGRNDiAEAwFcIMQHQPcX6K6ZYAwDgM4SYAEg4rSfGMAyTqwEAYGAgxARAd4hp7XDpVDubQAIA4AuEmACIsttkD+v8qI81ckkJAABfIMQEgMViYVwMAAA+RogJEGYoAQDgW4SYAOletZeeGAAAfIMQEyDuVXvZPwkAAJ8gxAQIq/YCAOBbhJ
gAca8Vw+UkAAB8ghATIO4QwxRrAAB8ghATID07WRNiAADwBUJMgDAmBgAA3yLEBMhQFrsDAMCnCDEBkhrvkMUi1TW2qaK20exyAAAIeYSYAEmItuuG0cmSpP+7/QuTqwEAIPQRYgIob3qmJOm3pX9TY2uHucUAABDiCDEBdO0liRqZFK3G1g79rvRvZpcDAEBII8QEkNVq0YKu3piXtx2Sy2WYWxAAACGMEBNg375quGIiwlRR16Q/7a8zuxwAAEIWISbAYiLC9N3JwyVJL/35oMnVAAAQuggxJpg/LVOS9H55rQ7WNZlbDAAAIapfIWbt2rXKzMyUw+FQdna2du7cec72xcXFGj16tCIjI5WRkaHFixerpaWl17Y/+clPZLFY9MADD/SntJCQlRitG0YnSZL+7/ZD5hYDAECI8jrEbNiwQYWFhVq1apV2796t8ePHa/bs2aqpqem1/fr167VkyRKtWrVKn3/+uf7jP/5DGzZs0LJly85ou2vXLj3//PO68sorvX8nIaZ7uvXrf2G6NQAA/eF1iFmzZo3uuece5efn6/LLL9e6desUFRWlF154odf227Zt04wZMzRv3jxlZmZq1qxZuvPOO8/ovWlsbNRdd92lX/7ylxo6dGj/3k0ImXlpkkYmdk633rib6dYAAHjLqxDT1tam0tJS5eTk9DyB1aqcnBxt376913OmT5+u0tJSd2ipqKjQO++8ozlz5ni0Kygo0Le+9S2P5x7IrFaLuzfmJaZbAwDgtTBvGtfV1cnpdColJcXjeEpKisrKyno9Z968eaqrq9M111wjwzDU0dGhhQsXelxOeu2117R7927t2rWrz7W0traqtbXV/XtDQ4M3byUo3D5puJ7873JV1DZp6/46zbwsyeySAAAIGX6fnbR582atXr1azz77rHbv3q2NGzfq7bff1mOPPSZJOnLkiO6//3796le/ksPh6PPzFhUVKT4+3n3LyMjw11vwm5iIMH1nUtd0622HzC0GAIAQYzEMo8/XMdra2hQVFaXf/va3uvXWW93H8/LydOLECb311ltnnHPttdfq6quv1pNPPuk+9uqrr+ree+9VY2Ojfv/73+u2226TzWZzP+50OmWxWGS1WtXa2urxWLfeemIyMjJUX1+vuLi4vr4l01XUNuqb/2eLLBbp/X+6XpmJ0WaXBABAwDQ0NCg+Pr5ff7+96omx2+2aNGmSSkpK3MdcLpdKSko0bdq0Xs9pbm6W1er5Mt2hxDAM3XjjjfrrX/+qjz/+2H2bPHmy7rrrLn388ce9BhhJioiIUFxcnMctFI1MitH1o5NkGOxuDQCAN7waEyNJhYWFysvL0+TJkzV16lQVFxerqalJ+fn5kqT58+crPT1dRUVFkqTc3FytWbNGEydOVHZ2tvbv368VK1YoNzdXNptNsbGxGjt2rMdrREdHa9iwYWccH6gWTM/U5vJavf6XI/qnWZcpOsLrfywAAAw6Xv+1nDt3rmpra7Vy5UpVVVVpwoQJ2rRpk3uw7+HDhz16XpYvXy6LxaLly5ersrJSSUlJys3N1RNPPOG7dxHiZl6apKzEaB2sa9LG3X/T3V0r+gIAgLPzakxMMLuQa2rB4KU/H9TD//mZRiVF64+F18lisZhdEgAAfhewMTHwn9snDVe03aYDXdOtAQDAuRFigkSsI1zfndw5TfylPx8ytxgAAEIAISaIzJ82QpL0XnmNvjjG7tYAAJwLISaIjEyK0XWXMd0aAIC+IMQEmQUzMiVJv9l1RE3sbg0AwFkRYoLMdV3TrU+2dmjjR5VmlwMAQNAixAQZq9XiHhvz8rZDGiAz4AEA8DlCTBD6Ttd06/01jfrz/mNmlwMAQFAixAShWEf4abtbHzS5GgAAghMhJkjNn54pSSopq9HhY83mFgMAQBAixASpUUkxmumebn3I7HIAAAg6hJgglt/VG7PhL0y3BgDg6wgxQey6y5KUOSxKJ1s69AbTrQEA8ECICWKd060zJT
HdGgCAryPEBLnvTO6cbr2vplHbDjDdGgCAboSYIBfnCNftXdOtX2R3awAA3AgxIaD7klJJWTXTrQEA6EKICQGXJMfo2ksTZRjSKx8eMrscAACCAiEmROR37W69YdcRNbcx3RoAAEJMiLj+smSNGBalBqZbAwAgiRATMphuDQCAJ0JMCPnu5OGKstu0t7pR25luDQAY5AgxISTOEa7br+qabr3tkLnFAABgMkJMiMmbPkKS9MfPq3XkONOtAQCDFyEmxFySHOuebv2v/69crR1Os0sCAMAUhJgQdO/MkZKktz4+qtynt+qTv50wtyAAAExAiAlB116apOfuukrDou3aW92o257dpp9uKlNLO70yAIDBgxATom4ed5HeLbxOuePT5HQZem7zAf2vp7dq9+GvzC4NAICAIMSEsIRou56+c6Kev3uSEmMitL+mUd95bptWv/M5vTIAgAGPEDMAzL4iVX8snKnbJqbLZUi/+KBCc576k0q/OG52aQAA+A0hZoAYEmXXv82doH+fP1nJsRGqqGvSd9Zt12N/+Eyn2uiVAQAMPISYASbn8hS9u/g6fWfScBmG9B9bD+rmpz7QzoP0ygAABhZCzAAUHxWuf/3ueL2YP0WpcQ4dOtasub/Yrod//yk7YAMABgxCzAB2w+hk/b/CmZo7OUOGIb207ZBuKv4T+y4BAAYEQswAF+cI10+/c6Ve/t5UpcU7dPh4s+785Yda8eYeNbXSKwMACF2EmEHiusuS9N+LZ2pe9sWSpFc+/EKz/u0D/Xl/ncmVAQDQP4SYQSTWEa7Vt43Tr36QrfQhkao8cUp3/fsOLd34V/3tKzaTBACEFothGIbZRfhCQ0OD4uPjVV9fr7i4OLPLCXqNrR366X+V6ZUPv3AfG5sep5uuSNVNY1N1SXKsidUBAAaLC/n7TYgZ5LYfOKbiP+7VrkPH5TrtmzAqKVqzuwLNuPR4WSwW84oEAAxYhBgRYi5UXWOr/vhZtTZ9WqU/769Tu7Pna5EW79CsrkAzJTNBNiuBBgDgG4QYEWJ8qaGlXe+X1ei/P63S5vJaNZ+24u+waLv+v8tTNHtsqqaPGqaIMJuJlQIAQh0hRoQYf2lpd+pP++q0aU+V/vh5tepPtbsfi4kI0zfHJOumsam67rIkRUeEmVgpACAUEWJEiAmEdqdLOw8e16Y9VfrvT6tUc7LV/VhEmFXXXpqk2Vek6LrRSUqOdZhYKQAgVFzI3+9+TbFeu3atMjMz5XA4lJ2drZ07d56zfXFxsUaPHq3IyEhlZGRo8eLFamlpcT9eVFSkKVOmKDY2VsnJybr11ltVXl7en9LgR+E2q2ZckqjHbh2rD5feqI0/nK5/mDlSI4ZFqbXDpT9+Xq0f//YTTX2iRHOe+pN+uqlMH1YcU1uHy+zSAQADkNc9MRs2bND8+fO1bt06ZWdnq7i4WK+//rrKy8uVnJx8Rvv169fre9/7nl544QVNnz5de/fu1YIFC3THHXdozZo1kqSbbrpJd9xxh6ZMmaKOjg4tW7ZMe/bs0Weffabo6Og+1UVPjHkMw1BZ1Ult2lOl98pq9NfKeo/Ho+02Tb8kUdddlqTrLktSRkKUSZUCAIJNQC8nZWdna8qUKXrmmWckSS6XSxkZGbrvvvu0ZMmSM9ovWrRIn3/+uUpKStzH/umf/kk7duzQ1q1be32N2tpaJScna8uWLZo5c2af6iLEBI+6xlZt3VenLXtr9cHeWh1ravN4fGRitGZelqTrRifp6qxhirQzOBgABqsL+fvt1UjMtrY2lZaWaunSpe5jVqtVOTk52r59e6/nTJ8+Xa+++qp27typqVOnqqKiQu+8847uvvvus75OfX3n/8knJCSctU1ra6taW3vGZDQ0NHjzVuBHiTERunVium6dmC6Xy9CnRxv0wb5abSmvVenhr1RR16SKuia9tO2Q7GFWZWcluHtpLkmOYU0aAECfeBVi6urq5HQ6lZKS4nE8JSVFZWVlvZ4zb9
481dXV6ZprrpFhGOro6NDChQu1bNmyXtu7XC498MADmjFjhsaOHXvWWoqKivTII494Uz5MYLVaNG54vMYNj1fBDZeooaVd2/Yfc/fSVJ44pT/tq9Of9tXp8bc/V1q8o7OX5rIkTb8kUfGR4Wa/BQBAkPL7nNjNmzdr9erVevbZZ5Wdna39+/fr/vvv12OPPaYVK1ac0b6goEB79uw566WmbkuXLlVhYaH794aGBmVkZPi8fvhWnCNcN43tXDjPMAwdqG3Ulr2dl552VBzT0foWvbbriF7bdUQ2q0VTMofqxjEpumFMskYlRdNLAwBw82pMTFtbm6KiovTb3/5Wt956q/t4Xl6eTpw4obfeeuuMc6699lpdffXVevLJJ93HXn31Vd17771qbGyU1dozQWrRokV666239MEHHygrK8urN8KYmNDX0u7UjoPHtaW8Vlv21uhAbZPH4yOGRembY5J145gUTc1KkD2M/UsBINQFbEyM3W7XpEmTVFJS4g4xLpdLJSUlWrRoUa/nNDc3ewQVSbLZOgdyducnwzB033336Y033tDmzZu9DjAYGBzhNvfYGOlyHT7WrPfKqlVSVqMdFcf1xbFmvfjnQ3rxz4cUbbfp2kuT9M1vJOuG0clKio0wu3wAQIB5fTmpsLBQeXl5mjx5sqZOnari4mI1NTUpPz9fkjR//nylp6erqKhIkpSbm6s1a9Zo4sSJ7stJK1asUG5urjvMFBQUaP369XrrrbcUGxurqqoqSVJ8fLwiIyN99V4RYi4eFqUFM7K0YEaWmlo7tHV/nd77vEbvldeo9mSrNn1apU2fdn5Xxg+P1zfHpOjGbyTrirQ4LjsBwCDQrxV7n3nmGT355JOqqqrShAkT9POf/1zZ2dmSpOuvv16ZmZl66aWXJEkdHR164okn9Morr6iyslJJSUnKzc3VE088oSFDhnQWcZY/OC+++KIWLFjQp5q4nDR4uFyG9hyt13tlNXqvrEaf/M1zXZrk2Ah9c0yyvjkmWTMuSWQ7BAAIYmw7IELMYFbT0KLN5bUqKavWn/bVeWxYabdZdfWoYfrm6CTdMCZZI4b1bfFEAEBgEGJEiEGn1g6ndlQc13tlNSopq9aR46c8Hh+ZGK3rRyfrhjFJmpqVwC7cAGAyQowIMThT9xTuks9r9H55jf5y6Ct1uHq+7pHhNs24JFHXd/XSpA9h/BUABBohRoQYnF9DS7v+vK9O75fXaHN5rccu3JJ0WUqMbhidrOtHJ2ty5lCF25jCDQD+RogRIQbeMQxDn33ZoM3ltXq/rEa7D3+l0zppFBsRpmsuTdQNo5N13egkpcQ5zCsWAAYwQowIMbgwJ5rb9MG+Om0uq9HmvbU6/rVNKy+/KE43jEnSDaOTNT5jCL00AOAjhBgRYuA7LpehTyrr9X5XoPnkbyd0+r8l0XabJmUmKDsrQVePHKYrh8cTagCgnwgxIsTAf+oaW/XB3lq9X965aWX9qXaPxyPDbZo0YmhnqBnVGWqY9QQAfUOIESEGgeFyGSqrOqkPK45px8Fj2nnwuL5q9gw1EWFWXXXxUF09cpiyRyZoQsYQOcIJNQDQG0KMCDEwh8tlaG/NSe2oOK4dB49pR8VxHfvaeBp7mFUTM4Yoe+QwXZ2VoKtGDCXUAEAXQowIMQgOhmFof02jPjx4XDsqjunDiuOqa/Scym23WTU+I17ZWZ2Xnsamx+uieAf7PQEYlAgxIsQgOBmGoYq6JndPzYcVx1Td0HpGu6FR4RqbHq8r0uI1Nj1OV6TFa0RClKxWgg2AgY0QI0IMQoNhGPriWLN2HDymXYe+0qdHG7Sv+qTHSsLdYiLCdHlanMaeFmxGJUUrjJlQAAYQQowIMQhdLe1O7a0+qT2VDdpztF6fHm3Q5182qK3DdUbbiDCrvnFRnMamd4abK9LidVlqDLOhAIQsQowIMRhY2p0uHaht7Aw2lfX67GiDPj1ar6bTdujuFma16JLkGI1MilZWYrQyh0
VrZFLnfUK0nbE2AIIaIUaEGAx8LpehQ8eatOdogz6t7Oyx2XO0Xie+NsX7dHGOMGUldoWbrvvun+Mc4QGsHgB6R4gRIQaDk2EYqjxxSuVVJ3WwrkmHjjV13tc1q/LEqXOemxhjd/fcZCVFK6vr/uKEKEXZwwL0DgAMdhfy95v/UgEhzGKxaPjQKA0fGnXGYy3tTn1xrFkH6xp1sK7z/lBdsyrqmlTX2Kq6xjbVNbZp16Gvzjg3zhGmi+IjlRrv0EXxDqXEdd53/t55PM4RxqUqAKYixAADlCPcptGpsRqdGnvGYydb2nWorlkHjzXpYG1nD05FXZMO1jaqoaWj63ZS5dUnz/r8keE2d7DpDjup8ZFKPS3wJETZmSYOwG8IMcAgFOsI17jh8Ro3PP6Mx062tKu6oUVf1nfeqtz3p1TV0Kqq+lP6qrldp9qdqqjrDD9nY7VIQ6PsSoi2a2i0XcOiO38+142ZVgD6ihADwEOsI1yxjnBdknxmD063lnZnT7hpOKUv61tU7f69876usVUuQzrW1HbGVgznEhMRpqHR4UqIjlBCVOf9sBi74hxhinWEKy4yTLER4Yp1hCkusvM+1hGu2Igwen2AQYYQA8BrjnCbMrtmOZ1Nu9Olr7oCTPf98bPdmjvbdLgMNbZ2qLG1Q0eOn3tg8tdZLFKMPeyMcNMdfk4/HmW3Kcrefd/zc6Tdpmh7mCLDbQQiIAQQYgD4RbjNquQ4h5LjHH1qbxiGGk516Hhzm443tep4U7uON7W6Q1DDqQ6dbG3XyZYONZzqum/pUENLu9o6XDIM6WRrh062duhofcsF1+8It3YGmq8Fna+HnogwmxzhVkWE2RQRZpUj3PM+IvzMY+7Hus4hMAH9Q4gBEBQsFovio8IVHxWurHP08PSmtcPpEW5OdoWbky2nhR33Y53jeZrbnGpq7dCpdqeaWp061dah5nanuhedaGl3qaW9TTr7kB+fsdusigizKjzMqnCbReE2q+w2a+f96cfCrD2PdR3vbtf9uN1mUZjNKpvVojBr58+d912/W61dP3e2CbdZuu5P/73nHJvFImvXc1ktnW1tX/vZZu1uJ9m6jjNzDYFAiAEQ8iLCbIqIsSkxJuKCnscwDLW0u9Tc1qHmNmfX7Sw/dwWg1g6XWrruT//Zfey0+5bT7p2n7ZfV5nSpzemSztwbNGRZLfIMO11hyGqRrJbOkGOzdv7c+bvcbbt/tlp6Hrdae86zWiSL5D7PYrF4/H7mcbnPk7rOP61d59HOJ7Wo57yex3qOdbaxfO0xz2M67dzOFvJ87GttLOp58PTn7U0gwuG5lo/759mjFRtEC2USYgCgi8ViUWTXZaJhfn6tDmdP6GnpcKm9w6X2rjDT1uFSu9Nw/97e9Xub06n2DqPzWNetrcOltq627R2d7Ttchjrc94acrs7HnS6j85jLpQ5n98+dbU9v0951Toer83eny5DLkDpcLrlcktMwPEJYb1yG5HIakgbEeqroUvDNSwgxADDYhdmsCrNZFR0Ruv8ZdnWFIFdXqHEaRs+xrt+dLkMuV2cAMrrOcRmSy+g8zzDUFZI6jxtGT2gyuo453W0Nd4jq7Czobtf5fIY6zzEMyVBnW0Odj6n72Ontu3ocOs/rOrf756736G7Tdb7nYz3H1Ev77ufWae07j52lzWn1nE9f19o3ZHj09JxLXzp5gm017+CqBgAQMqxWi+wMSoaJrGYXAAAA0B+EGAAAEJIIMQAAICQRYgAAQEgixAAAgJBEiAEAACGJEAMAAEISIQYAAIQkQgwAAAhJhBgAABCSCDEAACAkEWIAAEBIIsQAAICQNGB2se7e/ryhocHkSgAAQF91/93u/jvujQETYk6ePClJysjIMLkSAADgrZMnTyo+Pt6rcyxGf6JPEHK5XDp69KhiY2NlsVh89rwNDQ3KyMjQkSNHFBcX57PnxbnxuZuDz90cfO7m4HM3x9c/d8MwdPLkSaWlpclq9W6Uy4Dpib
FarRo+fLjfnj8uLo4vuQn43M3B524OPndz8Lmb4/TP3dsemG4M7AUAACGJEAMAAEISIeY8IiIitGrVKkVERJhdyqDC524OPndz8Lmbg8/dHL783AfMwF4AADC40BMDAABCEiEGAACEJEIMAAAISYQYAAAQkggx57F27VplZmbK4XAoOztbO3fuNLukAe3hhx+WxWLxuI0ZM8bssgacDz74QLm5uUpLS5PFYtGbb77p8bhhGFq5cqUuuugiRUZGKicnR/v27TOn2AHkfJ/7ggULzvj+33TTTeYUO0AUFRVpypQpio2NVXJysm699VaVl5d7tGlpaVFBQYGGDRummJgY3X777aqurjap4oGhL5/79ddff8b3feHChV69DiHmHDZs2KDCwkKtWrVKu3fv1vjx4zV79mzV1NSYXdqAdsUVV+jLL79037Zu3Wp2SQNOU1OTxo8fr7Vr1/b6+M9+9jP9/Oc/17p167Rjxw5FR0dr9uzZamlpCXClA8v5PndJuummmzy+/7/+9a8DWOHAs2XLFhUUFOjDDz/Uu+++q/b2ds2aNUtNTU3uNosXL9Z//ud/6vXXX9eWLVt09OhRffvb3zax6tDXl89dku655x6P7/vPfvYz717IwFlNnTrVKCgocP/udDqNtLQ0o6ioyMSqBrZVq1YZ48ePN7uMQUWS8cYbb7h/d7lcRmpqqvHkk0+6j504ccKIiIgwfv3rX5tQ4cD09c/dMAwjLy/PuOWWW0ypZ7CoqakxJBlbtmwxDKPzux0eHm68/vrr7jaff/65IcnYvn27WWUOOF//3A3DMK677jrj/vvvv6DnpSfmLNra2lRaWqqcnBz3MavVqpycHG3fvt3Eyga+ffv2KS0tTSNHjtRdd92lw4cPm13SoHLw4EFVVVV5fPfj4+OVnZ3Ndz8ANm/erOTkZI0ePVr/+I//qGPHjpld0oBSX18vSUpISJAklZaWqr293eP7PmbMGF188cV8333o6597t1/96ldKTEzU2LFjtXTpUjU3N3v1vANmA0hfq6urk9PpVEpKisfxlJQUlZWVmVTVwJedna2XXnpJo0eP1pdffqlHHnlE1157rfbs2aPY2FizyxsUqqqqJKnX7373Y/CPm266Sd/+9reVlZWlAwcOaNmyZbr55pu1fft22Ww2s8sLeS6XSw888IBmzJihsWPHSur8vtvtdg0ZMsSjLd933+ntc5ekefPmacSIEUpLS9Mnn3yif/mXf1F5ebk2btzY5+cmxCCo3Hzzze6fr7zySmVnZ2vEiBH6zW9+o+9///smVgb43x133OH+edy4cbryyis1atQobd68WTfeeKOJlQ0MBQUF2rNnD+PsAuxsn/u9997r/nncuHG66KKLdOONN+rAgQMaNWpUn56by0lnkZiYKJvNdsYI9erqaqWmpppU1eAzZMgQXXbZZdq/f7/ZpQwa3d9vvvvmGzlypBITE/n++8CiRYv0hz/8Qe+//76GDx/uPp6amqq2tjadOHHCoz3fd9842+fem+zsbEny6vtOiDkLu92uSZMmqaSkxH3M5XKppKRE06ZNM7GywaWxsVEHDhzQRRddZHYpg0ZWVpZSU1M9vvsNDQ3asWMH3/0A+9vf/qZjx47x/b8AhmFo0aJFeuONN/Tee+8pKyvL4/FJkyYpPDzc4/teXl6uw4cP832/AOf73Hvz8ccfS5JX33cuJ51DYWGh8vLyNHnyZE2dOlXFxcVqampSfn6+2aUNWP/8z/+s3NxcjRgxQkePHtWqVatks9l05513ml3agNLY2OjxfzsHDx7Uxx9/rISEBF188cV64IEH9Pjjj+vSSy9VVlaWVqxYobS0NN16663mFT0AnOtzT0hI0COPPKLbb79dqampOnDggB588EFdcsklmj17tolVh7aCggKtX79eb731lmJjY93jXOLj4xUZGan4+Hh9//vfV2FhoRISEhQXF6f77rtP06ZN09VXX21y9aHrfJ/7gQMHtH79es
2ZM0fDhg3TJ598osWLF2vmzJm68sor+/5CFzS3aRB4+umnjYsvvtiw2+3G1KlTjQ8//NDskga0uXPnGhdddJFht9uN9PR0Y+7cucb+/fvNLmvAef/99w1JZ9zy8vIMw+icZr1ixQojJSXFiIiIMG688UajvLzc3KIHgHN97s3NzcasWbOMpKQkIzw83BgxYoRxzz33GFVVVWaXHdJ6+7wlGS+++KK7zalTp4wf/vCHxtChQ42oqCjjtttuM7788kvzih4Azve5Hz582Jg5c6aRkJBgREREGJdcconx4x//2Kivr/fqdSxdLwYAABBSGBMDAABCEiEGAACEJEIMAAAISYQYAAAQkggxAAAgJBFiAABASCLEAACAkESIAQAAIYkQAwAAQhIhBgAAhCRCDAAACEmEGAAAEJL+fxkK0cL00kH+AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "test_multiple_embeding_dimensions(\n", + " model_class=WalsRecommender,\n", + " method=PredictionMethod.DOT,\n", + " out_file=\"wals_model_results_final_dot.csv\",\n", + " epochs=[25],\n", + " c=[0.0001, 0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.7],\n", + " regularization_coefficient=[0.0, 0.00001, 0.0001, 0.001, 0.01]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / 
np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'embedding_dimension': 4, 'regularization': 0.0001, 'learning_rate': 0.0001, 'num_iterations': 76521640, 'precision_at_10': 0.01560204407537528, 'recall_at_10': 0.06050501513663635, 'ndcg_at_10': 0.07683413537968624, 'precision_at_20': 0.011992973490897476, 'recall_at_20': 0.09067910772439934, 'ndcg_at_20': 0.09634989967193944, 'precision_at_50': 0.0079112104758863, 'recall_at_50': 0.14733022592714648, 'ndcg_at_50': 0.12541038111253533, 'precision_at_100': 0.00527946343021399, 'recall_at_100': 0.1978219117723097, 'ndcg_at_100': 0.1456379374612586}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value 
encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'embedding_dimension': 8, 'regularization': 0.001, 'learning_rate': 0.0001, 'num_iterations': 76521640, 'precision_at_10': 0.012791440434366017, 'recall_at_10': 0.048452527172814557, 'ndcg_at_10': 0.06212604210714955, 'precision_at_20': 0.010012775471095497, 'recall_at_20': 0.07689506441974149, 'ndcg_at_20': 0.07903656541040587, 'precision_at_50': 0.006547428936442032, 'recall_at_50': 0.12561008986844754, 'ndcg_at_50': 0.10273975134585815, 'precision_at_100': 0.004500159693388694, 'recall_at_100': 0.17281461717597588, 'ndcg_at_100': 0.12142186359861892}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / 
np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'embedding_dimension': 10, 'regularization': 0.001, 'learning_rate': 0.0001, 'num_iterations': 76521640, 'precision_at_10': 0.011992973490897476, 'recall_at_10': 0.04644692238371199, 'ndcg_at_10': 0.05802910032592288, 'precision_at_20': 0.009374001916320665, 'recall_at_20': 0.0721340954731668, 'ndcg_at_20': 0.07385875110489287, 'precision_at_50': 0.006055573299265411, 'recall_at_50': 0.11468008558455581, 'ndcg_at_50': 0.09530259678856781, 'precision_at_100': 0.004227083998722453, 'recall_at_100': 0.16112090162064058, 'ndcg_at_100': 0.1135609038916972}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value 
encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'embedding_dimension': 16, 'regularization': 0.0001, 'learning_rate': 0.0001, 'num_iterations': 76521640, 'precision_at_10': 0.010923027786649633, 'recall_at_10': 0.04140103153989063, 'ndcg_at_10': 0.05513784313726271, 'precision_at_20': 0.008312040881507506, 'recall_at_20': 0.0647656160113604, 'ndcg_at_20': 0.06839756124757733, 'precision_at_50': 0.005416799744490578, 'recall_at_50': 0.10646787811167806, 'ndcg_at_50': 0.08773154884648639, 'precision_at_100': 0.0039603960396039604, 'recall_at_100': 0.15611050618282554, 'ndcg_at_100': 0.10676608984315174}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / 
np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + "/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n", + 
"/Users/mjturner/code/technique-inference-engine/models/recommender/bpr_recommender.py:107: RuntimeWarning: invalid value encountered in divide\n", + " data / np.expand_dims(num_items_per_user, axis=1)\n" + ] + } + ], + "source": [ + "test_multiple_embeding_dimensions(\n", + " BPRRecommender,\n", + " out_file=\"bpr_model_results.csv\",\n", + " epochs=[20*training_data.m*training_data.n],\n", + " learning_rate=[0.00001, 0.00005, 0.0001, 0.001],\n", + " regularization=[0., 0.0001, 0.001, 0.01],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error 0.04852325970978819\n", + "Precision 0.02400990099009901\n", + "Recall 0.1905249816378097\n", + "Normalized Discounted Cumulative Gain 0.1841149999164702\n" + ] + } ], + "source": [ + "embedding_dimension = 10\n", + "k = 20\n", + "best_hyperparameters = {'gravity_coefficient': 0.001, 'regularization_coefficient': 0.5, 'epochs': 1000, 'learning_rate': 100.0}\n", + "\n", + "model = TopItemsRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", + "\n", + "tie = TechniqueInferenceEngine(\n", + " training_data=training_data,\n", + " validation_data=validation_data,\n", + " test_data=test_data,\n", + " model=model,\n", + " prediction_method=PredictionMethod.DOT,\n", + " enterprise_attack_filepath=enterprise_attack_filepath,\n", + ")\n", + "mse = tie.fit()\n", + "print(\"Mean Squared Error\", mse)\n", + "precision = tie.precision(k=k)\n", + "print(\"Precision\", precision)\n", + "recall = tie.recall(k=k)\n", + "print(\"Recall\", recall)\n", + "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", + "print(\"Normalized Discounted Cumulative Gain\", ndcg)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " predictions training_data test_data \\\n", + "T1592.004 
0.000000 0.0 0.0 \n", + "T1557.001 0.586885 0.0 0.0 \n", + "T1600 0.291803 0.0 0.0 \n", + "T1647 0.293443 0.0 0.0 \n", + "T1068 0.916393 0.0 0.0 \n", + "... ... ... ... \n", + "T1656 0.149180 0.0 0.0 \n", + "T1557.003 0.147541 0.0 0.0 \n", + "T1499.001 0.145902 0.0 0.0 \n", + "T1027.005 0.708197 0.0 0.0 \n", + "T1059.007 0.896721 0.0 0.0 \n", + "\n", + " technique_name \n", + "T1592.004 Client Configurations \n", + "T1557.001 LLMNR/NBT-NS Poisoning and SMB Relay \n", + "T1600 Weaken Encryption \n", + "T1647 Plist File Modification \n", + "T1068 Exploitation for Privilege Escalation \n", + "... ... \n", + "T1656 Impersonation \n", + "T1557.003 DHCP Spoofing \n", + "T1499.001 OS Exhaustion Flood \n", + "T1027.005 Indicator Removal from Tools \n", + "T1059.007 JavaScript \n", + "\n", + "[611 rows x 4 columns]\n" + ] + } + ], + "source": [ + "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", + "print(new_report_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "metadata": {} + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error 8.200210708280245\n", + "Precision 0.0014993585631815267\n", + "Recall 0.018532725776008053\n", + "Normalized Discounted Cumulative Gain 0.00973503731752359\n" + ] + } + ], + "source": [ + "embedding_dimension = 10\n", + "k = 20\n", + "best_hyperparameters = {'gravity_coefficient': 0.001, 'regularization_coefficient': 0.001, 'epochs': 10, 'learning_rate': 1.0}\n", + "\n", + "model = FactorizationRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", + "\n", + "tie = TechniqueInferenceEngine(\n", + " training_data=training_data,\n", + " validation_data=validation_data,\n", + " test_data=test_data,\n", + " model=model,\n", + " prediction_method=PredictionMethod.DOT,\n", + " enterprise_attack_filepath=enterprise_attack_filepath,\n", + ")\n", + "mse = tie.fit(**best_hyperparameters)\n", 
+ "# mse = tie.fit_with_validation(\n", + "# learning_rate=[0.001, 0.01, 0.1, 1.0, 10., 20., 50., 100.],\n", + "# epochs=[1000],\n", + "# regularization_coefficient=[0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5],\n", + "# gravity_coefficient=[0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5],\n", + "# )\n", + "print(\"Mean Squared Error\", mse)\n", + "precision = tie.precision(k=k)\n", + "print(\"Precision\", precision)\n", + "recall = tie.recall(k=k)\n", + "print(\"Recall\", recall)\n", + "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", + "print(\"Normalized Discounted Cumulative Gain\", ndcg)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " predictions training_data test_data \\\n", + "T1204.002 -44127.687500 0.0 0.0 \n", + "T1592.001 -18382.687500 0.0 0.0 \n", + "T1547.012 43962.042969 0.0 0.0 \n", + "T1561.002 46403.843750 0.0 0.0 \n", + "T1110.004 67053.593750 0.0 0.0 \n", + "... ... ... ... \n", + "T1612 34722.996094 0.0 0.0 \n", + "T1588.006 -39228.710938 0.0 0.0 \n", + "T1003 -42823.429688 0.0 0.0 \n", + "T1069.002 -36731.289062 0.0 0.0 \n", + "T1070.005 12406.809570 0.0 0.0 \n", + "\n", + " technique_name \n", + "T1204.002 Malicious File \n", + "T1592.001 Hardware \n", + "T1547.012 Print Processors \n", + "T1561.002 Disk Structure Wipe \n", + "T1110.004 Credential Stuffing \n", + "... ... 
\n", + "T1612 Build Image on Host \n", + "T1588.006 Vulnerabilities \n", + "T1003 OS Credential Dumping \n", + "T1069.002 Domain Groups \n", + "T1070.005 Network Share Connection Removal \n", + "\n", + "[611 rows x 4 columns]\n" + ] + } + ], + "source": [ + "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", + "print(new_report_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.8" - } + "metadata": {} + }, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[20], line 22\u001b[0m\n\u001b[1;32m 9\u001b[0m tie \u001b[38;5;241m=\u001b[39m TechniqueInferenceEngine(\n\u001b[1;32m 10\u001b[0m training_data\u001b[38;5;241m=\u001b[39mtraining_data,\n\u001b[1;32m 11\u001b[0m validation_data\u001b[38;5;241m=\u001b[39mvalidation_data,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m enterprise_attack_filepath\u001b[38;5;241m=\u001b[39menterprise_attack_filepath,\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 17\u001b[0m \u001b[38;5;66;03m# mse = tie.fit_with_validation(\u001b[39;00m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;66;03m# learning_rate=[0.001, 0.005, 0.01, 0.02, 0.05],\u001b[39;00m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;66;03m# num_iterations=[500 * 512],\u001b[39;00m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# regularization_coefficient=[0, 0.0001, 0.001, 
0.01],\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[0;32m---> 22\u001b[0m mse \u001b[38;5;241m=\u001b[39m \u001b[43mtie\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mbest_hyperparameters\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMean Squared Error\u001b[39m\u001b[38;5;124m\"\u001b[39m, mse)\n\u001b[1;32m 24\u001b[0m precision \u001b[38;5;241m=\u001b[39m tie\u001b[38;5;241m.\u001b[39mprecision(k\u001b[38;5;241m=\u001b[39mk)\n", + "File \u001b[0;32m~/code/technique-inference-engine/models/tie.py:122\u001b[0m, in \u001b[0;36mTechniqueInferenceEngine.fit\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Fit the model to the data.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;124;03m The MSE of the prediction matrix, as determined by the test set.\u001b[39;00m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# train\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_training_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_sparse_tensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 124\u001b[0m mean_squared_error \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_model\u001b[38;5;241m.\u001b[39mevaluate(\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_test_data\u001b[38;5;241m.\u001b[39mto_sparse_tensor(), method\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prediction_method\n\u001b[1;32m 126\u001b[0m )\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_checkrep()\n", + "File \u001b[0;32m~/code/technique-inference-engine/models/recommender/bpr_recommender.py:244\u001b[0m, in \u001b[0;36mBPRRecommender.fit\u001b[0;34m(self, data, learning_rate, epochs, regularization_coefficient)\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_U[u, :] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m learning_rate \u001b[38;5;241m*\u001b[39m (\n\u001b[1;32m 239\u001b[0m sigmoid_derivative \u001b[38;5;241m*\u001b[39m d_w \u001b[38;5;241m-\u001b[39m (regularization_coefficient \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_U[u, :])\n\u001b[1;32m 240\u001b[0m )\n\u001b[1;32m 241\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V[i, :] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m learning_rate \u001b[38;5;241m*\u001b[39m (\n\u001b[1;32m 242\u001b[0m sigmoid_derivative \u001b[38;5;241m*\u001b[39m d_hi \u001b[38;5;241m-\u001b[39m (regularization_coefficient \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V[i, :])\n\u001b[1;32m 243\u001b[0m )\n\u001b[0;32m--> 244\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V[j, :] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m learning_rate \u001b[38;5;241m*\u001b[39m (\n\u001b[1;32m 245\u001b[0m sigmoid_derivative \u001b[38;5;241m*\u001b[39m d_hj \u001b[38;5;241m-\u001b[39m (regularization_coefficient \u001b[38;5;241m*\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_V[j, :])\n\u001b[1;32m 246\u001b[0m )\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# hyperparameters\n", + "embedding_dimension = 4\n", + "k = 20\n", + "best_hyperparameters = {'regularization_coefficient': 0.0001, 'epochs': 2, 'learning_rate': 0.0001}\n", + "# best_hyperparameters[\"epochs\"] = 20*training_data.m*training_data.n\n", + "\n", + "model = BPRRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", + "\n", + "tie = TechniqueInferenceEngine(\n", + " training_data=training_data,\n", + " validation_data=validation_data,\n", + " test_data=test_data,\n", + " model=model,\n", + " prediction_method=PredictionMethod.COSINE,\n", + " enterprise_attack_filepath=enterprise_attack_filepath,\n", + ")\n", + "# mse = tie.fit_with_validation(\n", + "# learning_rate=[0.001, 0.005, 0.01, 0.02, 0.05],\n", + "# epochs=[500 * 512],\n", + "# regularization_coefficient=[0, 0.0001, 0.001, 0.01],\n", + "# )\n", + "mse = tie.fit(**best_hyperparameters)\n", + "print(\"Mean Squared Error\", mse)\n", + "precision = tie.precision(k=k)\n", + "print(\"Precision\", precision)\n", + "recall = tie.recall(k=20)\n", + "print(\"Recall\", recall)\n", + "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", + "print(\"Normalized Discounted Cumulative Gain\", ndcg)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " predictions training_data test_data \\\n", + "T1558.002 0.555484 0.0 0.0 \n", + "T1132.002 0.494036 0.0 0.0 \n", + "T1211 -0.492932 0.0 0.0 \n", + "T1601.002 -0.185998 0.0 0.0 \n", + "T1596 -0.025210 0.0 0.0 \n", + "... ... ... ... 
\n", + "T1546.011 -0.217676 0.0 0.0 \n", + "T1535 0.464719 0.0 0.0 \n", + "T1071 0.199836 0.0 0.0 \n", + "T1587 0.658772 0.0 0.0 \n", + "T1499.002 0.464182 0.0 0.0 \n", + "\n", + " technique_name \n", + "T1558.002 Silver Ticket \n", + "T1132.002 Non-Standard Encoding \n", + "T1211 Exploitation for Defense Evasion \n", + "T1601.002 Downgrade System Image \n", + "T1596 Search Open Technical Databases \n", + "... ... \n", + "T1546.011 Application Shimming \n", + "T1535 Unused/Unsupported Cloud Regions \n", + "T1071 Application Layer Protocol \n", + "T1587 Develop Capabilities \n", + "T1499.002 Service Exhaustion Flood \n", + "\n", + "[611 rows x 4 columns]\n" + ] + } + ], + "source": [ + "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", + "print(new_report_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 20/20 [00:00<00:00, 261.94it/s, train_auc=51.95%, skipped=9.34%]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error 0.8416396221072098\n", + "Precision 0.008555163566388711\n", + "Recall 0.1188407602595878\n", + "Normalized Discounted Cumulative Gain 0.05200668172568438\n" + ] + } + ], + "source": [ + "# hyperparameters\n", + "embedding_dimension = 10\n", + "k = 20\n", + "best_hyperparameters = {'regularization_coefficient': 0.0001, \"epochs\": 20, 'learning_rate': 0.005}\n", + "\n", + "model = ImplicitBPRRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", + "\n", + "tie = TechniqueInferenceEngine(\n", + " training_data=training_data,\n", + " validation_data=validation_data,\n", + " test_data=test_data,\n", + " model=model,\n", + " prediction_method=PredictionMethod.COSINE,\n", + " enterprise_attack_filepath=enterprise_attack_filepath,\n", + ")\n", + "# mse = tie.fit_with_validation(\n", + "# 
learning_rate=[0.001, 0.005, 0.01, 0.02, 0.05],\n", + "# epochs=[math.floor(500 * 512 / training_data.to_numpy().sum())],\n", + "# regularization=[0, 0.0001, 0.001, 0.01],\n", + "# )\n", + "mse = tie.fit(**best_hyperparameters)\n", + "print(\"Mean Squared Error\", mse)\n", + "precision = tie.precision(k=k)\n", + "print(\"Precision\", precision)\n", + "recall = tie.recall(k=k)\n", + "print(\"Recall\", recall)\n", + "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", + "print(\"Normalized Discounted Cumulative Gain\", ndcg)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 20/20 [00:00<00:00, 51.99it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error 0.5028757316475128\n", + "Precision 0.008659397049390635\n", + "Recall 0.11683780275644119\n", + "Normalized Discounted Cumulative Gain 0.06099917717388372\n" + ] + } + ], + "source": [ + "# hyperparameters\n", + "embedding_dimension = 10\n", + "k = 20\n", + "\n", + "best_hyperparameters = {'regularization_coefficient': 0.05, 'c': 0.5, 'epochs': 20}\n", + "\n", + "model = ImplicitWalsRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", + "\n", + "tie = TechniqueInferenceEngine(\n", + " training_data=training_data,\n", + " validation_data=validation_data,\n", + " test_data=test_data,\n", + " model=model,\n", + " prediction_method=PredictionMethod.COSINE,\n", + " enterprise_attack_filepath=enterprise_attack_filepath,\n", + ")\n", + "mse = tie.fit(**best_hyperparameters)\n", + "# mse = tie.fit_with_validation(\n", + "# epochs=[20],\n", + "# c=[0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.7],\n", + "# regularization_coefficient=[0.001, 0.005, 0.01, 0.02, 0.05]\n", + "# )\n", + "print(\"Mean Squared Error\", mse)\n", + "precision = tie.precision(k=k)\n", + "print(\"Precision\", precision)\n", + "recall = tie.recall(k=k)\n", 
+ "print(\"Recall\", recall)\n", + "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", + "print(\"Normalized Discounted Cumulative Gain\", ndcg)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " predictions training_data test_data \\\n", + "T1204.002 0.190945 0.0 0.0 \n", + "T1592.001 -0.088588 0.0 0.0 \n", + "T1547.012 0.595495 0.0 0.0 \n", + "T1561.002 0.591824 0.0 0.0 \n", + "T1110.004 -0.004705 0.0 0.0 \n", + "... ... ... ... \n", + "T1612 0.140598 0.0 0.0 \n", + "T1588.006 0.665867 0.0 0.0 \n", + "T1003 -0.068861 0.0 0.0 \n", + "T1069.002 0.364830 0.0 0.0 \n", + "T1070.005 0.198465 0.0 0.0 \n", + "\n", + " technique_name \n", + "T1204.002 Malicious File \n", + "T1592.001 Hardware \n", + "T1547.012 Print Processors \n", + "T1561.002 Disk Structure Wipe \n", + "T1110.004 Credential Stuffing \n", + "... ... \n", + "T1612 Build Image on Host \n", + "T1588.006 Vulnerabilities \n", + "T1003 OS Credential Dumping \n", + "T1069.002 Domain Groups \n", + "T1070.005 Network Share Connection Removal \n", + "\n", + "[611 rows x 4 columns]\n" + ] + } + ], + "source": [ + "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", + "print(new_report_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error 0.22009765631062408\n", + "Precision 0.0081783194355356\n", + "Recall 0.11611657887235578\n", + "Normalized Discounted Cumulative Gain 0.05730162211191529\n" + ] + } + ], + "source": [ + "# hyperparameters\n", + "embedding_dimension = 4\n", + "k = 20\n", + "\n", + "# best_hyperparameters = {'regularization_coefficient': 0.1, 'c': 0.5, 'epochs': 20}\n", + "# best_hyperparameters = {'regularization_coefficient': 0.0001, 'c': 0.3, 'epochs': 100}\n", + "best_hyperparameters = 
{'regularization_coefficient': 0.001, 'c': 0.1, \"epochs\": 20}\n", + "model = WalsRecommender(m=training_data.m, n=training_data.n, k=embedding_dimension)\n", + "\n", + "tie = TechniqueInferenceEngine(\n", + " training_data=training_data,\n", + " validation_data=validation_data,\n", + " test_data=test_data,\n", + " model=model,\n", + " prediction_method=PredictionMethod.COSINE,\n", + " enterprise_attack_filepath=enterprise_attack_filepath,\n", + ")\n", + "mse = tie.fit(**best_hyperparameters)\n", + "# mse = tie.fit_with_validation(\n", + "# epochs=[20],\n", + "# c=[0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.7],\n", + "# regularization_coefficient=[0.001, 0.005, 0.01, 0.02, 0.05]\n", + "# )\n", + "print(\"Mean Squared Error\", mse)\n", + "precision = tie.precision(k=k)\n", + "print(\"Precision\", precision)\n", + "recall = tie.recall(k=k)\n", + "print(\"Recall\", recall)\n", + "ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", + "print(\"Normalized Discounted Cumulative Gain\", ndcg)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " predictions training_data test_data \\\n", + "T1204.002 0.746440 0.0 0.0 \n", + "T1592.001 -0.030477 0.0 0.0 \n", + "T1547.012 0.976308 0.0 0.0 \n", + "T1561.002 0.676964 0.0 0.0 \n", + "T1110.004 0.389903 0.0 0.0 \n", + "... ... ... ... \n", + "T1612 0.093252 0.0 0.0 \n", + "T1588.006 0.731710 0.0 0.0 \n", + "T1003 -0.059756 0.0 0.0 \n", + "T1069.002 0.502788 0.0 0.0 \n", + "T1070.005 0.731683 0.0 0.0 \n", + "\n", + " technique_name \n", + "T1204.002 Malicious File \n", + "T1592.001 Hardware \n", + "T1547.012 Print Processors \n", + "T1561.002 Disk Structure Wipe \n", + "T1110.004 Credential Stuffing \n", + "... ... 
\n", + "T1612 Build Image on Host \n", + "T1588.006 Vulnerabilities \n", + "T1003 OS Credential Dumping \n", + "T1069.002 Domain Groups \n", + "T1070.005 Network Share Connection Removal \n", + "\n", + "[611 rows x 4 columns]\n" + ] + } + ], + "source": [ + "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, **best_hyperparameters)\n", + "print(new_report_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0.00000000e+00 3.10272485e-01 -5.06079257e-01 ... -2.96728946e-02\n", + " 1.03696346e-01 -4.59698914e-03]\n", + " [ 1.00000000e+00 2.34661356e-01 -3.45782340e-01 ... -8.81108269e-02\n", + " 7.32592419e-02 2.15996355e-01]\n", + " [ 2.00000000e+00 4.81936446e-08 6.88791080e-10 ... 3.42836657e-08\n", + " -7.81015075e-09 -4.81467985e-08]\n", + " ...\n", + " [ 6.25900000e+03 3.53572398e-01 -4.88738894e-01 ... 2.14364976e-02\n", + " 1.39371127e-01 1.01979606e-01]\n", + " [ 6.26000000e+03 -5.61978075e-10 -3.23640350e-08 ... -7.43976116e-08\n", + " 8.64229861e-08 8.77083117e-09]\n", + " [ 6.26100000e+03 -6.00948269e-08 -1.48262300e-08 ... 
3.57593208e-08\n", + " -9.07301079e-09 2.34565452e-08]]\n", + "(6262, 11)\n", + "(611, 11)\n" + ] + } + ], + "source": [ + "# TEMPORARY - GET EMBEDDINGS FOR FE\n", + "U = tie.get_U() # entity (report) embeddings\n", + "V = tie.get_V() # item (technique) embeddings\n", + "\n", + "U_with_index = np.hstack((np.expand_dims(training_data.report_ids, axis=1), U))\n", + "V_with_index = np.hstack((np.expand_dims(training_data.technique_ids, axis=1), V))\n", + "\n", + "print(U_with_index.shape)\n", + "print(V_with_index.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "recalls [0.3087573406401257, 0.3257859760812914, 0.3458226653670225, 0.3745375233589626, 0.3949941633991787, 0.4110964558541681, 0.4219309973671012, 0.4317147374599419, 0.44042389049516534, 0.44770298702149547, 0.45387773687027355, 0.45797033624322875, 0.46304119765720547, 0.4679221448741617, 0.47182791494477766, 0.47617333685846847, 0.4791363202775644, 0.48264413250862803, 0.48577394889957126, 0.4883141430997783, 0.4904385260449852]\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "k_values = [1, 5] + list(range(10, 200, 10))\n", + "recalls = []\n", + "ndcgs = []\n", + "for k in k_values:\n", + " # print(\"Mean Squared Error\", mse)\n", + " precision = tie.precision(k=k)\n", + " # print(\"Precision\", precision)\n", + " recall = tie.recall(k=k)\n", + " recalls.append(recall)\n", + " # print(\"Recall\", recall)\n", + " ndcg = tie.normalized_discounted_cumulative_gain(k=k)\n", + " ndcgs.append(ndcg)\n", + " # print(\"Normalized Discounted Cumulative Gain\", ndcg)\n", + "\n", + "print(\"recalls\", recalls)\n", + "\n", + "plt.xlabel(\"k\")\n", + "plt.ylabel(\"Normalized discounted cumulative gain (NDCG)\")\n", + "plt.title(\"NDCG@k for various values of k\")\n", + "plt.plot(k_values, ndcgs)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "FactorizationRecommender.predict_new_entity() got an unexpected keyword argument 'c'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 6\u001b[0m\n\u001b[1;32m 1\u001b[0m oilrig_techniques \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1047\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1059.005\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1124\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1082\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1497.001\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1053.005\u001b[39m\u001b[38;5;124m\"\u001b[39m, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1027\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1105\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1070.004\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1059.003\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mT1071.001\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5\u001b[0m }\n\u001b[0;32m----> 6\u001b[0m new_report_predictions \u001b[38;5;241m=\u001b[39m \u001b[43mtie\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict_for_new_report\u001b[49m\u001b[43m(\u001b[49m\u001b[43moilrig_techniques\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregularization_coefficient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.05\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlearning_rate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(new_report_predictions\u001b[38;5;241m.\u001b[39msort_values(by\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredictions\u001b[39m\u001b[38;5;124m\"\u001b[39m, ascending\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m10\u001b[39m))\n", + "File \u001b[0;32m~/code/technique-inference-engine/models/tie.py:338\u001b[0m, in \u001b[0;36mTechniqueInferenceEngine.predict_for_new_report\u001b[0;34m(self, techniques, **kwargs)\u001b[0m\n\u001b[1;32m 332\u001b[0m n \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_training_data\u001b[38;5;241m.\u001b[39mn\n\u001b[1;32m 334\u001b[0m technique_tensor \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mSparseTensor(\n\u001b[1;32m 335\u001b[0m indices\u001b[38;5;241m=\u001b[39mtechnique_indices_2d, values\u001b[38;5;241m=\u001b[39mvalues, dense_shape\u001b[38;5;241m=\u001b[39m(n,)\n\u001b[1;32m 336\u001b[0m )\n\u001b[0;32m--> 338\u001b[0m predictions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict_new_entity\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtechnique_tensor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prediction_method\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 340\u001b[0m training_indices_dense \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mzeros(\u001b[38;5;28mlen\u001b[39m(predictions))\n\u001b[1;32m 341\u001b[0m training_indices_dense[technique_indices] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n", + "\u001b[0;31mTypeError\u001b[0m: FactorizationRecommender.predict_new_entity() got an unexpected keyword argument 'c'" + ] + } + ], + "source": [ + "oilrig_techniques = {\n", + " \"T1047\", \"T1059.005\", \"T1124\", \"T1082\",\n", + " \"T1497.001\", \"T1053.005\", \"T1027\", \"T1105\",\n", + " \"T1070.004\", \"T1059.003\", \"T1071.001\"\n", + "}\n", + "new_report_predictions = tie.predict_for_new_report(oilrig_techniques, c=0.5, regularization_coefficient=0.05, learning_rate=0.01, epochs=100)\n", + "\n", + "print(new_report_predictions.sort_values(by=\"predictions\", ascending=False).head(10))" + ] + }, + { + "cell_type": "code", + 
"execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[t-SNE] Computing 46 nearest neighbors...\n", + "[t-SNE] Indexed 6262 samples in 0.003s...\n", + "[t-SNE] Computed neighbors for 6262 samples in 0.226s...\n", + "[t-SNE] Computed conditional probabilities for sample 1000 / 6262\n", + "[t-SNE] Computed conditional probabilities for sample 2000 / 6262\n", + "[t-SNE] Computed conditional probabilities for sample 3000 / 6262\n", + "[t-SNE] Computed conditional probabilities for sample 4000 / 6262\n", + "[t-SNE] Computed conditional probabilities for sample 5000 / 6262\n", + "[t-SNE] Computed conditional probabilities for sample 6000 / 6262\n", + "[t-SNE] Computed conditional probabilities for sample 6262 / 6262\n", + "[t-SNE] Mean sigma: 0.000000\n", + "[t-SNE] KL divergence after 250 iterations with early exaggeration: 95.187531\n", + "[t-SNE] KL divergence after 10000 iterations: 0.847868\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def make_tsne_embeddings(embeddings: np.ndarray) -> tuple[np.array, np.array]:\n", + " \"\"\"Create 2D representation of embeddings using t-SNE.\n", + "\n", + " Args:\n", + " embeddings: an mxk array of m embeddings in k-dimensional space.\n", + "\n", + " Returns:\n", + " A tuple of the form (x_1, x_2) where x_1 and x_2 are length m\n", + " such that (x_1[i], x_2[i]) is the 2-dimensional point containing the 2-dimensional\n", + " representation for embeddings[i, :].\n", + " \"\"\"\n", + " tsne = sklearn.manifold.TSNE(\n", + " n_components=2,\n", + " perplexity=15,\n", + " learning_rate=\"auto\",\n", + " # metric='cosine',\n", + " # early_exaggeration=10.0,\n", + " init='pca',\n", + " verbose=True,\n", + " n_iter=10000,\n", + " )\n", + "\n", + " V_proj = tsne.fit_transform(embeddings)\n", + " x = V_proj[:, 0]\n", + " y = V_proj[:, 1]\n", + "\n", + " return x, y\n", + "\n", + "U = tie.get_U()\n", + "x_1, x_2 = make_tsne_embeddings(U)\n", + "\n", + "plt.scatter(x_1, x_2, s=0.5)\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 }, - "nbformat": 4, - "nbformat_minor": 2 + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } diff --git a/src/tie/engine.py b/src/tie/engine.py index ed1042b..0a22355 100644 --- a/src/tie/engine.py +++ b/src/tie/engine.py @@ -111,7 +111,11 @@ def fit(self, **kwargs) -> float: The MSE of the prediction matrix, as determined by the test set. 
""" # train - self._model.fit(self._training_data.to_sparse_tensor(), **kwargs) + self._model.fit( + self._training_data.to_sparse_tensor(), + self._validation_data.to_sparse_tensor(), + **kwargs, + ) mean_squared_error = self._model.evaluate( self._test_data.to_sparse_tensor(), method=self._prediction_method diff --git a/src/tie/recommender/wals_recommender.py b/src/tie/recommender/wals_recommender.py index 8e5d3a5..953579b 100644 --- a/src/tie/recommender/wals_recommender.py +++ b/src/tie/recommender/wals_recommender.py @@ -4,7 +4,7 @@ from tie.constants import PredictionMethod from tie.utils import calculate_predicted_matrix - +import matplotlib.pyplot as plt from .recommender import Recommender @@ -179,6 +179,7 @@ def V_T_C_I_V(V, c_array): def fit( self, data: tf.SparseTensor, + test_data: tf.SparseTensor, epochs: int, c: float = 0.024, regularization_coefficient: float = 0.01, @@ -209,6 +210,8 @@ def fit( alpha = (1 / c) - 1 + losses = [] + for _ in range(epochs): # step 1: update U @@ -219,6 +222,10 @@ def fit( # step 2: update V self._V = self._update_factor(self._U, P, alpha, regularization_coefficient) + losses.append(self.evaluate(test_data)) + + plt.plot(list(range(len(losses))), losses) + plt.show() self._checkrep() def evaluate(