remove temp files while running saving space

fa1c4 · Jan 3, 2022 · fb5ff7a · fb5ff7a
1 parent be221bf
commit fb5ff7a
Show file tree

Hide file tree

Showing 19 changed files with 32,860 additions and 125,618 deletions.
diff --git a/EDA/splitdata/spliting.ipynb b/EDA/splitdata/spliting.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {
     "collapsed": true
    },
@@ -14,20 +14,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 3,
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "----- file created -----\n",
-      "../../data/shards_15_ordered\n"
+      "----- The folder exists! -----\n",
+      "../../data/shards_5_shuffled\n"
      ]
     }
    ],
    "source": [
-    "shards = 15\n",
-    "shuffle = False\n",
+    "shards = 5\n",
+    "shuffle = True\n",
     "shuflled_ordered_str = 'shuffled' if shuffle else 'ordered'\n",
     "\n",
     "def mkdir(path):\n",
@@ -54,7 +54,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 4,
    "outputs": [],
    "source": [
     "def split(dataset_file, shards=5):\n",
@@ -86,28 +86,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 5,
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "92755\n",
-      "../../data/shards_15_ordered/dataset_sharded0.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded1.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded2.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded3.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded4.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded5.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded6.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded7.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded8.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded9.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded10.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded11.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded12.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded13.csv\n",
-      "../../data/shards_15_ordered/dataset_sharded14.csv\n",
+      "92732\n",
+      "../../data/shards_5_shuffled/dataset_sharded0.csv\n",
+      "../../data/shards_5_shuffled/dataset_sharded1.csv\n",
+      "../../data/shards_5_shuffled/dataset_sharded2.csv\n",
+      "../../data/shards_5_shuffled/dataset_sharded3.csv\n",
+      "../../data/shards_5_shuffled/dataset_sharded4.csv\n",
       "----- done data spliting -----\n"
      ]
     }
@@ -176,18 +166,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "6 <class 'int'>\n",
-      "6\n",
-      "5\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "outputs": [],
    "source": [
     "test = 5.4999\n",
     "print(round(test), type(round(test)))\n",

diff --git a/EDA/splitdata/test_dataset_generating.ipynb b/EDA/splitdata/test_dataset_generating.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
@@ -15,7 +15,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "outputs": [],
    "source": [
     "# sorting shuffled train dataset to ordered dataset\n",
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "outputs": [],
    "source": [
     "def testdata_generating(dataset_file, save_file):\n",
@@ -123,25 +123,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "100000\n",
-      "train dataset has 92731 length\n",
-      "test dataset has 7269 length\n",
-      "----- dividing dataset done! -----\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "outputs": [],
    "source": [
     "'''\n",
     "generating test dataset and divide it from whole train dataset\n",
     "'''\n",
     "\n",
-    "shuffle = True\n",
+    "shuffle = False\n",
     "shuflled_ordered_str = 'shuffled' if shuffle else 'ordered'\n",
     "if shuffle: # shuffled\n",
     "    dataset_file = open('../../ml-100k/u.data', \"rb\")\n",
@@ -186,17 +175,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "100000\n",
-      "----- ordered test data generating done -----\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "outputs": [],
    "source": [
     "'''\n",
     "ordered score test data generating\n",