Skip to content

Commit

Permalink
remove temp files while running saving space
Browse files Browse the repository at this point in the history
  • Loading branch information
fa1c4 committed Jan 3, 2022
1 parent be221bf commit fb5ff7a
Show file tree
Hide file tree
Showing 19 changed files with 32,860 additions and 125,618 deletions.
52 changes: 16 additions & 36 deletions EDA/splitdata/spliting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {
"collapsed": true
},
Expand All @@ -14,20 +14,20 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"----- file created -----\n",
"../../data/shards_15_ordered\n"
"----- The folder exists! -----\n",
"../../data/shards_5_shuffled\n"
]
}
],
"source": [
"shards = 15\n",
"shuffle = False\n",
"shards = 5\n",
"shuffle = True\n",
"shuflled_ordered_str = 'shuffled' if shuffle else 'ordered'\n",
"\n",
"def mkdir(path):\n",
Expand All @@ -54,7 +54,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 4,
"outputs": [],
"source": [
"def split(dataset_file, shards=5):\n",
Expand Down Expand Up @@ -86,28 +86,18 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"92755\n",
"../../data/shards_15_ordered/dataset_sharded0.csv\n",
"../../data/shards_15_ordered/dataset_sharded1.csv\n",
"../../data/shards_15_ordered/dataset_sharded2.csv\n",
"../../data/shards_15_ordered/dataset_sharded3.csv\n",
"../../data/shards_15_ordered/dataset_sharded4.csv\n",
"../../data/shards_15_ordered/dataset_sharded5.csv\n",
"../../data/shards_15_ordered/dataset_sharded6.csv\n",
"../../data/shards_15_ordered/dataset_sharded7.csv\n",
"../../data/shards_15_ordered/dataset_sharded8.csv\n",
"../../data/shards_15_ordered/dataset_sharded9.csv\n",
"../../data/shards_15_ordered/dataset_sharded10.csv\n",
"../../data/shards_15_ordered/dataset_sharded11.csv\n",
"../../data/shards_15_ordered/dataset_sharded12.csv\n",
"../../data/shards_15_ordered/dataset_sharded13.csv\n",
"../../data/shards_15_ordered/dataset_sharded14.csv\n",
"92732\n",
"../../data/shards_5_shuffled/dataset_sharded0.csv\n",
"../../data/shards_5_shuffled/dataset_sharded1.csv\n",
"../../data/shards_5_shuffled/dataset_sharded2.csv\n",
"../../data/shards_5_shuffled/dataset_sharded3.csv\n",
"../../data/shards_5_shuffled/dataset_sharded4.csv\n",
"----- done data spliting -----\n"
]
}
Expand Down Expand Up @@ -176,18 +166,8 @@
},
{
"cell_type": "code",
"execution_count": 38,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6 <class 'int'>\n",
"6\n",
"5\n"
]
}
],
"execution_count": null,
"outputs": [],
"source": [
"test = 5.4999\n",
"print(round(test), type(round(test)))\n",
Expand Down
36 changes: 8 additions & 28 deletions EDA/splitdata/test_dataset_generating.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {
"collapsed": true
},
Expand All @@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"outputs": [],
"source": [
"# sorting shuffled train dataset to ordered dataset\n",
Expand All @@ -32,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"outputs": [],
"source": [
"def testdata_generating(dataset_file, save_file):\n",
Expand Down Expand Up @@ -123,25 +123,14 @@
},
{
"cell_type": "code",
"execution_count": 19,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100000\n",
"train dataset has 92731 length\n",
"test dataset has 7269 length\n",
"----- dividing dataset done! -----\n"
]
}
],
"execution_count": null,
"outputs": [],
"source": [
"'''\n",
"generating test dataset and divide it from whole train dataset\n",
"'''\n",
"\n",
"shuffle = True\n",
"shuffle = False\n",
"shuflled_ordered_str = 'shuffled' if shuffle else 'ordered'\n",
"if shuffle: # shuffled\n",
" dataset_file = open('../../ml-100k/u.data', \"rb\")\n",
Expand Down Expand Up @@ -186,17 +175,8 @@
},
{
"cell_type": "code",
"execution_count": 35,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100000\n",
"----- ordered test data generating done -----\n"
]
}
],
"execution_count": null,
"outputs": [],
"source": [
"'''\n",
"ordered score test data generating\n",
Expand Down
Loading

0 comments on commit fb5ff7a

Please sign in to comment.