Merge branch 'dahai-rebuttal' into 'eurosys22-final-version'

Fig14a & Fig14b See merge request gnn/samgraph!93
SJTU-IPADS · Mar 11, 2022 · f102765 · f102765
2 parents de73769 + 22eef33
commit f102765
Show file tree

Hide file tree

Showing 9 changed files with 318 additions and 101 deletions.
diff --git a/exp/README.md b/exp/README.md
@@ -91,8 +91,8 @@ SGNN                        3.26     5.96   4.14  13.37    # logs_sgnn/test3.log
 |   Fig11c   |       94 tests       |      120 mins     |
 |    Fig12   |       33 tests       |      30 mins      |
 |    Fig13   |       33 tests       |      30 mins      |
-|   Fig14a   |       26 tests       |      25 mins      |
-|   Fig14b   |       33 tests       |      30 mins      |
+|   Fig14a   |       34 tests       |      32 mins      |
+|   Fig14b   |       34 tests       |      23 mins      |
 |    Fig15   |       36 tests       |      40 mins      |
 |   Fig16a   |        3 tests       |      25 mins      |
 |   Fig17a   |       14 tests       |      20 mins      |

diff --git a/exp/fig14a/README.md b/exp/fig14a/README.md
@@ -1,6 +1,8 @@
 # Figure 14a:  GCN Scalability Test
 
-The goal of this experiment is to show the scalability performance of DGL and FGNN on GCN model.
+The goal of this experiment is to show the scalability performance of DGL, SGNN and FGNN on the GCN model.
+
+Dataset: ogbn-papers100M
 
 - `run.py` is the runner script.
 - `logtable_def.py` defines log parsing rules.
@@ -11,8 +13,9 @@ The goal of this experiment is to show the scalability performance of DGL and FG
 
 - Paper's configurations: **8x16GB** NVIDIA V100 GPUs, **2x24** cores Intel 8163 CPU
 - For other hardware configurations, you may need to modify ① the number of GPUs, ② the number of CPU threads, and ③ the fraction of vertices to be cached (as a ratio, 0 <= pct. <= 1).
-  - **DGL:** Modify `L64-L65(#GPU)` in `run.py`.
-  - **FGNN:**  Modify  `L106(#CPU threads), L116-L151(#GPU, #Cache percentage)` in `run.py`.
+  - **DGL:** Modify `L66-L67(#GPU)` in `run.py`.
+  - **FGNN:**  Modify  `L108(#CPU threads), L118-L153(#GPU, #Cache percentage)` in `run.py`.
+  - **SGNN:**  Modify  `L187(#Cache percentage), L190(#GPU)` in `run.py`.
 
 
 
@@ -71,19 +74,16 @@ output_2022-01-29_20-10-39
 
 ```sh
 > cat output_2022-01-29_20-10-39/fig14a.res
-"GPUs"  "DGL"   "1S"    "2S"    "3S"
-1       18.51   -       -       -
-2       9.79    4.11    -       -
-3       7.19    2.14    4.08    -
-4       6.00    1.49    2.19    4.14
-5       5.28    1.19    1.46    2.13
-6       4.79    1.06    1.11    1.45
-7       4.48    1.05    0.93    1.11
-8       4.03    1.04    0.81    0.91
+"GPUs"  "DGL"   "SGNN"  "1S"    "2S"    "3S"
+1       18.45   10.02   -       -       -
+2       9.85    6.94    4.10    -       -
+3       7.15    5.15    2.16    4.20    -
+4       6.01    4.18    1.48    2.15    4.15
+...
 ```
 
 
 
 
 
-## FAQ
+## FAQ
diff --git a/exp/fig14a/logtable_def.py b/exp/fig14a/logtable_def.py
@@ -144,3 +144,54 @@ def get_fgnn_logtable():
         num_sample_worker=3,
         num_train_worker=5
     ).create()
+
+
+def get_sgnn_logtable():  # log-parsing layout for the SGNN runs: 8 rows x 4 epoch_time columns
+    return LogTable(
+        num_row=8,  # one row per num_worker value (1..8) defined below
+        num_col=4  # one column per epoch_time metric defined below
+    ).update_col_definition(
+        col_id=0,
+        definition='epoch_time:sample_total'
+    ).update_col_definition(
+        col_id=1,
+        definition='epoch_time:copy_time'
+    ).update_col_definition(
+        col_id=2,
+        definition='epoch_time:train_total'
+    ).update_col_definition(
+        col_id=3,
+        definition='epoch_time:total'  # run.py reads this column (data[3]) as the "SGNN" series
+    ).update_row_definition(
+        row_id=0,
+        col_range=[0, 3],  # presumably an inclusive range covering all four columns -- confirm against LogTable
+        num_worker=1,
+    ).update_row_definition(
+        row_id=1,
+        col_range=[0, 3],
+        num_worker=2,
+    ).update_row_definition(
+        row_id=2,
+        col_range=[0, 3],
+        num_worker=3,
+    ).update_row_definition(
+        row_id=3,
+        col_range=[0, 3],
+        num_worker=4,
+    ).update_row_definition(
+        row_id=4,
+        col_range=[0, 3],
+        num_worker=5,
+    ).update_row_definition(
+        row_id=5,
+        col_range=[0, 3],
+        num_worker=6,
+    ).update_row_definition(
+        row_id=6,
+        col_range=[0, 3],
+        num_worker=7,
+    ).update_row_definition(
+        row_id=7,
+        col_range=[0, 3],
+        num_worker=8,  # last row; num_worker values 1..8 mirror the list swept in run.py
+    ).create()
diff --git a/exp/fig14a/run.py b/exp/fig14a/run.py
@@ -15,11 +15,13 @@
 HERE = os.path.abspath(os.path.dirname(__file__))
 DGL_APP_DIR = os.path.join(HERE, '../../example/dgl/multi_gpu')
 FGNN_APP_DIR = os.path.join(HERE, '../../example/samgraph/multi_gpu')
+SGNN_APP_DIR = os.path.join(HERE, '../../example/samgraph/sgnn')
 
 OUTPUT_DIR = os.path.join(HERE, f'output_{TIMESTAMP}')
 OUTPUT_DIR_SHORT = f'output_{TIMESTAMP}'
 def DGL_LOG_DIR(): return os.path.join(OUTPUT_DIR, 'logs_dgl')
 def FGNN_LOG_DIR(): return os.path.join(OUTPUT_DIR, 'logs_fgnn')
+def SGNN_LOG_DIR(): return os.path.join(OUTPUT_DIR, 'logs_sgnn')
 
 
 GNUPLOT_FILE = os.path.join(HERE, 'scale-gcn.plt')
@@ -162,23 +164,67 @@ def fgnn_scalability_test():
     return configs, logtable
 
 
+
+def sgnn_scalability_test():  # run the SGNN GCN sweep over num_worker and parse its logs
+    logtable = get_sgnn_logtable()  # 8 rows (num_worker 1..8) x 4 epoch_time columns
+
+    configs = ConfigList(
+        'SGNN GCN scalability test'
+    ).select(
+        'app',
+        [App.gcn]
+    ).select(
+        'dataset',
+        [Dataset.papers100M]
+    ).override(
+        'num_epoch',
+        [NUM_EPOCH]
+    ).override(
+        'cache_policy',
+        ['degree']
+    ).override(
+        'cache_percentage',
+        [0.03]  # README lists this as the cache-percentage knob to adjust for other hardware
+    ).override(
+        'num_worker',
+        [1, 2, 3, 4, 5, 6, 7, 8],  # README calls this the #GPU knob; values match the logtable's num_worker rows
+    ).override(
+        'BOOL_pipeline',
+        ['no_pipeline']
+        # ).override(
+        #     'BOOL_validate_configs',
+        #     ['validate_configs']
+    ).run(
+        appdir=SGNN_APP_DIR,
+        logdir=SGNN_LOG_DIR(),  # logs are written to <OUTPUT_DIR>/logs_sgnn
+        mock=MOCK
+    ).parse_logs(
+        logtable=logtable,
+        logdir=SGNN_LOG_DIR()  # parse from the same directory the runs wrote to
+    )
+
+    return configs, logtable  # run_fig14a_tests discards configs and uses only the logtable
+
+
 def run_fig14a_tests():
     os.system(f'mkdir -p {OUTPUT_DIR}')
-    table_format = '{:}\t{:}\t{:}\t{:}\t{:}\n'
-    table_format_full = '{:}\t{:}\t{:}\t{:}\t{:}\t# {:}\n'
+    table_format = '{:}\t{:}\t{:}\t{:}\t{:}\t{:}\n'
+    table_format_full = '{:}\t{:}\t{:}\t{:}\t{:}\t{:}\t# {:}\n'
     with open(OUT_DATA_FILE(), 'w') as f1, open(OUT_DATA_FILE_FULL(), 'w') as f2:
         f1.write(table_format.format('"GPUs"',
-                                     '"DGL"', '"1S"', '"2S"', '"3S"'))
+                                     '"DGL"', '"SGNN"', '"1S"', '"2S"', '"3S"'))
         f2.write(table_format_full.format('"GPUs"',
-                '"DGL"', '"1S"', '"2S"', '"3S"', '""'))
+                '"DGL"', '"SGNN"', '"1S"', '"2S"', '"3S"', '""'))
 
         print(f'Running tests for fig 14a({OUTPUT_DIR_SHORT})...')
         _, dgl_logtable = dgl_scalability_test()
+        _, sgnn_logtable = sgnn_scalability_test()
         _, fgnn_logtable = fgnn_scalability_test()
 
         print('Parsing logs...')
         gpus = [1, 2, 3, 4, 5, 6, 7, 8]
         dgl_data = [data[0] for data in dgl_logtable.data]
+        sgnn_data = [data[3] for data in sgnn_logtable.data]
         fgnn_1s_data = ['-'] + [fgnn_logtable.data[i][0] for i in range(0, 7)]
         fgnn_2s_data = ['-', '-'] + [fgnn_logtable.data[i][0]
                                      for i in range(7, 13)]
@@ -188,6 +234,8 @@ def run_fig14a_tests():
         data_refs = [[] for _ in range(8)]
         for i in range(8):
             data_refs[i] += list(dgl_logtable.data_refs[i])
+        for i in range(8):
+            data_refs[i] += list(sgnn_logtable.data_refs[i])
         for i in range(0, 7):
             data_refs[i + 1] += list(fgnn_logtable.data_refs[i])
         for i in range(7, 13):
@@ -196,9 +244,9 @@ def run_fig14a_tests():
             data_refs[i - 13 + 3] += list(fgnn_logtable.data_refs[i])
 
         for i in range(8):
-            f1.write(table_format.format(str(gpus[i]), str(dgl_data[i]), str(
+            f1.write(table_format.format(str(gpus[i]), str(dgl_data[i]), str(sgnn_data[i]), str(
                 fgnn_1s_data[i]), str(fgnn_2s_data[i]), str(fgnn_3s_data[i])))
-            f2.write(table_format.format(str(gpus[i]), str(dgl_data[i]), str(
+            f2.write(table_format.format(str(gpus[i]), str(dgl_data[i]), str(sgnn_data[i]), str(
                 fgnn_1s_data[i]), str(fgnn_2s_data[i]), str(fgnn_3s_data[i]), ' '.join(data_refs[i])))
 
     print('Ploting...')

diff --git a/exp/fig14a/scale-gcn.plt b/exp/fig14a/scale-gcn.plt
@@ -39,6 +39,7 @@ set ytics offset 0.5,0 #format "%.1f"  #nomirror
 
 
 plot resfile u ($1):($2) t "DGL"     w lp lt 1 lw 3 pt  4 ps 1.5 lc rgb '#c00000', \
-     resfile u ($1):($3) t "FGNN/1S" w lp lt 1 lw 3 pt  6 ps 1.5 lc rgb '#008800', \
-     resfile u ($1):($4) t "FGNN/2S" w lp lt 1 lw 3 pt  8 ps 1.5 lc rgb '#00bb00', \
-     resfile u ($1):($5) t "FGNN/3S" w lp lt 1 lw 3 pt  2 ps 1.5 lc rgb '#00dd00'
+     resfile u ($1):($3) t "T_{SOTA}"  w lp lt 1 lw 3 pt  3 ps 1.5 lc rgb '#ff9900', \
+     resfile u ($1):($4) t "FGNN/1S" w lp lt 1 lw 3 pt  6 ps 1.5 lc rgb '#008800', \
+     resfile u ($1):($5) t "FGNN/2S" w lp lt 1 lw 3 pt  8 ps 1.5 lc rgb '#00bb00', \
+     resfile u ($1):($6) t "FGNN/3S" w lp lt 1 lw 3 pt  2 ps 1.5 lc rgb '#00dd00'
diff --git a/exp/fig14b/README.md b/exp/fig14b/README.md
@@ -1,6 +1,8 @@
-# Figure 14b:  PinSAGE Scalability Test
+# Figure 14b:  GCN Scalability Test
 
-The goal of this experiment is to show the scalability performance of DGL and FGNN on PinSAGE model.
+The goal of this experiment is to show the scalability performance of DGL, SGNN and FGNN on the GCN model.
+
+Dataset: twitter
 
 - `run.py` is the runner script.
 - `logtable_def.py` defines log parsing rules.
@@ -11,8 +13,9 @@ The goal of this experiment is to show the scalability performance of DGL and FG
 
 - Paper's configurations: **8x16GB** NVIDIA V100 GPUs, **2x24** cores Intel 8163 CPU
 - For other hardware configurations, you may need to modify ① the number of GPUs, ② the number of CPU threads, and ③ the fraction of vertices to be cached (as a ratio, 0 <= pct. <= 1).
-  - **DGL:** Modify `L61(#GPU)` in `run.py`.
-  - **FGNN:**  Modify  `L99(#CPU threads), L104-L139(#GPU, #Cache percentage)` in `run.py`.
+  - **DGL:** Modify `L66-L67(#GPU)` in `run.py`.
+  - **FGNN:**  Modify  `L108(#CPU threads), L118-L153(#GPU, #Cache percentage)` in `run.py`.
+  - **SGNN:**  Modify  `L187(#Cache percentage), L190(#GPU)` in `run.py`.
 
 
 
@@ -51,39 +54,37 @@ optional arguments:
 
 ## Output Example
 
-`python run.py` will create a new folder(e.g. `output_2022-01-29_20-45-14`) as result.
+`python run.py` will create a new folder (e.g. `output_2022-01-29_20-10-39`) as result.
 
 `python run.py --rerun-tests` does not create a new folder; it reuses the most recently created one.
 
 ```sh
-> tree output_2022-01-29_20-45-14 -L 1
-output_2022-01-29_20-45-14
-├── fig14b.eps           # Output figure
-├── fig14b-full.res      # Output data with comments
-├── fig14b.res           # Output data
+> tree output_2022-01-29_20-10-39 -L 1
+output_2022-01-29_20-10-39
+├── fig14b.eps             # Output figure
+├── fig14b-full.res        # Output data with comments
+├── fig14b.res             # Output data
 ├── logs_dgl
-└── logs_fgnn
+├── logs_fgnn
+└── logs_sgnn
 
-2 directories, 3 files
+3 directories, 3 files
 ```
 
 
 
 ```sh
-> cat output_2022-01-29_20-45-14/fig14b.res
-"GPUs"  "DGL"   "1S"    "2S"    "3S"
-1       13.14   -       -       -
-2       6.92    6.25    -       -
-3       4.90    3.22    6.22    -
-4       3.89    2.21    3.26    6.34
-5       3.31    1.66    2.21    3.29
-6       3.03    1.38    1.67    2.26
-7       2.81    1.18    1.39    1.65
-8       2.57    1.02    1.17    1.37
+> cat output_2022-01-29_20-10-39/fig14b.res
+"GPUs"	"DGL"	"SGNN"	"1S"	"2S"	"3S"
+1	11.86	5.47	-	-	-
+2	7.07	4.26	1.69	-	-
+3	5.65	3.20	0.95	1.70	-
+4	4.88	2.65	0.71	0.97	1.69
+...
 ```
 
 
 
 
 
-## FAQ
+## FAQ