Update nx-cugraph Benchmarks for Additional BC K-Values #4721

Closed
4 changes: 3 additions & 1 deletion benchmarks/nx-cugraph/pytest-based/README.md
@@ -21,7 +21,9 @@ Our current benchmarks provide the following datasets:
 #### 1. `run-main-benchmarks.sh`
 This script allows users to run a small set of commonly-used algorithms across multiple datasets and backends. All results are stored inside a sub-directory (`logs/`) and output files are named based on the combination of parameters for that benchmark.
 
-NOTE: If running with all algorithms and datasets using NetworkX without an accelerated backend, this script may take a few hours to finish running.
+NOTE:
+- If running with all algorithms and datasets using NetworkX without an accelerated backend, this script may take a few hours to finish running.
+- The `betweenness_centrality` benchmark runs with the k values `[10, 20, 50, 100, 500, 1000]` by default. To run only specific k values, edit `bc_k_values` (line 46) so that its terms are appended to the [pytest keyword expression](https://docs.pytest.org/en/6.2.x/usage.html#specifying-tests-selecting-tests) used to select tests.
 
 **Usage:**
 - Run with `--cpu-only`:
3 changes: 1 addition & 2 deletions benchmarks/nx-cugraph/pytest-based/bench_algos.py
@@ -272,7 +272,7 @@ def bench_from_networkx(benchmark, graph_obj):
 
 # normalized_param_values = [True, False]
 normalized_param_values = [True]
-k_param_values = [10, 100, 1000]
+k_param_values = [10, 20, 50, 100, 500, 1000]
 
 
 @pytest.mark.parametrize(
@@ -281,7 +281,6 @@ def bench_from_networkx(benchmark, graph_obj):
 @pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}")
 def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k):
     G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
-
     if k > G.number_of_nodes():
         pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")

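The `-k` filter terms used by the shell script (such as `not 1000`) can select k values because the `ids=lambda k: f"{k=}"` argument embeds each k into the test ID, and pytest's `-k` performs case-insensitive substring matching against those IDs. A minimal sketch of the mechanism, in plain Python with no pytest dependency:

```python
# How the ids lambda renders each parametrized k value (Python 3.8+ f-string).
k_param_values = [10, 20, 50, 100, 500, 1000]
ids = [f"{k=}" for k in k_param_values]
print(ids)  # ['k=10', 'k=20', 'k=50', 'k=100', 'k=500', 'k=1000']

# A keyword expression like "not 1000" deselects any ID containing "1000".
# Note the substring behavior: "not 100" would also drop "k=1000".
kept = [i for i in ids if "1000" not in i]
print(kept)  # ['k=10', 'k=20', 'k=50', 'k=100', 'k=500']
```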
17 changes: 12 additions & 5 deletions benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh
@@ -14,7 +14,7 @@
 
 
 # location to store datasets used for benchmarking
-export RAPIDS_DATASET_ROOT_DIR=/datasets/cugraph
+export RAPIDS_DATASET_ROOT_DIR=${RAPIDS_DATASET_ROOT_DIR:-/datasets/cugraph}
 mkdir -p logs
 
 # list of algos, datasets, and back-ends to use in combinations
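The changed export uses shell `${VAR:-default}` expansion, so an existing environment value now wins over the hard-coded path. A standalone sketch of that pattern (the custom path below is invented for illustration):

```shell
# ${VAR:-default} substitutes the default only when VAR is unset or empty.
unset RAPIDS_DATASET_ROOT_DIR
echo "${RAPIDS_DATASET_ROOT_DIR:-/datasets/cugraph}"   # prints /datasets/cugraph

# With the variable already set, the user's value is kept:
RAPIDS_DATASET_ROOT_DIR=/my/custom/datasets
echo "${RAPIDS_DATASET_ROOT_DIR:-/datasets/cugraph}"   # prints /my/custom/datasets
```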
@@ -40,6 +40,11 @@ backends="
 None
 cugraph-preconverted
 "
+
+# edit this directly to append extra keyword filters to the pytest -k expression
+# e.g. bc_k_values="and not 100 and not 1000"
+bc_k_values=""
+
 # check for --cpu-only or --gpu-only args
 if [[ "$#" -eq 1 ]]; then
     case $1 in
@@ -63,10 +68,12 @@ for algo in $algos; do
         for backend in $backends; do
             name="${backend}__${algo}__${dataset}"
             echo "Running: $backend, $dataset, bench_$algo"
-            # command to preproduce test
-            # echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo and not 1000\" --benchmark-json=\"logs/${name}.json\" bench_algos.py"
-            pytest -sv \
-                -k "$backend and $dataset and bench_$algo and not 1000" \
+
+            # uncomment to get command for reproducing test
+            # echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo $bc_k_values\" --benchmark-json=\"logs/${name}.json\" bench_algos.py"
+
+            pytest -sv \
+                -k "$backend and $dataset and bench_$algo $bc_k_values" \
                 --benchmark-json="logs/${name}.json" \
                 bench_algos.py 2>&1 | tee "logs/${name}.out"
         done
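To see how `bc_k_values` narrows the selection, here is a self-contained sketch of the keyword expression the script builds; the backend, dataset, and filter values are invented examples, not output of the real loops:

```shell
# Hypothetical inputs; in the real script these come from the loops above.
backend="cugraph-preconverted"
dataset="hollywood"
algo="betweenness_centrality"

# Appending "and not 500 and not 1000" skips the two largest k values.
bc_k_values="and not 500 and not 1000"

# Compose the expression exactly as the pytest invocation does:
expr="$backend and $dataset and bench_$algo $bc_k_values"
echo "pytest -sv -k \"$expr\" bench_algos.py"
```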