From 9df180baef449503f66abe605a3ff15f87415e6e Mon Sep 17 00:00:00 2001 From: hyandt Date: Wed, 14 Dec 2022 09:11:20 -0700 Subject: [PATCH 1/4] Rename references to master branch --- .../DataSet/RTS-GMLC-6.400.xml | 14 ++++---- .../plexos-hpc-walkthrough/RunFiles/enhanced | 2 +- .../plexos-hpc-walkthrough/RunFiles/simple | 2 +- applications/plexos-quick-start/README.md | 10 +++--- applications/spark/README.md | 4 +-- applications/spark/conf/spark-defaults.conf | 2 +- .../spark/conf/spark-defaults.conf.template | 2 +- applications/spark/conf/spark-env.sh | 10 +++--- applications/spark/config | 2 +- applications/spark/python.md | 10 +++--- applications/spark/r.md | 2 +- applications/spark/slurm_scripts/batch_job.sh | 2 +- .../spark/slurm_scripts/batch_jupyter.sh | 2 +- .../batch_job.sh | 2 +- .../batch_jupyter.sh | 2 +- applications/spark/spark_scripts/common.sh | 2 +- .../spark/spark_scripts/configure_spark.sh | 10 +++--- .../spark_scripts/start_spark_cluster.sh | 14 ++++---- .../spark/spark_scripts/stop_spark_cluster.sh | 2 +- .../spark/tests/batch_job.sh.template | 2 +- .../vasp/Performance Study 2/README.md | 32 +++++++++---------- .../VASP_Recommendations_Analysis.ipynb | 2 +- .../cpu-bind data/cpu-bind_VASP.ipynb | 4 +-- applications/vasp/README.md | 24 +++++++------- general/Jupyterhub/adv_jupyter/README.md | 2 +- .../adv_jupyter/mpi4py_tf/dompi.ipynb | 16 +++++----- .../Jupyterhub/adv_jupyter/mpi4py_tf/makeit | 4 +-- .../adv_jupyter/mpi4py_tf/mninstcu.ipynb | 6 ++-- .../adv_jupyter/mpi4py_tf/spackBuild | 8 ++--- general/bash/cheatsheet.sh | 2 +- general/building-mpi-applications/README.md | 2 +- general/markdown/RenameStep1.md | 2 +- .../conda-how-to.md | 2 +- general/stream_benchmark/stream.c | 2 +- languages/fortran/Fortran90/f90.md | 4 +-- .../julia/demos/notebooks/PyJulia_Demo.ipynb | 2 +- .../julia/how-to-guides/install-Julia.md | 2 +- .../Julia-Parallel-Computing.md | 4 +-- .../Julia-Parallel-Computing.ipynb | 4 +-- 
.../anaconda/conda_tutorial.slides.html | 6 ++-- .../openai_rllib/custom_gym_env/README.md | 2 +- .../openai_rllib/simple-example-gpu/README.md | 6 ++-- .../openai_rllib/simple-example/README.md | 8 ++--- languages/python/pyomo/README.md | 4 +-- languages/python/pyomo/p_median.py | 2 +- slurm/multinode-task-per-core.sh | 6 ++-- slurm/source/slurm_variables | 2 +- slurm/source/stf_01.f90 | 28 ++++++++-------- 48 files changed, 143 insertions(+), 143 deletions(-) diff --git a/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml b/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml index 70dcb54fe..aaeff3636 100755 --- a/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml +++ b/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml @@ -1,4 +1,4 @@ - + 1 2 @@ -1409,7 +1409,7 @@ true true 88 - Master switch for computation of reliability indices LOLP, LOLE, etc. + code-examples switch for computation of reliability indices LOLP, LOLE, etc. 107 @@ -1423,7 +1423,7 @@ true true 1734 - Master switch for computation of reliability indices LOLP, LOLE, etc across multiple areas. + code-examples switch for computation of reliability indices LOLP, LOLE, etc across multiple areas. 108 @@ -1604,7 +1604,7 @@ true true 88 - Master switch for computation of reliability indices LOLP, LOLE, etc. + code-examples switch for computation of reliability indices LOLP, LOLE, etc. 121 @@ -1618,7 +1618,7 @@ true true 1734 - Master switch for computation of reliability indices LOLP, LOLE, etc across multiple areas. + code-examples switch for computation of reliability indices LOLP, LOLE, etc across multiple areas. 
122 @@ -10966,7 +10966,7 @@ Nested Reserves 0 -1 - Master Reserves + code-examples Reserves 0 -1 false @@ -133924,4 +133924,4 @@ 0 0 - \ No newline at end of file + \ No newline at end of file diff --git a/applications/plexos-hpc-walkthrough/RunFiles/enhanced b/applications/plexos-hpc-walkthrough/RunFiles/enhanced index 373125daf..42595ade4 100644 --- a/applications/plexos-hpc-walkthrough/RunFiles/enhanced +++ b/applications/plexos-hpc-walkthrough/RunFiles/enhanced @@ -42,7 +42,7 @@ module purge module load centos mono/$MONO_VERSION xpressmp/$XPRESSMP_VERSION plexos/$PLEXOS_VERSION # Get our data -wget https://github.nrel.gov/tkaiser2/plexos/raw/master/week.tgz +wget https://github.nrel.gov/tkaiser2/plexos/raw/code-examples/week.tgz tar -xzf week.tgz # What we have diff --git a/applications/plexos-hpc-walkthrough/RunFiles/simple b/applications/plexos-hpc-walkthrough/RunFiles/simple index e0bfbdec0..e6d123350 100644 --- a/applications/plexos-hpc-walkthrough/RunFiles/simple +++ b/applications/plexos-hpc-walkthrough/RunFiles/simple @@ -27,7 +27,7 @@ module purge module load centos mono/$MONO_VERSION xpressmp/$XPRESSMP_VERSION plexos/$PLEXOS_VERSION # Get our data -wget https://github.nrel.gov/tkaiser2/plexos/raw/master/week.tgz +wget https://github.nrel.gov/tkaiser2/plexos/raw/code-examples/week.tgz tar -xzf week.tgz ls -lt diff --git a/applications/plexos-quick-start/README.md b/applications/plexos-quick-start/README.md index 9e2f6e969..b2a463357 100644 --- a/applications/plexos-quick-start/README.md +++ b/applications/plexos-quick-start/README.md @@ -74,12 +74,12 @@ or just copy them from this page or ```bash -wget https://github.nrel.gov/raw/tkaiser2/plexos/master/scripts/enhanced +wget https://github.nrel.gov/raw/tkaiser2/plexos/code-examples/scripts/enhanced ``` and ```bash -wget https://github.nrel.gov/raw/tkaiser2/plexos/master/scripts/simple +wget https://github.nrel.gov/raw/tkaiser2/plexos/code-examples/scripts/simple ``` ## To run: @@ -88,7 +88,7 @@ If you 
have never run Plexos on Eagle you will need to set up the license. There is a script to do that. Download it and run it. ```bash -wget https://github.nrel.gov/raw/tkaiser2/plexos/master/scripts/makelicense +wget https://github.nrel.gov/raw/tkaiser2/plexos/code-examples/scripts/makelicense chmod 700 makelicense ./makelicense ``` @@ -146,7 +146,7 @@ module purge module load centos mono/$MONO_VERSION xpressmp/$XPRESSMP_VERSION plexos/$PLEXOS_VERSION # Get our data -wget https://github.nrel.gov/tkaiser2/plexos/raw/master/week.tgz +wget https://github.nrel.gov/tkaiser2/plexos/raw/code-examples/week.tgz tar -xzf week.tgz ls -lt @@ -209,7 +209,7 @@ module purge module load centos mono/$MONO_VERSION xpressmp/$XPRESSMP_VERSION plexos/$PLEXOS_VERSION # Get our data -wget https://github.nrel.gov/tkaiser2/plexos/raw/master/week.tgz +wget https://github.nrel.gov/tkaiser2/plexos/raw/code-examples/week.tgz tar -xzf week.tgz # What we have diff --git a/applications/spark/README.md b/applications/spark/README.md index d0aa36d4f..4baec13c1 100644 --- a/applications/spark/README.md +++ b/applications/spark/README.md @@ -163,7 +163,7 @@ be able to load data files in any of them: ## Debugging problems Open the Spark web UI to observe what's happening with your jobs. You will have to forward ports -8080 and 4040 of the master node (first node in your SLURM allocation) through an ssh tunnel. +8080 and 4040 of the code-examples node (first node in your SLURM allocation) through an ssh tunnel. Open your browser to http://localhost:4040 after configuring the tunnel to access the application UI. @@ -190,7 +190,7 @@ CPUs and are not bottle-necked by the storage. Here is an example of how to run `htop` on multiple nodes simulataneously with `tmux`. 
-Download this script: https://raw.githubusercontent.com/johnko/ssh-multi/master/bin/ssh-multi +Download this script: https://raw.githubusercontent.com/johnko/ssh-multi/code-examples/bin/ssh-multi Run it like this: ``` diff --git a/applications/spark/conf/spark-defaults.conf b/applications/spark/conf/spark-defaults.conf index 95f6339fc..421c51b8b 100644 --- a/applications/spark/conf/spark-defaults.conf +++ b/applications/spark/conf/spark-defaults.conf @@ -19,7 +19,7 @@ # This is useful for setting default environmental settings. # Example: -# spark.master spark://master:7077 +# spark.code-examples spark://code-examples:7077 # spark.eventLog.enabled true # spark.eventLog.dir hdfs://namenode:8021/directory # spark.serializer org.apache.spark.serializer.KryoSerializer diff --git a/applications/spark/conf/spark-defaults.conf.template b/applications/spark/conf/spark-defaults.conf.template index 19cba6e71..a8598ca84 100644 --- a/applications/spark/conf/spark-defaults.conf.template +++ b/applications/spark/conf/spark-defaults.conf.template @@ -19,7 +19,7 @@ # This is useful for setting default environmental settings. # Example: -# spark.master spark://master:7077 +# spark.code-examples spark://code-examples:7077 # spark.eventLog.enabled true # spark.eventLog.dir hdfs://namenode:8021/directory # spark.serializer org.apache.spark.serializer.KryoSerializer diff --git a/applications/spark/conf/spark-env.sh b/applications/spark/conf/spark-env.sh index b7f09b92f..85e9b2305 100644 --- a/applications/spark/conf/spark-env.sh +++ b/applications/spark/conf/spark-env.sh @@ -45,20 +45,20 @@ # - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN # Options for the daemons used in the standalone deploy mode -# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname -# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master -# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. 
"-Dx=y") +# - SPARK_code-examples_HOST, to bind the code-examples to a different IP address or hostname +# - SPARK_code-examples_PORT / SPARK_code-examples_WEBUI_PORT, to use non-default ports for the code-examples +# - SPARK_code-examples_OPTS, to set config properties only for the code-examples (e.g. "-Dx=y") # - SPARK_WORKER_CORES, to set the number of cores to use on this machine # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker # - SPARK_WORKER_DIR, to set the working directory of worker processes # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") -# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). +# - SPARK_DAEMON_MEMORY, to allocate to the code-examples, worker and history server themselves (default: 1g). # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") # - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons -# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers +# - SPARK_PUBLIC_DNS, to set the public dns name of the code-examples or workers # Options for launcher # - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. 
"-Dx=y") diff --git a/applications/spark/config b/applications/spark/config index 6eb94aec7..bb11509de 100644 --- a/applications/spark/config +++ b/applications/spark/config @@ -1,4 +1,4 @@ container = /datasets/images/apache_spark/spark_py39.sif container_instance_name = spark -master_node_memory_overhead_gb = 10 +code-examples_node_memory_overhead_gb = 10 worker_node_memory_overhead_gb = 5 diff --git a/applications/spark/python.md b/applications/spark/python.md index 25f2363ec..1f5c911a5 100644 --- a/applications/spark/python.md +++ b/applications/spark/python.md @@ -4,13 +4,13 @@ This uses ipython, which is optional. Remove the `--env` line to use the regular $ singularity run \ --env PYSPARK_DRIVER_PYTHON=ipython \ instance://spark \ - pyspark --master spark://$(hostname):7077 + pyspark --code-examples spark://$(hostname):7077 ``` Optional: check your environment to ensure that all configuration settings are correct. -Most importantly, ensure that you connected to the Spark cluster master and are not in local mode. +Most importantly, ensure that you connected to the Spark cluster code-examples and are not in local mode. pyspark prints the connection information during startup. For example: ``` -Spark context available as 'sc' (master = spark://r2i7n35:7077, app id = app-20221202224041-0000). +Spark context available as 'sc' (code-examples = spark://r2i7n35:7077, app id = app-20221202224041-0000). ``` You can dump all configuration settings with this command: ``` @@ -32,7 +32,7 @@ $ singularity run \ --env PYSPARK_DRIVER_PYTHON=jupyter \ --env PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=8889 --ip=0.0.0.0" \ instance://spark \ - pyspark --master spark://$(hostname):7077 + pyspark --code-examples spark://$(hostname):7077 ``` The Jupyter process will print a URL to the terminal. You can access it from your laptop after you forward the ports through an ssh tunnel. 
@@ -54,7 +54,7 @@ spark = SparkSession.builder.appName("my_app").getOrCreate() ``` $ singularity run \ instance://spark \ - spark-submit --master spark://$(hostname):7077 + spark-submit --code-examples spark://$(hostname):7077 ``` Note: if your script is Python, the filename must end in `.py`. diff --git a/applications/spark/r.md b/applications/spark/r.md index 9bf6c63dd..db01f677f 100644 --- a/applications/spark/r.md +++ b/applications/spark/r.md @@ -2,6 +2,6 @@ ``` $ singularity run instance://spark sparkR > library(sparklyr) -> sc = spark_connect(master = paste0("spark://",Sys.info()["nodename"],":7077")) +> sc = spark_connect(code-examples = paste0("spark://",Sys.info()["nodename"],":7077")) > df = spark_read_parquet(sc = sc, path = "my_data.parquet", memory = FALSE) ``` diff --git a/applications/spark/slurm_scripts/batch_job.sh b/applications/spark/slurm_scripts/batch_job.sh index f3a45dc75..7be9df1cd 100755 --- a/applications/spark/slurm_scripts/batch_job.sh +++ b/applications/spark/slurm_scripts/batch_job.sh @@ -12,5 +12,5 @@ SCRIPT_DIR=~/repos/HPC/applications/spark/spark_scripts ${SCRIPT_DIR}/configure_spark.sh ${SCRIPT_DIR}/start_spark_cluster.sh # This runs an example script inside the container. 
-singularity run instance://spark spark-submit --master spark://$(hostname):7077 /opt/spark/examples/src/main/python/pi.py 500 +singularity run instance://spark spark-submit --code-examples spark://$(hostname):7077 /opt/spark/examples/src/main/python/pi.py 500 ${SCRIPT_DIR}/stop_spark_cluster.sh diff --git a/applications/spark/slurm_scripts/batch_jupyter.sh b/applications/spark/slurm_scripts/batch_jupyter.sh index 5791da674..bc73fb285 100755 --- a/applications/spark/slurm_scripts/batch_jupyter.sh +++ b/applications/spark/slurm_scripts/batch_jupyter.sh @@ -22,6 +22,6 @@ singularity run \ --network-args \ "portmap=8889:8889" \ instance://spark \ - pyspark --master spark://$(hostname):7077 + pyspark --code-examples spark://$(hostname):7077 ${SCRIPT_DIR}/stop_spark_cluster.sh diff --git a/applications/spark/slurm_scripts_with_resource_monitoring/batch_job.sh b/applications/spark/slurm_scripts_with_resource_monitoring/batch_job.sh index 66da511c6..65a8aa1b2 100755 --- a/applications/spark/slurm_scripts_with_resource_monitoring/batch_job.sh +++ b/applications/spark/slurm_scripts_with_resource_monitoring/batch_job.sh @@ -16,7 +16,7 @@ srun collect_stats.sh . & ${SCRIPT_DIR}/configure_spark.sh ${SCRIPT_DIR}/start_spark_cluster.sh # This runs an example script inside the container. 
-singularity run instance://spark spark-submit --master spark://$(hostname):7077 /opt/spark/examples/src/main/python/pi.py 500 +singularity run instance://spark spark-submit --code-examples spark://$(hostname):7077 /opt/spark/examples/src/main/python/pi.py 500 ${SCRIPT_DIR}/stop_spark_cluster.sh touch shutdown diff --git a/applications/spark/slurm_scripts_with_resource_monitoring/batch_jupyter.sh b/applications/spark/slurm_scripts_with_resource_monitoring/batch_jupyter.sh index 19d2c65de..ccce0c155 100755 --- a/applications/spark/slurm_scripts_with_resource_monitoring/batch_jupyter.sh +++ b/applications/spark/slurm_scripts_with_resource_monitoring/batch_jupyter.sh @@ -26,7 +26,7 @@ singularity run \ --network-args \ "portmap=8889:8889" \ instance://spark \ - pyspark --master spark://$(hostname):7077 + pyspark --code-examples spark://$(hostname):7077 ${SCRIPT_DIR}/stop_spark_cluster.sh diff --git a/applications/spark/spark_scripts/common.sh b/applications/spark/spark_scripts/common.sh index d3a85700a..4e868a5aa 100644 --- a/applications/spark/spark_scripts/common.sh +++ b/applications/spark/spark_scripts/common.sh @@ -31,7 +31,7 @@ function get_config_variable() export CONTAINER=$(get_config_variable "container") export CONTAINER_INSTANCE_NAME=$(get_config_variable "container_instance_name") -export MASTER_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "master_node_memory_overhead_gb") +export code-examples_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "code-examples_node_memory_overhead_gb") export WORKER_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "worker_node_memory_overhead_gb") function get_memory_gb() diff --git a/applications/spark/spark_scripts/configure_spark.sh b/applications/spark/spark_scripts/configure_spark.sh index 150dde31b..8c1b2486a 100755 --- a/applications/spark/spark_scripts/configure_spark.sh +++ b/applications/spark/spark_scripts/configure_spark.sh @@ -12,13 +12,13 @@ SLURM_JOB_IDS=() # Check for errors in user input. Exit on error. 
function check_errors() { - master_memory_gb=$(( 1 + ${DRIVER_MEMORY_GB} )) + code-examples_memory_gb=$(( 1 + ${DRIVER_MEMORY_GB} )) if [ ${ENABLE_HISTORY_SERVER} = true ]; then - (( master_memory_gb += 1 )) + (( code-examples_memory_gb += 1 )) fi - if [ ${master_memory_gb} -gt ${MASTER_NODE_MEMORY_OVERHEAD_GB} ]; then - error "master_node_memory_overhead_gb=${MASTER_NODE_MEMORY_OVERHEAD_GB} is too small." \ + if [ ${code-examples_memory_gb} -gt ${code-examples_NODE_MEMORY_OVERHEAD_GB} ]; then + error "code-examples_node_memory_overhead_gb=${code-examples_NODE_MEMORY_OVERHEAD_GB} is too small." \ "Increase it or reduce driver_memory_gb=${DRIVER_MEMORY_GB}" fi } @@ -40,7 +40,7 @@ function config_executors() memory_gb_by_node=() lowest_memory_gb=0 for node_mem in $(cat ${CONFIG_DIR}/conf/worker_memory); do - mem=$(( ${node_mem} - ${MASTER_NODE_MEMORY_OVERHEAD_GB} )) + mem=$(( ${node_mem} - ${code-examples_NODE_MEMORY_OVERHEAD_GB} )) if [ ${lowest_memory_gb} -eq 0 ] || [ ${node_mem} -lt ${lowest_memory_gb} ]; then lowest_memory_gb=${mem} fi diff --git a/applications/spark/spark_scripts/start_spark_cluster.sh b/applications/spark/spark_scripts/start_spark_cluster.sh index 701c21f40..7ec80d85a 100755 --- a/applications/spark/spark_scripts/start_spark_cluster.sh +++ b/applications/spark/spark_scripts/start_spark_cluster.sh @@ -34,26 +34,26 @@ function start_containers() function start_spark_processes() { - master_node=$(hostname | tr -d '\n') - spark_cluster=spark://${master_node}:7077 + code-examples_node=$(hostname | tr -d '\n') + spark_cluster=spark://${code-examples_node}:7077 - exec_spark_process start-master.sh + exec_spark_process start-code-examples.sh check_history_server_enabled if [ $? -eq 0 ]; then exec_spark_process start-history-server.sh fi - ${SCRIPT_DIR}/start_spark_worker.sh ${CONFIG_DIR} ${MASTER_NODE_MEMORY_OVERHEAD_GB} ${spark_cluster} + ${SCRIPT_DIR}/start_spark_worker.sh ${CONFIG_DIR} ${code-examples_NODE_MEMORY_OVERHEAD_GB} ${spark_cluster} ret=$? 
if [[ $ret -ne 0 ]]; then - echo "Error: Failed to start Spark worker on the master node: ${ret}" + echo "Error: Failed to start Spark worker on the code-examples node: ${ret}" exit $ret fi - echo "Started Spark processes on master node ${master_node}" + echo "Started Spark processes on code-examples node ${code-examples_node}" # Spark does provide a way to start all nodes at once: start-workers.sh. # But that doesn't allow specifying memory for each node independently. for node_name in $(cat ${CONFIG_DIR}/conf/workers); do - if [[ $node_name != ${master_node} ]]; then + if [[ $node_name != ${code-examples_node} ]]; then ssh ${USER}@${node_name} ${SCRIPT_DIR}/start_spark_worker.sh \ ${CONFIG_DIR} ${WORKER_NODE_MEMORY_OVERHEAD_GB} ${spark_cluster} ret=$? diff --git a/applications/spark/spark_scripts/stop_spark_cluster.sh b/applications/spark/spark_scripts/stop_spark_cluster.sh index 80703fe66..a7a8fcd02 100755 --- a/applications/spark/spark_scripts/stop_spark_cluster.sh +++ b/applications/spark/spark_scripts/stop_spark_cluster.sh @@ -46,7 +46,7 @@ fi # scripts ssh to each worker node. It doesn't happen in our ssh commands. # Workaround the issue by stopping the Spark worker inside stop_container.sh. 
# singularity exec instance://${CONTAINER_INSTANCE_NAME} stop-all.sh -singularity exec instance://${CONTAINER_INSTANCE_NAME} stop-master.sh +singularity exec instance://${CONTAINER_INSTANCE_NAME} stop-code-examples.sh for node_name in $(cat ${CONFIG_DIR}/conf/workers); do ssh ${USER}@${node_name} ${SCRIPT_DIR}/stop_container.sh ${CONFIG_DIR} done diff --git a/applications/spark/tests/batch_job.sh.template b/applications/spark/tests/batch_job.sh.template index c582ec576..95044fe01 100644 --- a/applications/spark/tests/batch_job.sh.template +++ b/applications/spark/tests/batch_job.sh.template @@ -50,7 +50,7 @@ if [ ${ret} -ne 0 ]; then exit 1 fi -singularity run instance://spark spark-submit --master spark://$(hostname):7077 ../test_job.py +singularity run instance://spark spark-submit --code-examples spark://$(hostname):7077 ../test_job.py ret=$? if [ ${ret} -ne 0 ]; then echo "Error: Failed to run test_job.py: ${ret}" diff --git a/applications/vasp/Performance Study 2/README.md b/applications/vasp/Performance Study 2/README.md index 853dd8a29..faf3a60ea 100644 --- a/applications/vasp/Performance Study 2/README.md +++ b/applications/vasp/Performance Study 2/README.md @@ -1,4 +1,4 @@ -A study was performed to evaluate the performance on VASP on Swift and Eagle using [ESIF VASP Benchmarks](https://github.com/NREL/ESIFHPC3/tree/master/VASP) 1 and 2. Benchmark 1 is a system of 16 atoms (Cu4In4Se8), and Benchmark 2 is a system of 519 atoms (Ag504C4H10S1). +A study was performed to evaluate the performance on VASP on Swift and Eagle using [ESIF VASP Benchmarks](https://github.com/NREL/ESIFHPC3/tree/code-examples/VASP) 1 and 2. Benchmark 1 is a system of 16 atoms (Cu4In4Se8), and Benchmark 2 is a system of 519 atoms (Ag504C4H10S1). On Swift, the default builds of VASP installed on the system as modules were used. The Intel MPI build was built with Intel compilers and the mkl math library, and it was accessed via the "vaspintel" module. 
The OpenMPI build was compiled with gnu using gcc and fortran compilers and used OpenMPI's math libraries, and was accessed via the "vasp" module. Both builds run VASP 6.1.1. @@ -22,9 +22,9 @@ Running the OpenACC GPU build of VASP (vasp_gpu) on GPU nodes improves performan * Memory limitation: GPU nodes on Eagle cannot provide as much memory as CPU nodes for VASP jobs, and large VASP jobs may require more GPU nodes to provide enough memory for the calculation. For Benchmark 2, at least 2 full nodes were needed to provide enough memory to complete a calculation. Using more complicated parallelization schemes, the number of nodes necessary to provide enough memory scaled with the increase in number of problems handled simultaneousely. -![Eagle GPU Bench 2](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Eagle_GPU_2.png) +![Eagle GPU Bench 2](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Eagle_GPU_2.png) -![Eagle GPU Bench 1 4x4x2](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Eagle_GPU_1_4x4x2.png) +![Eagle GPU Bench 1 4x4x2](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Eagle_GPU_1_4x4x2.png) ### MPI @@ -33,7 +33,7 @@ Intel MPI is recommended over Open MPI. Using an Intel MPI build of VASP and run Find scripts for running the Intel MPI and Open MPI builds of VASP in [this section](#Scripts-for-Running-VASP-on-Eagle). ### --cpu-bind Flag -The --cpu-bind flag changes how tasks are assigned to cores throughout the node. Setting --cpu-bind=cores or rank showed no improvement in the performance of VASP on 36 CPUs/node. When running on 18 CPUs/node, setting --cpu-bind=cores shows a small improvement in runtime (~5% decrease) using both Intel MPI and Open MPI. 
(See [cpu-bind analysis](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/cpu-bind%20data/cpu-bind_VASP.ipynb) for info on the effect of cpu-bind) +The --cpu-bind flag changes how tasks are assigned to cores throughout the node. Setting --cpu-bind=cores or rank showed no improvement in the performance of VASP on 36 CPUs/node. When running on 18 CPUs/node, setting --cpu-bind=cores shows a small improvement in runtime (~5% decrease) using both Intel MPI and Open MPI. (See [cpu-bind analysis](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/cpu-bind%20data/cpu-bind_VASP.ipynb) for info on the effect of cpu-bind) cpu-bind can be set as a flag in an srun command, such as ``` @@ -52,9 +52,9 @@ KPAR determines the number of groups across which to divide calculations at each Runtime does not scale well with the number of kpoints. Benchmark 1 uses a 10x10x5 kpoints grid (500 kpoints). When run with a 4x4x2 kpoints grid (16 kpoints), we should expect the runtime to scale by 16/500 (3.2%) since calculations are being performed at 16 points rather than 500. However, the average scaling factor between Benchmark 1 jobs on Eagle with 10x10x5 grids and 4x4x2 grids is 28% (ranging from ~20%-57%). 
### Scripts for Running VASP on Eagle - * [VASP on Eagle with Intel MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_IntelMPI.slurm) - * [VASP on Eagle with Open MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_OpenMPI.slurm) - * [VASP on Eagle on GPUs with OpenACC GPU build using Intel MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_OpenACC_GPU.slurm) + * [VASP on Eagle with Intel MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_IntelMPI.slurm) + * [VASP on Eagle with Open MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_OpenMPI.slurm) + * [VASP on Eagle on GPUs with OpenACC GPU build using Intel MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_OpenACC_GPU.slurm) ## Swift @@ -70,11 +70,11 @@ The graphs below are meant to help users identify the number of CPUs/node that w Intel MPI, performance/core | Intel MPI, performance/node :-------------------------:|:-------------------------: -![](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Swift_2_Intel_Cores.png) | ![](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Swift_2_Intel_Nodes.png) +![](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Swift_2_Intel_Cores.png) | ![](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Swift_2_Intel_Nodes.png) Open MPI, performance/core | Open MPI, performance/node :-------------------------:|:-------------------------: -![](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Swift_2_Open_Cores.png) | 
![](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Swift_2_Open_Nodes.png) +![](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Swift_2_Open_Cores.png) | ![](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Swift_2_Open_Nodes.png) ### MPI @@ -84,7 +84,7 @@ Find scripts for running the Intel MPI and Open MPI builds of VASP in [this sect ### --cpu-bind Flag -The --cpu-bind flag changes how tasks are assigned to cores throughout the node. On Swift, it is recommended not to use cpu-bind. Running VASP on 64 CPUs/node and 128 CPUs/node, setting --cpu-bind=cores or rank showed no improvement in runtime. Running VASP on 32 CPUs/node, setting --cpu-bind=cores or rank increased runtime by up to 40%. (See [cpu-bind analysis](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/cpu-bind%20data/cpu-bind_VASP.ipynb) for info on the effect of cpu-bind) +The --cpu-bind flag changes how tasks are assigned to cores throughout the node. On Swift, it is recommended not to use cpu-bind. Running VASP on 64 CPUs/node and 128 CPUs/node, setting --cpu-bind=cores or rank showed no improvement in runtime. Running VASP on 32 CPUs/node, setting --cpu-bind=cores or rank increased runtime by up to 40%. 
(See [cpu-bind analysis](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/cpu-bind%20data/cpu-bind_VASP.ipynb) for info on the effect of cpu-bind) ``` srun --cpu-bind=cores vasp_std @@ -102,9 +102,9 @@ KPAR determines the number of groups across which to divide calculations at each KPAR = 1 | KPAR = 4 :-------------------------:|:-------------------------: -![](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Swift_1_K1_N4.png) | ![](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Swift_1_K4_N4.png) +![](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Swift_1_K1_N4.png) | ![](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Swift_1_K4_N4.png) KPAR = 8 | KPAR = 9 -![](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Swift_1_K8_N4.png) | ![](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/Images/Swift_1_K9_N4.png) +![](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Swift_1_K8_N4.png) | ![](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/Images/Swift_1_K9_N4.png) ### K-Points Scaling @@ -112,7 +112,7 @@ KPAR = 8 | KPAR = 9 Runtime does not scale well with the number of kpoints. Benchmark 1 uses a 10x10x5 kpoints grid (500 kpoints). When run with a 4x4x2 kpoints grid (16 kpoints), we should expect the runtime to scale by 16/500 (3.2%) since calculations are being performed at 16 points rather than 500. However, the average scaling factor between Benchmark 1 jobs on Swift with 10x10x5 grids and 4x4x2 grids is 28% (ranging from ~19%-39%). 
### Scripts for Running VASP on Swift - * [VASP on Swift with Intel MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_IntelMPI.slurm) - * [VASP on Swift with Open MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_OpenMPI.slurm) - * [VASP on Swift with Shared Nodes using Intel MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_IntelMPI_shared_nodes.slurm) - * [VASP on Swift with Shared Nodes using Open MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_OpenMPI_shared_nodes.slurm) + * [VASP on Swift with Intel MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_IntelMPI.slurm) + * [VASP on Swift with Open MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_OpenMPI.slurm) + * [VASP on Swift with Shared Nodes using Intel MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_IntelMPI_shared_nodes.slurm) + * [VASP on Swift with Shared Nodes using Open MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_OpenMPI_shared_nodes.slurm) diff --git a/applications/vasp/Performance Study 2/VASP Performance Analysis/VASP_Recommendations_Analysis.ipynb b/applications/vasp/Performance Study 2/VASP Performance Analysis/VASP_Recommendations_Analysis.ipynb index f54da3d90..160153856 100644 --- a/applications/vasp/Performance Study 2/VASP Performance Analysis/VASP_Recommendations_Analysis.ipynb +++ b/applications/vasp/Performance Study 2/VASP Performance Analysis/VASP_Recommendations_Analysis.ipynb @@ -6,7 +6,7 @@ "source": [ "# VASP Recommendations Analysis\n", "\n", - "In this study, the ESIF VASP Benchmarks 
(https://github.com/NREL/ESIFHPC3/tree/master/VASP) 1 and 2 were used. Benchmark 1 is a system of 16 atoms (Cu4In4Se8), and Benchmark 2 is a system of 519 atoms (Ag504C4H10S1). \n", + "In this study, the ESIF VASP Benchmarks (https://github.com/NREL/ESIFHPC3/tree/code-examples/VASP) 1 and 2 were used. Benchmark 1 is a system of 16 atoms (Cu4In4Se8), and Benchmark 2 is a system of 519 atoms (Ag504C4H10S1). \n", "\n", "On Swift, the default builds of VASP installed on the system as modules were used. The Intel MPI build was built with Intel compilers and the mkl math library, and it was accessed via the \"vaspintel\" module. The OpenMPI build was compiled with gnu using gcc and fortran compilers and used OpenMPI's math libraries, and was accessed via the \"vasp\" module. Both builds run VASP 6.1.1. \n", "\n", diff --git a/applications/vasp/Performance Study 2/cpu-bind data/cpu-bind_VASP.ipynb b/applications/vasp/Performance Study 2/cpu-bind data/cpu-bind_VASP.ipynb index c01fa11b3..632318de1 100644 --- a/applications/vasp/Performance Study 2/cpu-bind data/cpu-bind_VASP.ipynb +++ b/applications/vasp/Performance Study 2/cpu-bind data/cpu-bind_VASP.ipynb @@ -4,13 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this study, the ESIF VASP Benchmark 2 (https://github.com/NREL/ESIFHPC3/tree/master/VASP/bench2) was used to study how the cpu-bind flag affects the way that tasks are assigned to cores throughout the node over the runtime of a VASP job on Swfit and Eagle. Benchmark 1 is a system of 16 atoms (Cu4In4Se8), and Benchmark 2 is a system of 519 atoms (Ag504C4H10S1).\n", + "In this study, the ESIF VASP Benchmark 2 (https://github.com/NREL/ESIFHPC3/tree/code-examples/VASP/bench2) was used to study how the cpu-bind flag affects the way that tasks are assigned to cores throughout the node over the runtime of a VASP job on Swfit and Eagle. 
Benchmark 1 is a system of 16 atoms (Cu4In4Se8), and Benchmark 2 is a system of 519 atoms (Ag504C4H10S1).\n", "\n", "On Swift, the default builds of VASP installed on the system as modules were used. The Intel MPI build was built with Intel compilers and the mkl math library, and it was accessed via the \"vaspintel\" module. The OpenMPI build was compiled with gnu using gcc and fortran compilers and used OpenMPI's math libraries, and was accessed via the \"vasp\" module. Both builds run VASP 6.1.1.\n", "\n", "On Eagle, the default build of VASP installed on the system is an Intel MPI version of VASP. The Intel MPI build was built with Intel compilers and the mkl math library, and it was accessed via the \"vasp\" module. It runs VASP 6.1.2. No Open MPI VASP build is accessible through the default modules on Eagle, but an Open MPI build can be accessed in an environment via \"source /nopt/nrel/apps/210830a/myenv.2108301742, ml vasp/6.1.1-l2mkbb2\". The OpenMPI build was compiled with gnu using gcc and fortran compilers and used OpenMPI's math libraries. 
It runs VASP 6.1.1.\n", "\n", - "The VASP repo (https://github.com/claralarson/HPC/tree/master/applications/vasp/VASP%20Recommendations) contains scripts that can be used to run the Intel MPI and Open MPI builds used in the study to perform calculations on Swift and Eagle.\n", + "The VASP repo (https://github.com/claralarson/HPC/tree/code-examples/applications/vasp/VASP%20Recommendations) contains scripts that can be used to run the Intel MPI and Open MPI builds used in the study to perform calculations on Swift and Eagle.\n", "\n", "The cpu-bind flag can be set in the srun command as follows:\n", "> srun --cpu-bind=cores vasp_std\n", diff --git a/applications/vasp/README.md b/applications/vasp/README.md index df98b53e6..f1eace160 100644 --- a/applications/vasp/README.md +++ b/applications/vasp/README.md @@ -13,14 +13,14 @@ Load VASP with Intel MPI: ``` ml vasp ``` -[script to run VASP on Eagle with Intel MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_IntelMPI.slurm) +[script to run VASP on Eagle with Intel MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_IntelMPI.slurm) Load VASP with Open MPI: ``` source /nopt/nrel/apps/210830a/myenv.2108301742 ml vasp/6.1.1-l2mkbb2 ``` -[script to run VASP on Eagle with Open MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_OpenMPI.slurm) +[script to run VASP on Eagle with Open MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_OpenMPI.slurm) Load the GPU build of VASP: ``` @@ -30,7 +30,7 @@ export LD_LIBRARY_PATH=/nopt/nrel/apps/220511a/install/opt/spack/linux-centos7-s export LD_LIBRARY_PATH=/nopt/nrel/apps/220511a/install/opt/spack/linux-centos7-skylake_avx512/gcc-12.1.0/nvhpc-22.3-c4qk6fly5hls3mjimoxg6vyuy5cc3vti/Linux_x86_64/22.3/compilers/extras/qd/lib:$LD_LIBRARY_PATH export 
PATH=/projects/hpcapps/tkaiser2/vasp/6.3.1/nvhpc_acc:$PATH ``` -[script to run VASP on Eagle on GPU nodes](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_OpenACC_GPU.slurm) +[script to run VASP on Eagle on GPU nodes](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Eagle_OpenACC_GPU.slurm) ### On Swift Load VASP with Intel MPI: @@ -42,7 +42,7 @@ ml intel-oneapi-compilers/2021.3.0-piz2usr ml intel-oneapi-mpi/2021.3.0-hcp2lkf ml intel-oneapi-mkl/2021.3.0-giz47h4 ``` -[script to run VASP on Swift with Intel MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_IntelMPI.slurm) +[script to run VASP on Swift with Intel MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_IntelMPI.slurm) Load VASP with Open MPI: ``` @@ -50,27 +50,27 @@ ml vasp ml slurm/21-08-1-1-o2xw5ti ml openmpi/4.1.1-6vr2flz ``` -[script to run VASP on Swift with Open MPI](https://github.com/NREL/HPC/blob/master/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_OpenMPI.slurm) +[script to run VASP on Swift with Open MPI](https://github.com/NREL/HPC/blob/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts/Swift_OpenMPI.slurm) ## VASP Documentation -This repo contains the results of two separate VASP performance studies. The first, Performance Study 1, studies VASP performance on Eagle using the input files provided in the directory. The second, Performance Study 2, studies VASP performance on Eagle and Swift using benchmarks from the ESIF benchmarking suite, which can be found [here](https://github.com/NREL/ESIFHPC3/tree/master/VASP) or in the benchmarks folder in the Performance Harness 2 directory. Each study evaluates performance differently, as described below, and provides recommendations for running VASP most efficiently in the README files. 
The READMEs in each directory contain the following information. +This repo contains the results of two separate VASP performance studies. The first, Performance Study 1, studies VASP performance on Eagle using the input files provided in the directory. The second, Performance Study 2, studies VASP performance on Eagle and Swift using benchmarks from the ESIF benchmarking suite, which can be found [here](https://github.com/NREL/ESIFHPC3/tree/code-examples/VASP) or in the benchmarks folder in the Performance Harness 2 directory. Each study evaluates performance differently, as described below, and provides recommendations for running VASP most efficiently in the README files. The READMEs in each directory contain the following information. -[Performance Study 1](https://github.com/NREL/HPC/tree/master/applications/vasp/Performance%20Study%201) (VASP6 on Eagle): +[Performance Study 1](https://github.com/NREL/HPC/tree/code-examples/applications/vasp/Performance%20Study%201) (VASP6 on Eagle): - Recommendations for setting LREAL - Recommendations for setting cpu pinning - Recommendations for setting NPAR - Recommendations for setting NSIM - Instructions for using the OpenMP version of VASP -- Instructions for running multiple VASP jobs on the same nodes (and [scripts to do so](https://github.com/NREL/HPC/tree/master/applications/vasp/Performance%20Study%201/multi)) +- Instructions for running multiple VASP jobs on the same nodes (and [scripts to do so](https://github.com/NREL/HPC/tree/code-examples/applications/vasp/Performance%20Study%201/multi)) - Runtime comparison using VASP5 -[Performance Study 2](https://github.com/NREL/HPC/tree/master/applications/vasp/Performance%20Study%202#https://github.com/NREL/HPC/tree/master/applications/vasp/Performance%20Study%202) (VASP6 on Eagle and Swift): +[Performance Study 
2](https://github.com/NREL/HPC/tree/code-examples/applications/vasp/Performance%20Study%202#https://github.com/NREL/HPC/tree/code-examples/applications/vasp/Performance%20Study%202) (VASP6 on Eagle and Swift): - Information on how runtime scales with nodecount - Recommendations for chosing the most efficient value of cpus/node -- Recommendations for running VASP on Eagle's GPU nodes (and [scripts to do so](https://github.com/NREL/HPC/tree/master/applications/vasp/Performance%20Study%202/VASP%20scripts)) -- Recommendations for chosing Intel MPI or Open MPI (and [scripts for running with both MPIs](https://github.com/NREL/HPC/tree/master/applications/vasp/Performance%20Study%202/VASP%20scripts)) +- Recommendations for running VASP on Eagle's GPU nodes (and [scripts to do so](https://github.com/NREL/HPC/tree/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts)) +- Recommendations for chosing Intel MPI or Open MPI (and [scripts for running with both MPIs](https://github.com/NREL/HPC/tree/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts)) - Recommendations for setting KPAR - Recommendations for setting cpu pinning - Information on k-points scaling -- Instructions for running multiple VASP jobs on the same nodes on Swift (and [scripts to do so](https://github.com/NREL/HPC/tree/master/applications/vasp/Performance%20Study%202/VASP%20scripts)) +- Instructions for running multiple VASP jobs on the same nodes on Swift (and [scripts to do so](https://github.com/NREL/HPC/tree/code-examples/applications/vasp/Performance%20Study%202/VASP%20scripts)) diff --git a/general/Jupyterhub/adv_jupyter/README.md b/general/Jupyterhub/adv_jupyter/README.md index b73315474..49e33ed8b 100644 --- a/general/Jupyterhub/adv_jupyter/README.md +++ b/general/Jupyterhub/adv_jupyter/README.md @@ -11,7 +11,7 @@ Beyond the basics: this advanced Jupyter directory builds upon our Intro to Jupy * Slurm commands: `srun` from a notebook, job status checks, running 
MPI-enabled routines. * Explain `pip install slurm_magic` from inside notebook - * See https://github.com/NREL/HPC/blob/master/general/Jupyterhub/jupyter/dompi.ipynb + * See https://github.com/NREL/HPC/blob/code-examples/general/Jupyterhub/jupyter/dompi.ipynb * Demonstration of using slurm magics to run MNIST * multi-node parallelism with mpi4py diff --git a/general/Jupyterhub/adv_jupyter/mpi4py_tf/dompi.ipynb b/general/Jupyterhub/adv_jupyter/mpi4py_tf/dompi.ipynb index f5fc21f29..a65da9722 100644 --- a/general/Jupyterhub/adv_jupyter/mpi4py_tf/dompi.ipynb +++ b/general/Jupyterhub/adv_jupyter/mpi4py_tf/dompi.ipynb @@ -25,7 +25,7 @@ "\n", "Here is the source:\n", "\n", - "https://github.com/NERSC/slurm-magic/blob/master/slurm_magic.py\n", + "https://github.com/NERSC/slurm-magic/blob/code-examples/slurm_magic.py\n", "\n", "\n", "\n" @@ -327,8 +327,8 @@ "\r\n", "\r\n", "#add Tim's thread mapping module\r\n", - "wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/setup.py\r\n", - "wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/spam.c\r\n", + "wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/setup.py\r\n", + "wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/spam.c\r\n", "python3 setup.py install\r\n", "\r\n" ] @@ -413,7 +413,7 @@ "### To get tunnel\n", "\n", "`\n", - "wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/tunnel.sh\n", + "wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/tunnel.sh\n", "`\n", "\n", "### We're going to get a few examples to play with:" @@ -429,7 +429,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2021-05-12 13:55:03-- https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/c_ex02.c\n", + "--2021-05-12 13:55:03-- https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/c_ex02.c\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", @@ -444,7 +444,7 @@ } ], "source": [ - "!wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/c_ex02.c" + "!wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/c_ex02.c" ] }, { @@ -457,7 +457,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2021-05-12 13:55:03-- https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/report.py\n", + "--2021-05-12 13:55:03-- https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/report.py\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", @@ -472,7 +472,7 @@ } ], "source": [ - "!wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/report.py" + "!wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/report.py" ] }, { diff --git a/general/Jupyterhub/adv_jupyter/mpi4py_tf/makeit b/general/Jupyterhub/adv_jupyter/mpi4py_tf/makeit index 521c74f75..4d0c5396e 100644 --- a/general/Jupyterhub/adv_jupyter/mpi4py_tf/makeit +++ b/general/Jupyterhub/adv_jupyter/mpi4py_tf/makeit @@ -60,7 +60,7 @@ pip --no-cache-dir install cupy #add Tim's thread mapping module -wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/setup.py -wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/spam.c +wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/setup.py +wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/spam.c python3 setup.py install diff --git a/general/Jupyterhub/adv_jupyter/mpi4py_tf/mninstcu.ipynb 
b/general/Jupyterhub/adv_jupyter/mpi4py_tf/mninstcu.ipynb index 6fb9f5bb7..351338520 100644 --- a/general/Jupyterhub/adv_jupyter/mpi4py_tf/mninstcu.ipynb +++ b/general/Jupyterhub/adv_jupyter/mpi4py_tf/mninstcu.ipynb @@ -33,7 +33,7 @@ "Here is the source:\n", "`\n", "\n", - "https://github.com/NERSC/slurm-magic/blob/master/slurm_magic.py\n", + "https://github.com/NERSC/slurm-magic/blob/code-examples/slurm_magic.py\n", "\n", "\n", "\n" @@ -1908,8 +1908,8 @@ "\r\n", "\r\n", "#add Tim's thread mapping module\r\n", - "wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/setup.py\r\n", - "wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/spam.c\r\n", + "wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/setup.py\r\n", + "wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/spam.c\r\n", "python3 setup.py install\r\n", "\r\n" ] diff --git a/general/Jupyterhub/adv_jupyter/mpi4py_tf/spackBuild b/general/Jupyterhub/adv_jupyter/mpi4py_tf/spackBuild index 27fe2b4b4..8378d99f0 100755 --- a/general/Jupyterhub/adv_jupyter/mpi4py_tf/spackBuild +++ b/general/Jupyterhub/adv_jupyter/mpi4py_tf/spackBuild @@ -7,7 +7,7 @@ # Make a python/jupyter/mpi4py/pandas/tensorflow/cupy environment using spack. # We also install a bare version of R with Rmpi. The R and Python versions of -# MPI should work together. See: https://github.com/timkphd/examples/tree/master/mpi/mixedlang +# MPI should work together. 
See: https://github.com/timkphd/examples/tree/master/mpi/mixedlang # for examples # ********** Install directory ********** @@ -37,7 +37,7 @@ cd $IDIR #If you don't have tymer use this poor man's version command -v tymer >/dev/null 2>&1 || alias tymer='python -c "import sys ;import time ;print(time.time(),time.asctime(),sys.argv[1:])" ' #You can get the full version from -#https://raw.githubusercontent.com/timkphd/examples/master/tims_tools/tymer +#https://raw.githubusercontent.com/timkphd/examples/master/tims_tools/tymer # This is where tymer will put its data so we clean it out rm ~/sbuild @@ -219,8 +219,8 @@ tymer ~/sbuild done cupy #Add Tim's thread mapping module -wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/setup.py -wget https://raw.githubusercontent.com/NREL/HPC/master/slurm/source/spam.c +wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/setup.py +wget https://raw.githubusercontent.com/NREL/HPC/code-examples/slurm/source/spam.c python3 setup.py install tymer ~/sbuild done spam diff --git a/general/bash/cheatsheet.sh b/general/bash/cheatsheet.sh index 7d5aa7092..473bf28f1 100644 --- a/general/bash/cheatsheet.sh +++ b/general/bash/cheatsheet.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copied from https://raw.githubusercontent.com/LeCoupa/awesome-cheatsheets/master/languages/bash.sh +# Copied from https://raw.githubusercontent.com/LeCoupa/awesome-cheatsheets/master/languages/bash.sh ############################################################################## # SHORTCUTS diff --git a/general/building-mpi-applications/README.md b/general/building-mpi-applications/README.md index 620947bd5..227ff8cb7 100644 --- a/general/building-mpi-applications/README.md +++ b/general/building-mpi-applications/README.md @@ -1,3 +1,3 @@ # building-mpi-applications -See [Plexos walkthrough](https://github.com/NREL/HPC/tree/master/applications/plexos-hpc-walkthrough) as an Example.
Update this readme file using the [example Plexos readme](https://github.com/NREL/HPC/blob/master/applications/plexos-hpc-walkthrough/README.md) +See [Plexos walkthrough](https://github.com/NREL/HPC/tree/code-examples/applications/plexos-hpc-walkthrough) as an Example. Update this readme file using the [example Plexos readme](https://github.com/NREL/HPC/blob/code-examples/applications/plexos-hpc-walkthrough/README.md) diff --git a/general/markdown/RenameStep1.md b/general/markdown/RenameStep1.md index 4afa9a9fb..f02317038 100644 --- a/general/markdown/RenameStep1.md +++ b/general/markdown/RenameStep1.md @@ -3,5 +3,5 @@ This content is more to summarize formatting considerations, and possibly a general organization for how content might be laid out. Naturally, this will need to be adapted to the training or tutorial at hand. -A reasonable high-level layout is to use the top-level README.md as a master navigation, with a list of sequential steps to be followed that are linked to their respective detailed documents. +A reasonable high-level layout is to use the top-level README.md as a master navigation, with a list of sequential steps to be followed that are linked to their respective detailed documents. diff --git a/general/software-environment-basics/conda-how-to.md b/general/software-environment-basics/conda-how-to.md index cace0d974..5253ab317 100644 --- a/general/software-environment-basics/conda-how-to.md +++ b/general/software-environment-basics/conda-how-to.md @@ -11,7 +11,7 @@ Table of Contents ### Creating a custom environment
This [environment.yml](https://github.nrel.gov/hsorense/conda-peregrine/blob/master/environment.yml) is used to create Eagle's default conda environment. It can be copied and modified for a custom enviornment. Be sure to change the name to something other than default or root, or omit it altogether and use the command line option. +Custom environments can be created with [conda create](https://docs.conda.io/projects/conda/en/latest/commands/create.html) or [conda env create](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file). `conda create` accepts package names in the command path, whereas `conda env create` requires the use of an environment.yml file. This [environment.yml](https://github.nrel.gov/hsorense/conda-peregrine/blob/code-examples/environment.yml) is used to create Eagle's default conda environment. It can be copied and modified for a custom enviornment. Be sure to change the name to something other than default or root, or omit it altogether and use the command line option. The default location for custom environments is $HOME/.conda-envs . A custom directory can be used with the command line options for path and name. Environments tend to use large amounts of disk. If you are getting messages about going over the quota but can't find where the usage is, check the environments directory and remove unused ones. 
diff --git a/general/stream_benchmark/stream.c b/general/stream_benchmark/stream.c index 93e1e0324..3407fadc3 100644 --- a/general/stream_benchmark/stream.c +++ b/general/stream_benchmark/stream.c @@ -247,7 +247,7 @@ main() printf(HLINE); #pragma omp parallel { -#pragma omp master +#pragma omp master { k = omp_get_num_threads(); printf ("Number of Threads requested = %i\n",k); diff --git a/languages/fortran/Fortran90/f90.md b/languages/fortran/Fortran90/f90.md index d9e0ffc11..6c6ccea09 100644 --- a/languages/fortran/Fortran90/f90.md +++ b/languages/fortran/Fortran90/f90.md @@ -1887,7 +1887,7 @@ end - Mutation - Nothing new in either of these files - [Source and makefile "git"](source) -- [Source and makefile "*tgz"](https://github.com/timkphd/examples/raw/master/fort/90/source/archive.tgz) +- [Source and makefile "*tgz"](https://github.com/timkphd/examples/raw/master/fort/90/source/archive.tgz) - - - - - - @@ -2908,7 +2908,7 @@ end - [http://www.nsc.liu.se/~boein/f77to90/](http://www.nsc.liu.se/~boein/f77to90/) Fortran 90 for the Fortran 77 Programmer - Fortran 90 Handbook Complete ANSI/ISO Reference. Jeanne Adams, Walt Brainerd, Jeanne Martin, Brian Smith, Jerrold Wagener - Fortran 90 Programming. T. Ellis, Ivor Philips, Thomas Lahey -- [https://github.com/llvm/llvm-project/blob/master/flang/docs/FortranForCProgrammers.md](https://github.com/llvm/llvm-project/blob/master/flang/docs/FortranForCProgrammers.md) +- [https://github.com/llvm/llvm-project/blob/master/flang/docs/FortranForCProgrammers.md](https://github.com/llvm/llvm-project/blob/master/flang/docs/FortranForCProgrammers.md) - [FFT stuff](../mkl/) - [Fortran 95 and beyond](../95/) diff --git a/languages/julia/demos/notebooks/PyJulia_Demo.ipynb b/languages/julia/demos/notebooks/PyJulia_Demo.ipynb index 2f5d4154f..9128b9c34 100644 --- a/languages/julia/demos/notebooks/PyJulia_Demo.ipynb +++ b/languages/julia/demos/notebooks/PyJulia_Demo.ipynb @@ -30,7 +30,7 @@ "4. 
**Run the cells under Install PyJulia**.\n", "\n", "To run on Eagle:\n", - "1. See the instruction [here](https://github.com/NREL/HPC/blob/master/languages/python/jupyter/Kernels_and_Servers.ipynb) for running jupyter notebooks on Eagle.\n", + "1. See the instruction [here](https://github.com/NREL/HPC/blob/code-examples/languages/python/jupyter/Kernels_and_Servers.ipynb) for running jupyter notebooks on Eagle.\n", "2. See the instruction [here](../../how_to_guides/build_Julia.md) for building Julia on Eagle.\n", "3. Run the cells under Install PyJulia." ] diff --git a/languages/julia/how-to-guides/install-Julia.md b/languages/julia/how-to-guides/install-Julia.md index e4b6593e2..1e2e12c07 100644 --- a/languages/julia/how-to-guides/install-Julia.md +++ b/languages/julia/how-to-guides/install-Julia.md @@ -60,7 +60,7 @@ else: ### Prerequisites -All the [required build tools and libraries](https://github.com/JuliaLang/julia/blob/master/doc/build/build.md#required-build-tools-and-external-libraries) are available on Eagle either by default or through modules. The needed modules are covered in the instructions. +All the [required build tools and libraries](https://github.com/JuliaLang/julia/blob/code-examples/doc/build/build.md#required-build-tools-and-external-libraries) are available on Eagle either by default or through modules. The needed modules are covered in the instructions. ### Terms * `JULIA_HOME` is the base directory of julia source code (initially called `julia` after `git clone`) diff --git a/languages/julia/julia-tutorial/Julia-Parallel-Computing.md b/languages/julia/julia-tutorial/Julia-Parallel-Computing.md index 643e0c216..eb25320f3 100644 --- a/languages/julia/julia-tutorial/Julia-Parallel-Computing.md +++ b/languages/julia/julia-tutorial/Julia-Parallel-Computing.md @@ -377,7 +377,7 @@ Each Julia process is identified by a (64-bit) integer. 
We can get a list of all procs() = [1, 2, 3] -There is a distinction between the original Julia process and those we launched. The original Julia process is often called the **master** process and always has id equal to 1. The launched processes are called **workers**. We can obtain a list of workers with the `workers` function: +There is a distinction between the original Julia process and those we launched. The original Julia process is often called the **master** process and always has id equal to 1. The launched processes are called **workers**. We can obtain a list of workers with the `workers` function: ```julia @@ -548,7 +548,7 @@ function run_mci_rc() end; ``` -Here we create a `RemoteChannel` on the master process, divide the computationally intensive `integrator` function into two calls and remotely execute them on the worker processes. Then we start a task on the master process to accumulate the values and call fetch to wait for and retrieve the result. +Here we create a `RemoteChannel` on the master process, divide the computationally intensive `integrator` function into two calls and remotely execute them on the worker processes. Then we start a task on the master process to accumulate the values and call fetch to wait for and retrieve the result. ```julia diff --git a/languages/julia/julia-tutorial/source-notebooks/Julia-Parallel-Computing.ipynb b/languages/julia/julia-tutorial/source-notebooks/Julia-Parallel-Computing.ipynb index a3b63344d..ea9555f31 100644 --- a/languages/julia/julia-tutorial/source-notebooks/Julia-Parallel-Computing.ipynb +++ b/languages/julia/julia-tutorial/source-notebooks/Julia-Parallel-Computing.ipynb @@ -1008,7 +1008,7 @@ } }, "source": [ - "There is a distinction between the original Julia process and those we launched. The original Julia process is often called the **master** process and always has id equal to 1. The launched processes are called **workers**. 
We can obtain a list of workers with the `workers` function:" + "There is a distinction between the original Julia process and those we launched. The original Julia process is often called the **master** process and always has id equal to 1. The launched processes are called **workers**. We can obtain a list of workers with the `workers` function:" ] }, { @@ -1606,7 +1606,7 @@ } }, "source": [ - "Here we create a `RemoteChannel` on the master process, divide the computationally intensive `integrator` function into two calls and remotely execute them on the worker processes. Then we start a task on the master process to accumulate the values and call fetch to wait for and retrieve the result." + "Here we create a `RemoteChannel` on the master process, divide the computationally intensive `integrator` function into two calls and remotely execute them on the worker processes. Then we start a task on the master process to accumulate the values and call fetch to wait for and retrieve the result." ] }, { diff --git a/languages/python/anaconda/conda_tutorial.slides.html b/languages/python/anaconda/conda_tutorial.slides.html index 59d1f968c..8154232f0 100644 --- a/languages/python/anaconda/conda_tutorial.slides.html +++ b/languages/python/anaconda/conda_tutorial.slides.html @@ -70,7 +70,7 @@ /*! * Bootstrap v3.3.7 (http://getbootstrap.com) * Copyright 2011-2016 Twitter, Inc. - * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) */ /*! normalize.css v3.0.3 | MIT License | github.com/necolas/normalize.css */ html { @@ -250,7 +250,7 @@ th { padding: 0; } -/*! Source: https://github.com/h5bp/html5-boilerplate/blob/master/src/css/main.css */ +/*! 
Source: https://github.com/h5bp/html5-boilerplate/blob/master/src/css/main.css */ @media print { *, *:before, @@ -8411,7 +8411,7 @@ .fa-cc-visa:before { content: "\f1f0"; } -.fa-cc-mastercard:before { +.fa-cc-mastercard:before { content: "\f1f1"; } .fa-cc-discover:before { diff --git a/languages/python/openai_rllib/custom_gym_env/README.md b/languages/python/openai_rllib/custom_gym_env/README.md index 9efe10b04..d6c09c61d 100644 --- a/languages/python/openai_rllib/custom_gym_env/README.md +++ b/languages/python/openai_rllib/custom_gym_env/README.md @@ -75,7 +75,7 @@ Function `register_env` takes two arguments: env_name = "custom-env" register_env(env_name, lambda config: BasicEnv()) ``` -Once again, RLlib provides [detailed explanation](https://docs.ray.io/en/master/rllib-env.html) of how `register_env` works. +Once again, RLlib provides [detailed explanation](https://docs.ray.io/en/master/rllib-env.html) of how `register_env` works. The `tune.run` function, instead of `args.name_env`, it uses the `env_name` defined above. diff --git a/languages/python/openai_rllib/simple-example-gpu/README.md b/languages/python/openai_rllib/simple-example-gpu/README.md index 8c2e95836..fbd8c293d 100644 --- a/languages/python/openai_rllib/simple-example-gpu/README.md +++ b/languages/python/openai_rllib/simple-example-gpu/README.md @@ -11,13 +11,13 @@ conda env create --prefix=//env_example_gpu -f env_exa ### **Only for Eagle users:** Creating Anaconda environment using Optimized Tensorflow -NREL's HPC group has recently created [a set of optimized Tensorflow drivers](https://github.com/NREL/HPC/tree/master/workshops/Optimized_TF) that maximize the efficiency of utilizing Eagle's Tesla V100 GPU units. The drivers are created for various Python 3 and Tensorflow 2.x.x versions. 
+NREL's HPC group has recently created [a set of optimized Tensorflow drivers](https://github.com/NREL/HPC/tree/code-examples/workshops/Optimized_TF) that maximize the efficiency of utilizing Eagle's Tesla V100 GPU units. The drivers are created for various Python 3 and Tensorflow 2.x.x versions. -The repo provides an [Anaconda environment version](https://github.com/erskordi/HPC/blob/HPC-RL/languages/python/openai_rllib/simple-example-gpu/env_example_optimized_tf.yml) for using these drivers. This environment is based on one of the [example environments](https://github.com/NREL/HPC/blob/master/workshops/Optimized_TF/py37tf22.yml) provided in the [Optimized TF repo](https://github.com/NREL/HPC/tree/master/workshops/Optimized_TF). +The repo provides an [Anaconda environment version](https://github.com/erskordi/HPC/blob/HPC-RL/languages/python/openai_rllib/simple-example-gpu/env_example_optimized_tf.yml) for using these drivers. This environment is based on one of the [example environments](https://github.com/NREL/HPC/blob/code-examples/workshops/Optimized_TF/py37tf22.yml) provided in the [Optimized TF repo](https://github.com/NREL/HPC/tree/code-examples/workshops/Optimized_TF). 
**The provided Anaconda environment currently works for Python 3.7, Tensorflow 2.2, and the latest Ray version** -*Make sure to follow the [instructions for installing this particular environment](https://github.com/NREL/HPC/tree/master/workshops/Optimized_TF) explicitly!* +*Make sure to follow the [instructions for installing this particular environment](https://github.com/NREL/HPC/tree/code-examples/workshops/Optimized_TF) explicitly!* ## Allocate GPU node diff --git a/languages/python/openai_rllib/simple-example/README.md b/languages/python/openai_rllib/simple-example/README.md index 896167017..4e60e27c1 100644 --- a/languages/python/openai_rllib/simple-example/README.md +++ b/languages/python/openai_rllib/simple-example/README.md @@ -2,7 +2,7 @@ RL algorithms are notorious for the amount of data they need to collect in order to learn policies. The more data collected, the better the training will be. The best way to do it is to run many Gym instances in parallel and collecting experience, and this is where RLlib assists. -[RLlib](https://docs.ray.io/en/master/rllib.html) is an open-source library for reinforcement learning that offers both high scalability and a unified API for a variety of applications. It supports all known deep learning frameworks such as Tensorflow, Pytorch, although most parts are framework-agnostic and can be used by either one. +[RLlib](https://docs.ray.io/en/master/rllib.html) is an open-source library for reinforcement learning that offers both high scalability and a unified API for a variety of applications. It supports all known deep learning frameworks such as Tensorflow, Pytorch, although most parts are framework-agnostic and can be used by either one. The RL policy learning examples provided in this tutorial demonstrate the RLlib abilities. For convenience, the `CartPole-v0` OpenAI Gym environment will be used. 
@@ -15,7 +15,7 @@ Begin trainer by importing the `ray` package: import ray from ray import tune ``` -`Ray` consists of an API readily available for building [distributed applications](https://docs.ray.io/en/master/index.html). On top of it, there are several problem-solving libraries, one of which is RLlib. +`Ray` consists of an API readily available for building [distributed applications](https://docs.ray.io/en/code-examples/index.html). On top of it, there are several problem-solving libraries, one of which is RLlib. `Tune` is another one of `Ray`'s libraries for scalable hyperparameter tuning. All RLlib trainers (scripts for RL agent training) are compatible with Tune API, making experimenting easy and streamlined. @@ -81,7 +81,7 @@ tune.run( ``` That's it! The RLlib trainer is ready! -Note here that, except default hyperparameters like those above, [every RL algorithm](https://docs.ray.io/en/master/rllib-algorithms.html#available-algorithms-overview) provided by RLlib has its own hyperparameters and their default values that can be tuned in advance. +Note here that, except default hyperparameters like those above, [every RL algorithm](https://docs.ray.io/en/code-examples/rllib-algorithms.html#available-algorithms-overview) provided by RLlib has its own hyperparameters and their default values that can be tuned in advance. The code of the trainer in this example can be found [in the repo](https://github.com/erskordi/HPC/blob/HPC-RL/languages/python/openai_rllib/simple-example/simple_trainer.py). @@ -407,7 +407,7 @@ The following image shows the agent training progress, in terms of reward conver

Obviously, training using all CPU cores on a node led to faster convergence to the optimal value. -It is necessary to say here that CartPole is a simple environment where the optimal reward value (200) can be easily reached even when using a single CPU core on a personal computer. The power of using multiple cores becomes more apparent in cases of more complex environments (such as the [Atari environments](https://gym.openai.com/envs/#atari)). RLlib website also gives examples of the [scalability benefits](https://docs.ray.io/en/master/rllib-algorithms.html#ppo) for many state-of-the-art RL algorithms. +It is necessary to say here that CartPole is a simple environment where the optimal reward value (200) can be easily reached even when using a single CPU core on a personal computer. The power of using multiple cores becomes more apparent in cases of more complex environments (such as the [Atari environments](https://gym.openai.com/envs/#atari)). RLlib website also gives examples of the [scalability benefits](https://docs.ray.io/en/code-examples/rllib-algorithms.html#ppo) for many state-of-the-art RL algorithms. **Supplemental notes:** As you noticed, when using RLlib for RL traning, there is no need to `import gym`, as we did in the non-training example, because RLlib recognizes automatically all benchmark OpenAI Gym environments. Even when you create your own custom-made Gym environments, RLlib provides proper functions with which you can register your environment before training. diff --git a/languages/python/pyomo/README.md b/languages/python/pyomo/README.md index 0da340221..1ee27e09d 100644 --- a/languages/python/pyomo/README.md +++ b/languages/python/pyomo/README.md @@ -72,7 +72,7 @@ five major components: Pyomo has modeling objects for each of these components (as well as a few extra). 
Below we demonstrate their use on a the [p-median problem](https://en.wikipedia.org/wiki/Facility_location_problem) adapted from -[this example](https://github.com/Pyomo/PyomoGallery/blob/master/p_median/p-median.py) +[this example](https://github.com/Pyomo/PyomoGallery/blob/code-examples/p_median/p-median.py) utilizing a `ConcreteModel` and demonstrating some of the modeling flexibility in Pyomo. This example is also available as a [stand-alone python module](./p_median.py). ```python @@ -138,7 +138,7 @@ to use an external solver (linked through Pyomo) to *solve* or *optimize* this model. A more complex example `ConcreteModel` utilizing data brought in from a json -file is available [here](https://github.com/power-grid-lib/pglib-uc/blob/master/uc_model.py). +file is available [here](https://github.com/power-grid-lib/pglib-uc/blob/code-examples/uc_model.py). # Solvers diff --git a/languages/python/pyomo/p_median.py b/languages/python/pyomo/p_median.py index ca144af6e..c318e08d5 100644 --- a/languages/python/pyomo/p_median.py +++ b/languages/python/pyomo/p_median.py @@ -1,4 +1,4 @@ -# Adapted from: https://github.com/Pyomo/PyomoGallery/blob/master/p_median/p-median.py +# Adapted from: https://github.com/Pyomo/PyomoGallery/blob/code-examples/p_median/p-median.py import pyomo.environ as pyo import random diff --git a/slurm/multinode-task-per-core.sh b/slurm/multinode-task-per-core.sh index 04646f785..42a8b6cb1 100755 --- a/slurm/multinode-task-per-core.sh +++ b/slurm/multinode-task-per-core.sh @@ -18,8 +18,8 @@ USAGE: sbatch –A -N multinode-task-per-core.sh PROCS=$(($SLURM_NNODES * $SLURM_CPUS_ON_NODE)) # Number of CPUs * number of nodes -# Master node in jobs with N > 1 runs these -echo "I am node $SLURMD_NODENAME and I am the master node of this job with ID $SLURM_NODEID" +# code-examples node in jobs with N > 1 runs these +echo "I am node $SLURMD_NODENAME and I am the code-examples node of this job with ID $SLURM_NODEID" echo "There are $SLURM_NNODES nodes in this 
job, and each has $SLURM_CPUS_ON_NODE cores, for a total of $PROCS cores." printf "Let's get each node in the job to introduce itself:\n\n" @@ -39,7 +39,7 @@ echo :<<++++ Example Output -I am node r5i0n13 and I am the master node of this job with ID 0 +I am node r5i0n13 and I am the code-examples node of this job with ID 0 There are 2 nodes in this job, and each has 36 cores, for a total of 72 cores. Let's get each node in the job to introduce itself: diff --git a/slurm/source/slurm_variables b/slurm/source/slurm_variables index d1ab85a42..38824b99b 100644 --- a/slurm/source/slurm_variables +++ b/slurm/source/slurm_variables @@ -149,7 +149,7 @@ OUTPUT ENVIRONMENT VARIABLES Job array's index step size. SLURM_ARRAY_JOB_ID - Job array's master job ID number. + Job array's code-examples job ID number. SLURM_CLUSTER_NAME Name of the cluster on which the job is executing. diff --git a/slurm/source/stf_01.f90 b/slurm/source/stf_01.f90 index c6046dc4b..8fe157c74 100644 --- a/slurm/source/stf_01.f90 +++ b/slurm/source/stf_01.f90 @@ -102,7 +102,7 @@ module mympi ! use mpi include "mpif.h" integer numnodes,myid,mpi_err - integer, parameter::mpi_master=0 + integer, parameter::mpi_code-examples=0 integer status(MPI_STATUS_SIZE) end module !********************* @@ -129,7 +129,7 @@ program stommel call MPI_COMM_SIZE( MPI_COMM_WORLD, numnodes, mpi_err ) call MPI_COMM_RANK( MPI_COMM_WORLD, myid, mpi_err ) ! get the input. see above for typical values - if(myid .eq. mpi_master)then + if(myid .eq. 
mpi_code-examples)then instr="200 200" ; read(instr,*)nx,ny instr="2000000 2000000" ; read(instr,*)lx,ly instr="1.0e-9 2.25e-11 3.0e-6" ; read(instr,*)alpha,beta,gamma @@ -143,14 +143,14 @@ program stommel endif endif !send the data to other processors - call MPI_BCAST(nx, 1,MPI_INTEGER, mpi_master,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(ny, 1,MPI_INTEGER, mpi_master,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(steps,1,MPI_INTEGER, mpi_master,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(lx, 1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(ly, 1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(alpha,1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(beta, 1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(gamma,1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(nx, 1,MPI_INTEGER, mpi_code-examples,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(ny, 1,MPI_INTEGER, mpi_code-examples,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(steps,1,MPI_INTEGER, mpi_code-examples,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(lx, 1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(ly, 1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(alpha,1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(beta, 1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(gamma,1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) ! 
calculate the constants for the calculations dx=lx/(nx+1) dy=ly/(ny+1) @@ -170,7 +170,7 @@ program stommel dj=real(nx,b8)/real(numnodes,b8) j1=nint(1.0_b8+myid*dj) j2=nint(1.0_b8+(myid+1)*dj)-1 - if(myid == mpi_master)write(*,'("rows= ",i4)')numnodes + if(myid == mpi_code-examples)write(*,'("rows= ",i4)')numnodes write(*,101)myid,i1,i2,j1,j2 101 format("myid= ",i4,3x,& " (",i3," <= i <= ",i3,") , ", & @@ -196,11 +196,11 @@ program stommel call do_transfer(psi,i1,i2,j1,j2) ! write(*,*)myid,i,mydiff call MPI_REDUCE(mydiff,diff,1,MPI_DOUBLE_PRECISION, & - MPI_SUM,mpi_master,MPI_COMM_WORLD,mpi_err) - if(myid .eq. mpi_master .and. mod(i,iout) .eq. 0)write(*,'(i6,1x,g20.10)')i,diff + MPI_SUM,mpi_code-examples,MPI_COMM_WORLD,mpi_err) + if(myid .eq. mpi_code-examples .and. mod(i,iout) .eq. 0)write(*,'(i6,1x,g20.10)')i,diff enddo t2=MPI_Wtime() - if(myid .eq. mpi_master)write(*,'("run time =",f10.2)')t2-t1 + if(myid .eq. mpi_code-examples)write(*,'("run time =",f10.2)')t2-t1 !call write_grid(psi,i1,i2,j1,j2) call MPI_Finalize(mpi_err) end program stommel From 4a8a1cd1e11a528ceb52a72a38acac6bb456c11e Mon Sep 17 00:00:00 2001 From: hyandt Date: Wed, 14 Dec 2022 09:31:06 -0700 Subject: [PATCH 2/4] Fix word replacement --- .../DataSet/RTS-GMLC-6.400.xml | 2 +- applications/spark/README.md | 2 +- applications/spark/conf/spark-defaults.conf | 2 +- .../spark/conf/spark-defaults.conf.template | 2 +- applications/spark/config | 2 +- applications/spark/python.md | 10 +++---- applications/spark/r.md | 2 +- applications/spark/slurm_scripts/batch_job.sh | 2 +- .../spark/slurm_scripts/batch_jupyter.sh | 2 +- .../batch_job.sh | 2 +- .../batch_jupyter.sh | 2 +- applications/spark/spark_scripts/common.sh | 2 +- .../spark/spark_scripts/configure_spark.sh | 10 +++---- .../spark_scripts/start_spark_cluster.sh | 14 +++++----- .../spark/spark_scripts/stop_spark_cluster.sh | 2 +- .../spark/tests/batch_job.sh.template | 2 +- general/stream_benchmark/stream.c | 2 +- .../Julia-Parallel-Computing.md 
| 4 +-- .../Julia-Parallel-Computing.ipynb | 4 +-- slurm/multinode-task-per-core.sh | 6 ++-- slurm/source/slurm_variables | 2 +- slurm/source/stf_01.f90 | 28 +++++++++---------- 22 files changed, 53 insertions(+), 53 deletions(-) diff --git a/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml b/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml index aaeff3636..63c270d47 100755 --- a/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml +++ b/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml @@ -1,4 +1,4 @@ - + 1 2 diff --git a/applications/spark/README.md b/applications/spark/README.md index 4baec13c1..8ece7026e 100644 --- a/applications/spark/README.md +++ b/applications/spark/README.md @@ -163,7 +163,7 @@ be able to load data files in any of them: ## Debugging problems Open the Spark web UI to observe what's happening with your jobs. You will have to forward ports -8080 and 4040 of the code-examples node (first node in your SLURM allocation) through an ssh tunnel. +8080 and 4040 of the master node (first node in your SLURM allocation) through an ssh tunnel. Open your browser to http://localhost:4040 after configuring the tunnel to access the application UI. diff --git a/applications/spark/conf/spark-defaults.conf b/applications/spark/conf/spark-defaults.conf index 421c51b8b..95f6339fc 100644 --- a/applications/spark/conf/spark-defaults.conf +++ b/applications/spark/conf/spark-defaults.conf @@ -19,7 +19,7 @@ # This is useful for setting default environmental settings. 
# Example: -# spark.code-examples spark://code-examples:7077 +# spark.master spark://master:7077 # spark.eventLog.enabled true # spark.eventLog.dir hdfs://namenode:8021/directory # spark.serializer org.apache.spark.serializer.KryoSerializer diff --git a/applications/spark/conf/spark-defaults.conf.template b/applications/spark/conf/spark-defaults.conf.template index a8598ca84..19cba6e71 100644 --- a/applications/spark/conf/spark-defaults.conf.template +++ b/applications/spark/conf/spark-defaults.conf.template @@ -19,7 +19,7 @@ # This is useful for setting default environmental settings. # Example: -# spark.code-examples spark://code-examples:7077 +# spark.master spark://master:7077 # spark.eventLog.enabled true # spark.eventLog.dir hdfs://namenode:8021/directory # spark.serializer org.apache.spark.serializer.KryoSerializer diff --git a/applications/spark/config b/applications/spark/config index bb11509de..6eb94aec7 100644 --- a/applications/spark/config +++ b/applications/spark/config @@ -1,4 +1,4 @@ container = /datasets/images/apache_spark/spark_py39.sif container_instance_name = spark -code-examples_node_memory_overhead_gb = 10 +master_node_memory_overhead_gb = 10 worker_node_memory_overhead_gb = 5 diff --git a/applications/spark/python.md b/applications/spark/python.md index 1f5c911a5..25f2363ec 100644 --- a/applications/spark/python.md +++ b/applications/spark/python.md @@ -4,13 +4,13 @@ This uses ipython, which is optional. Remove the `--env` line to use the regular $ singularity run \ --env PYSPARK_DRIVER_PYTHON=ipython \ instance://spark \ - pyspark --code-examples spark://$(hostname):7077 + pyspark --master spark://$(hostname):7077 ``` Optional: check your environment to ensure that all configuration settings are correct. -Most importantly, ensure that you connected to the Spark cluster code-examples and are not in local mode. +Most importantly, ensure that you connected to the Spark cluster master and are not in local mode. 
pyspark prints the connection information during startup. For example: ``` -Spark context available as 'sc' (code-examples = spark://r2i7n35:7077, app id = app-20221202224041-0000). +Spark context available as 'sc' (master = spark://r2i7n35:7077, app id = app-20221202224041-0000). ``` You can dump all configuration settings with this command: ``` @@ -32,7 +32,7 @@ $ singularity run \ --env PYSPARK_DRIVER_PYTHON=jupyter \ --env PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=8889 --ip=0.0.0.0" \ instance://spark \ - pyspark --code-examples spark://$(hostname):7077 + pyspark --master spark://$(hostname):7077 ``` The Jupyter process will print a URL to the terminal. You can access it from your laptop after you forward the ports through an ssh tunnel. @@ -54,7 +54,7 @@ spark = SparkSession.builder.appName("my_app").getOrCreate() ``` $ singularity run \ instance://spark \ - spark-submit --code-examples spark://$(hostname):7077 + spark-submit --master spark://$(hostname):7077 ``` Note: if your script is Python, the filename must end in `.py`. diff --git a/applications/spark/r.md b/applications/spark/r.md index db01f677f..9bf6c63dd 100644 --- a/applications/spark/r.md +++ b/applications/spark/r.md @@ -2,6 +2,6 @@ ``` $ singularity run instance://spark sparkR > library(sparklyr) -> sc = spark_connect(code-examples = paste0("spark://",Sys.info()["nodename"],":7077")) +> sc = spark_connect(master = paste0("spark://",Sys.info()["nodename"],":7077")) > df = spark_read_parquet(sc = sc, path = "my_data.parquet", memory = FALSE) ``` diff --git a/applications/spark/slurm_scripts/batch_job.sh b/applications/spark/slurm_scripts/batch_job.sh index 7be9df1cd..f3a45dc75 100755 --- a/applications/spark/slurm_scripts/batch_job.sh +++ b/applications/spark/slurm_scripts/batch_job.sh @@ -12,5 +12,5 @@ SCRIPT_DIR=~/repos/HPC/applications/spark/spark_scripts ${SCRIPT_DIR}/configure_spark.sh ${SCRIPT_DIR}/start_spark_cluster.sh # This runs an example script inside the container. 
-singularity run instance://spark spark-submit --code-examples spark://$(hostname):7077 /opt/spark/examples/src/main/python/pi.py 500 +singularity run instance://spark spark-submit --master spark://$(hostname):7077 /opt/spark/examples/src/main/python/pi.py 500 ${SCRIPT_DIR}/stop_spark_cluster.sh diff --git a/applications/spark/slurm_scripts/batch_jupyter.sh b/applications/spark/slurm_scripts/batch_jupyter.sh index bc73fb285..5791da674 100755 --- a/applications/spark/slurm_scripts/batch_jupyter.sh +++ b/applications/spark/slurm_scripts/batch_jupyter.sh @@ -22,6 +22,6 @@ singularity run \ --network-args \ "portmap=8889:8889" \ instance://spark \ - pyspark --code-examples spark://$(hostname):7077 + pyspark --master spark://$(hostname):7077 ${SCRIPT_DIR}/stop_spark_cluster.sh diff --git a/applications/spark/slurm_scripts_with_resource_monitoring/batch_job.sh b/applications/spark/slurm_scripts_with_resource_monitoring/batch_job.sh index 65a8aa1b2..66da511c6 100755 --- a/applications/spark/slurm_scripts_with_resource_monitoring/batch_job.sh +++ b/applications/spark/slurm_scripts_with_resource_monitoring/batch_job.sh @@ -16,7 +16,7 @@ srun collect_stats.sh . & ${SCRIPT_DIR}/configure_spark.sh ${SCRIPT_DIR}/start_spark_cluster.sh # This runs an example script inside the container. 
-singularity run instance://spark spark-submit --code-examples spark://$(hostname):7077 /opt/spark/examples/src/main/python/pi.py 500 +singularity run instance://spark spark-submit --master spark://$(hostname):7077 /opt/spark/examples/src/main/python/pi.py 500 ${SCRIPT_DIR}/stop_spark_cluster.sh touch shutdown diff --git a/applications/spark/slurm_scripts_with_resource_monitoring/batch_jupyter.sh b/applications/spark/slurm_scripts_with_resource_monitoring/batch_jupyter.sh index ccce0c155..19d2c65de 100755 --- a/applications/spark/slurm_scripts_with_resource_monitoring/batch_jupyter.sh +++ b/applications/spark/slurm_scripts_with_resource_monitoring/batch_jupyter.sh @@ -26,7 +26,7 @@ singularity run \ --network-args \ "portmap=8889:8889" \ instance://spark \ - pyspark --code-examples spark://$(hostname):7077 + pyspark --master spark://$(hostname):7077 ${SCRIPT_DIR}/stop_spark_cluster.sh diff --git a/applications/spark/spark_scripts/common.sh b/applications/spark/spark_scripts/common.sh index 4e868a5aa..dde285bd0 100644 --- a/applications/spark/spark_scripts/common.sh +++ b/applications/spark/spark_scripts/common.sh @@ -31,7 +31,7 @@ function get_config_variable() export CONTAINER=$(get_config_variable "container") export CONTAINER_INSTANCE_NAME=$(get_config_variable "container_instance_name") -export code-examples_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "code-examples_node_memory_overhead_gb") +export master_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "master_node_memory_overhead_gb") export WORKER_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "worker_node_memory_overhead_gb") function get_memory_gb() diff --git a/applications/spark/spark_scripts/configure_spark.sh b/applications/spark/spark_scripts/configure_spark.sh index 8c1b2486a..7a3b21f84 100755 --- a/applications/spark/spark_scripts/configure_spark.sh +++ b/applications/spark/spark_scripts/configure_spark.sh @@ -12,13 +12,13 @@ SLURM_JOB_IDS=() # Check for errors in user input. Exit on error. 
function check_errors() { - code-examples_memory_gb=$(( 1 + ${DRIVER_MEMORY_GB} )) + master_memory_gb=$(( 1 + ${DRIVER_MEMORY_GB} )) if [ ${ENABLE_HISTORY_SERVER} = true ]; then - (( code-examples_memory_gb += 1 )) + (( master_memory_gb += 1 )) fi - if [ ${code-examples_memory_gb} -gt ${code-examples_NODE_MEMORY_OVERHEAD_GB} ]; then - error "code-examples_node_memory_overhead_gb=${code-examples_NODE_MEMORY_OVERHEAD_GB} is too small." \ + if [ ${master_memory_gb} -gt ${master_NODE_MEMORY_OVERHEAD_GB} ]; then + error "master_node_memory_overhead_gb=${master_NODE_MEMORY_OVERHEAD_GB} is too small." \ "Increase it or reduce driver_memory_gb=${DRIVER_MEMORY_GB}" fi } @@ -40,7 +40,7 @@ function config_executors() memory_gb_by_node=() lowest_memory_gb=0 for node_mem in $(cat ${CONFIG_DIR}/conf/worker_memory); do - mem=$(( ${node_mem} - ${code-examples_NODE_MEMORY_OVERHEAD_GB} )) + mem=$(( ${node_mem} - ${master_NODE_MEMORY_OVERHEAD_GB} )) if [ ${lowest_memory_gb} -eq 0 ] || [ ${node_mem} -lt ${lowest_memory_gb} ]; then lowest_memory_gb=${mem} fi diff --git a/applications/spark/spark_scripts/start_spark_cluster.sh b/applications/spark/spark_scripts/start_spark_cluster.sh index 7ec80d85a..616f51007 100755 --- a/applications/spark/spark_scripts/start_spark_cluster.sh +++ b/applications/spark/spark_scripts/start_spark_cluster.sh @@ -34,26 +34,26 @@ function start_containers() function start_spark_processes() { - code-examples_node=$(hostname | tr -d '\n') - spark_cluster=spark://${code-examples_node}:7077 + master_node=$(hostname | tr -d '\n') + spark_cluster=spark://${master_node}:7077 - exec_spark_process start-code-examples.sh + exec_spark_process start-master.sh check_history_server_enabled if [ $? -eq 0 ]; then exec_spark_process start-history-server.sh fi - ${SCRIPT_DIR}/start_spark_worker.sh ${CONFIG_DIR} ${code-examples_NODE_MEMORY_OVERHEAD_GB} ${spark_cluster} + ${SCRIPT_DIR}/start_spark_worker.sh ${CONFIG_DIR} ${master_NODE_MEMORY_OVERHEAD_GB} ${spark_cluster} ret=$? 
if [[ $ret -ne 0 ]]; then - echo "Error: Failed to start Spark worker on the code-examples node: ${ret}" + echo "Error: Failed to start Spark worker on the master node: ${ret}" exit $ret fi - echo "Started Spark processes on code-examples node ${code-examples_node}" + echo "Started Spark processes on master node ${master_node}" # Spark does provide a way to start all nodes at once: start-workers.sh. # But that doesn't allow specifying memory for each node independently. for node_name in $(cat ${CONFIG_DIR}/conf/workers); do - if [[ $node_name != ${code-examples_node} ]]; then + if [[ $node_name != ${master_node} ]]; then ssh ${USER}@${node_name} ${SCRIPT_DIR}/start_spark_worker.sh \ ${CONFIG_DIR} ${WORKER_NODE_MEMORY_OVERHEAD_GB} ${spark_cluster} ret=$? diff --git a/applications/spark/spark_scripts/stop_spark_cluster.sh b/applications/spark/spark_scripts/stop_spark_cluster.sh index a7a8fcd02..80703fe66 100755 --- a/applications/spark/spark_scripts/stop_spark_cluster.sh +++ b/applications/spark/spark_scripts/stop_spark_cluster.sh @@ -46,7 +46,7 @@ fi # scripts ssh to each worker node. It doesn't happen in our ssh commands. # Workaround the issue by stopping the Spark worker inside stop_container.sh. 
# singularity exec instance://${CONTAINER_INSTANCE_NAME} stop-all.sh -singularity exec instance://${CONTAINER_INSTANCE_NAME} stop-code-examples.sh +singularity exec instance://${CONTAINER_INSTANCE_NAME} stop-master.sh for node_name in $(cat ${CONFIG_DIR}/conf/workers); do ssh ${USER}@${node_name} ${SCRIPT_DIR}/stop_container.sh ${CONFIG_DIR} done diff --git a/applications/spark/tests/batch_job.sh.template b/applications/spark/tests/batch_job.sh.template index 95044fe01..c582ec576 100644 --- a/applications/spark/tests/batch_job.sh.template +++ b/applications/spark/tests/batch_job.sh.template @@ -50,7 +50,7 @@ if [ ${ret} -ne 0 ]; then exit 1 fi -singularity run instance://spark spark-submit --code-examples spark://$(hostname):7077 ../test_job.py +singularity run instance://spark spark-submit --master spark://$(hostname):7077 ../test_job.py ret=$? if [ ${ret} -ne 0 ]; then echo "Error: Failed to run test_job.py: ${ret}" diff --git a/general/stream_benchmark/stream.c b/general/stream_benchmark/stream.c index 3407fadc3..93e1e0324 100644 --- a/general/stream_benchmark/stream.c +++ b/general/stream_benchmark/stream.c @@ -247,7 +247,7 @@ main() printf(HLINE); #pragma omp parallel { -#pragma omp code-examples +#pragma omp master { k = omp_get_num_threads(); printf ("Number of Threads requested = %i\n",k); diff --git a/languages/julia/julia-tutorial/Julia-Parallel-Computing.md b/languages/julia/julia-tutorial/Julia-Parallel-Computing.md index eb25320f3..643e0c216 100644 --- a/languages/julia/julia-tutorial/Julia-Parallel-Computing.md +++ b/languages/julia/julia-tutorial/Julia-Parallel-Computing.md @@ -377,7 +377,7 @@ Each Julia process is identified by a (64-bit) integer. We can get a list of all procs() = [1, 2, 3] -There is a distinction between the original Julia process and those we launched. The original Julia process is often called the **code-examples** process and always has id equal to 1. The launched processes are called **workers**. 
We can obtain a list of workers with the `workers` function: +There is a distinction between the original Julia process and those we launched. The original Julia process is often called the **master** process and always has id equal to 1. The launched processes are called **workers**. We can obtain a list of workers with the `workers` function: ```julia @@ -548,7 +548,7 @@ function run_mci_rc() end; ``` -Here we create a `RemoteChannel` on the code-examples process, divide the computationally intensive `integrator` function into two calls and remotely execute them on the worker processes. Then we start a task on the code-examples process to accumulate the values and call fetch to wait for and retrieve the result. +Here we create a `RemoteChannel` on the master process, divide the computationally intensive `integrator` function into two calls and remotely execute them on the worker processes. Then we start a task on the master process to accumulate the values and call fetch to wait for and retrieve the result. ```julia diff --git a/languages/julia/julia-tutorial/source-notebooks/Julia-Parallel-Computing.ipynb b/languages/julia/julia-tutorial/source-notebooks/Julia-Parallel-Computing.ipynb index ea9555f31..a3b63344d 100644 --- a/languages/julia/julia-tutorial/source-notebooks/Julia-Parallel-Computing.ipynb +++ b/languages/julia/julia-tutorial/source-notebooks/Julia-Parallel-Computing.ipynb @@ -1008,7 +1008,7 @@ } }, "source": [ - "There is a distinction between the original Julia process and those we launched. The original Julia process is often called the **code-examples** process and always has id equal to 1. The launched processes are called **workers**. We can obtain a list of workers with the `workers` function:" + "There is a distinction between the original Julia process and those we launched. The original Julia process is often called the **master** process and always has id equal to 1. The launched processes are called **workers**. 
We can obtain a list of workers with the `workers` function:" ] }, { @@ -1606,7 +1606,7 @@ } }, "source": [ - "Here we create a `RemoteChannel` on the code-examples process, divide the computationally intensive `integrator` function into two calls and remotely execute them on the worker processes. Then we start a task on the code-examples process to accumulate the values and call fetch to wait for and retrieve the result." + "Here we create a `RemoteChannel` on the master process, divide the computationally intensive `integrator` function into two calls and remotely execute them on the worker processes. Then we start a task on the master process to accumulate the values and call fetch to wait for and retrieve the result." ] }, { diff --git a/slurm/multinode-task-per-core.sh b/slurm/multinode-task-per-core.sh index 42a8b6cb1..c2b749f6b 100755 --- a/slurm/multinode-task-per-core.sh +++ b/slurm/multinode-task-per-core.sh @@ -18,8 +18,8 @@ USAGE: sbatch –A -N multinode-task-per-core.sh PROCS=$(($SLURM_NNODES * $SLURM_CPUS_ON_NODE)) # Number of CPUs * number of nodes -# code-examples node in jobs with N > 1 runs these -echo "I am node $SLURMD_NODENAME and I am the code-examples node of this job with ID $SLURM_NODEID" +# master node in jobs with N > 1 runs these +echo "I am node $SLURMD_NODENAME and I am the master node of this job with ID $SLURM_NODEID" echo "There are $SLURM_NNODES nodes in this job, and each has $SLURM_CPUS_ON_NODE cores, for a total of $PROCS cores." printf "Let's get each node in the job to introduce itself:\n\n" @@ -39,7 +39,7 @@ echo :<<++++ Example Output -I am node r5i0n13 and I am the code-examples node of this job with ID 0 +I am node r5i0n13 and I am the master node of this job with ID 0 There are 2 nodes in this job, and each has 36 cores, for a total of 72 cores. 
Let's get each node in the job to introduce itself: diff --git a/slurm/source/slurm_variables b/slurm/source/slurm_variables index 38824b99b..d1ab85a42 100644 --- a/slurm/source/slurm_variables +++ b/slurm/source/slurm_variables @@ -149,7 +149,7 @@ OUTPUT ENVIRONMENT VARIABLES Job array's index step size. SLURM_ARRAY_JOB_ID - Job array's code-examples job ID number. + Job array's master job ID number. SLURM_CLUSTER_NAME Name of the cluster on which the job is executing. diff --git a/slurm/source/stf_01.f90 b/slurm/source/stf_01.f90 index 8fe157c74..c6046dc4b 100644 --- a/slurm/source/stf_01.f90 +++ b/slurm/source/stf_01.f90 @@ -102,7 +102,7 @@ module mympi ! use mpi include "mpif.h" integer numnodes,myid,mpi_err - integer, parameter::mpi_code-examples=0 + integer, parameter::mpi_master=0 integer status(MPI_STATUS_SIZE) end module !********************* @@ -129,7 +129,7 @@ program stommel call MPI_COMM_SIZE( MPI_COMM_WORLD, numnodes, mpi_err ) call MPI_COMM_RANK( MPI_COMM_WORLD, myid, mpi_err ) ! get the input. see above for typical values - if(myid .eq. mpi_code-examples)then + if(myid .eq. 
mpi_master)then instr="200 200" ; read(instr,*)nx,ny instr="2000000 2000000" ; read(instr,*)lx,ly instr="1.0e-9 2.25e-11 3.0e-6" ; read(instr,*)alpha,beta,gamma @@ -143,14 +143,14 @@ program stommel endif endif !send the data to other processors - call MPI_BCAST(nx, 1,MPI_INTEGER, mpi_code-examples,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(ny, 1,MPI_INTEGER, mpi_code-examples,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(steps,1,MPI_INTEGER, mpi_code-examples,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(lx, 1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(ly, 1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(alpha,1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(beta, 1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) - call MPI_BCAST(gamma,1,MPI_DOUBLE_PRECISION,mpi_code-examples,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(nx, 1,MPI_INTEGER, mpi_master,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(ny, 1,MPI_INTEGER, mpi_master,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(steps,1,MPI_INTEGER, mpi_master,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(lx, 1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(ly, 1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(alpha,1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(beta, 1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) + call MPI_BCAST(gamma,1,MPI_DOUBLE_PRECISION,mpi_master,MPI_COMM_WORLD,mpi_err) ! 
calculate the constants for the calculations dx=lx/(nx+1) dy=ly/(ny+1) @@ -170,7 +170,7 @@ program stommel dj=real(nx,b8)/real(numnodes,b8) j1=nint(1.0_b8+myid*dj) j2=nint(1.0_b8+(myid+1)*dj)-1 - if(myid == mpi_code-examples)write(*,'("rows= ",i4)')numnodes + if(myid == mpi_master)write(*,'("rows= ",i4)')numnodes write(*,101)myid,i1,i2,j1,j2 101 format("myid= ",i4,3x,& " (",i3," <= i <= ",i3,") , ", & @@ -196,11 +196,11 @@ program stommel call do_transfer(psi,i1,i2,j1,j2) ! write(*,*)myid,i,mydiff call MPI_REDUCE(mydiff,diff,1,MPI_DOUBLE_PRECISION, & - MPI_SUM,mpi_code-examples,MPI_COMM_WORLD,mpi_err) - if(myid .eq. mpi_code-examples .and. mod(i,iout) .eq. 0)write(*,'(i6,1x,g20.10)')i,diff + MPI_SUM,mpi_master,MPI_COMM_WORLD,mpi_err) + if(myid .eq. mpi_master .and. mod(i,iout) .eq. 0)write(*,'(i6,1x,g20.10)')i,diff enddo t2=MPI_Wtime() - if(myid .eq. mpi_code-examples)write(*,'("run time =",f10.2)')t2-t1 + if(myid .eq. mpi_master)write(*,'("run time =",f10.2)')t2-t1 !call write_grid(psi,i1,i2,j1,j2) call MPI_Finalize(mpi_err) end program stommel From eb41bbbb4a39054611e541c324771c3205233bcf Mon Sep 17 00:00:00 2001 From: hyandt Date: Wed, 14 Dec 2022 09:45:41 -0700 Subject: [PATCH 3/4] Revert files --- .../DataSet/RTS-GMLC-6.400.xml | 12 ++++++------ applications/spark/conf/spark-env.sh | 10 +++++----- applications/spark/spark_scripts/common.sh | 2 +- applications/spark/spark_scripts/configure_spark.sh | 6 +++--- .../spark/spark_scripts/start_spark_cluster.sh | 2 +- slurm/multinode-task-per-core.sh | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml b/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml index 63c270d47..70dcb54fe 100755 --- a/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml +++ b/applications/plexos-hpc-walkthrough/DataSet/RTS-GMLC-6.400.xml @@ -1409,7 +1409,7 @@ true true 88 - code-examples switch for computation of reliability indices LOLP, 
LOLE, etc. + Master switch for computation of reliability indices LOLP, LOLE, etc. 107 @@ -1423,7 +1423,7 @@ true true 1734 - code-examples switch for computation of reliability indices LOLP, LOLE, etc across multiple areas. + Master switch for computation of reliability indices LOLP, LOLE, etc across multiple areas. 108 @@ -1604,7 +1604,7 @@ true true 88 - code-examples switch for computation of reliability indices LOLP, LOLE, etc. + Master switch for computation of reliability indices LOLP, LOLE, etc. 121 @@ -1618,7 +1618,7 @@ true true 1734 - code-examples switch for computation of reliability indices LOLP, LOLE, etc across multiple areas. + Master switch for computation of reliability indices LOLP, LOLE, etc across multiple areas. 122 @@ -10966,7 +10966,7 @@ Nested Reserves 0 -1 - code-examples Reserves + Master Reserves 0 -1 false @@ -133924,4 +133924,4 @@ 0 0 - \ No newline at end of file + \ No newline at end of file diff --git a/applications/spark/conf/spark-env.sh b/applications/spark/conf/spark-env.sh index 85e9b2305..b7f09b92f 100644 --- a/applications/spark/conf/spark-env.sh +++ b/applications/spark/conf/spark-env.sh @@ -45,20 +45,20 @@ # - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN # Options for the daemons used in the standalone deploy mode -# - SPARK_code-examples_HOST, to bind the code-examples to a different IP address or hostname -# - SPARK_code-examples_PORT / SPARK_code-examples_WEBUI_PORT, to use non-default ports for the code-examples -# - SPARK_code-examples_OPTS, to set config properties only for the code-examples (e.g. "-Dx=y") +# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname +# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master +# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. 
"-Dx=y") # - SPARK_WORKER_CORES, to set the number of cores to use on this machine # - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) # - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker # - SPARK_WORKER_DIR, to set the working directory of worker processes # - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") -# - SPARK_DAEMON_MEMORY, to allocate to the code-examples, worker and history server themselves (default: 1g). +# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") # - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons -# - SPARK_PUBLIC_DNS, to set the public dns name of the code-examples or workers +# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers # Options for launcher # - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. 
"-Dx=y") diff --git a/applications/spark/spark_scripts/common.sh b/applications/spark/spark_scripts/common.sh index dde285bd0..d3a85700a 100644 --- a/applications/spark/spark_scripts/common.sh +++ b/applications/spark/spark_scripts/common.sh @@ -31,7 +31,7 @@ function get_config_variable() export CONTAINER=$(get_config_variable "container") export CONTAINER_INSTANCE_NAME=$(get_config_variable "container_instance_name") -export master_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "master_node_memory_overhead_gb") +export MASTER_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "master_node_memory_overhead_gb") export WORKER_NODE_MEMORY_OVERHEAD_GB=$(get_config_variable "worker_node_memory_overhead_gb") function get_memory_gb() diff --git a/applications/spark/spark_scripts/configure_spark.sh b/applications/spark/spark_scripts/configure_spark.sh index 7a3b21f84..150dde31b 100755 --- a/applications/spark/spark_scripts/configure_spark.sh +++ b/applications/spark/spark_scripts/configure_spark.sh @@ -17,8 +17,8 @@ function check_errors() (( master_memory_gb += 1 )) fi - if [ ${master_memory_gb} -gt ${master_NODE_MEMORY_OVERHEAD_GB} ]; then - error "master_node_memory_overhead_gb=${master_NODE_MEMORY_OVERHEAD_GB} is too small." \ + if [ ${master_memory_gb} -gt ${MASTER_NODE_MEMORY_OVERHEAD_GB} ]; then + error "master_node_memory_overhead_gb=${MASTER_NODE_MEMORY_OVERHEAD_GB} is too small." 
\ "Increase it or reduce driver_memory_gb=${DRIVER_MEMORY_GB}" fi } @@ -40,7 +40,7 @@ function config_executors() memory_gb_by_node=() lowest_memory_gb=0 for node_mem in $(cat ${CONFIG_DIR}/conf/worker_memory); do - mem=$(( ${node_mem} - ${master_NODE_MEMORY_OVERHEAD_GB} )) + mem=$(( ${node_mem} - ${MASTER_NODE_MEMORY_OVERHEAD_GB} )) if [ ${lowest_memory_gb} -eq 0 ] || [ ${node_mem} -lt ${lowest_memory_gb} ]; then lowest_memory_gb=${mem} fi diff --git a/applications/spark/spark_scripts/start_spark_cluster.sh b/applications/spark/spark_scripts/start_spark_cluster.sh index 616f51007..701c21f40 100755 --- a/applications/spark/spark_scripts/start_spark_cluster.sh +++ b/applications/spark/spark_scripts/start_spark_cluster.sh @@ -42,7 +42,7 @@ function start_spark_processes() if [ $? -eq 0 ]; then exec_spark_process start-history-server.sh fi - ${SCRIPT_DIR}/start_spark_worker.sh ${CONFIG_DIR} ${master_NODE_MEMORY_OVERHEAD_GB} ${spark_cluster} + ${SCRIPT_DIR}/start_spark_worker.sh ${CONFIG_DIR} ${MASTER_NODE_MEMORY_OVERHEAD_GB} ${spark_cluster} ret=$? if [[ $ret -ne 0 ]]; then echo "Error: Failed to start Spark worker on the master node: ${ret}" diff --git a/slurm/multinode-task-per-core.sh b/slurm/multinode-task-per-core.sh index c2b749f6b..04646f785 100755 --- a/slurm/multinode-task-per-core.sh +++ b/slurm/multinode-task-per-core.sh @@ -18,7 +18,7 @@ USAGE: sbatch –A -N multinode-task-per-core.sh PROCS=$(($SLURM_NNODES * $SLURM_CPUS_ON_NODE)) # Number of CPUs * number of nodes -# master node in jobs with N > 1 runs these +# Master node in jobs with N > 1 runs these echo "I am node $SLURMD_NODENAME and I am the master node of this job with ID $SLURM_NODEID" echo "There are $SLURM_NNODES nodes in this job, and each has $SLURM_CPUS_ON_NODE cores, for a total of $PROCS cores." 
printf "Let's get each node in the job to introduce itself:\n\n" From aba975255b482eae95990833c8557577712d337d Mon Sep 17 00:00:00 2001 From: hyandt Date: Wed, 14 Dec 2022 09:47:36 -0700 Subject: [PATCH 4/4] Revert file --- general/markdown/RenameStep1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/general/markdown/RenameStep1.md b/general/markdown/RenameStep1.md index f02317038..4afa9a9fb 100644 --- a/general/markdown/RenameStep1.md +++ b/general/markdown/RenameStep1.md @@ -3,5 +3,5 @@ This content is more to summarize formatting considerations, and possibly a general organization for how content might be laid out. Naturally, this will need to be adapted to the training or tutorial at hand. -A reasonable high-level layout is to use the top-level README.md as a code-examples navigation, with a list of sequential steps to be followed that are linked to their respective detailed documents. +A reasonable high-level layout is to use the top-level README.md as a master navigation, with a list of sequential steps to be followed that are linked to their respective detailed documents.