diff --git a/cluster-setup/Dockerfile b/cluster-setup/Dockerfile
new file mode 100644
index 000000000..a22e82b0c
--- /dev/null
+++ b/cluster-setup/Dockerfile
@@ -0,0 +1,11 @@
+ARG PYTHON_VERSION="bookworm"
+
+FROM python:${PYTHON_VERSION} AS base
+
+RUN apt update -y && apt upgrade -y
+
+RUN pip install --upgrade pip
+
+RUN pip install pyyaml
+
+CMD ["/bin/bash", "-l"]
diff --git a/cluster-setup/README.md b/cluster-setup/README.md
index 38c85a845..9d2068212 100644
--- a/cluster-setup/README.md
+++ b/cluster-setup/README.md
@@ -1,9 +1,447 @@
 # CfE Cluster Setup
-This repository contains code and instructions for setting up a multi-host compute cluster.
+This directory contains code and instructions for setting up a multi-host compute cluster.
+
+## Deployment to Octomore
+
+This procedure, as of December 12, 2023, looks like the following.
+
+### Before you wipe the old machine
+
+If you're planning to restore the data from the old machine after the deployment,
+make sure your backups are in order. System backups are typically kept using `rsnapshot`,
+and a backup of the Kive PostgreSQL database is kept using `barman`. For example,
+on our production server, these are kept on a NAS mounted at `/media/dragonite`.
+
+Optionally, if your backups are on a physical drive connected to the machine, to avoid
+accidentally damaging or altering the backups, you could physically remove them until the
+setup is complete and you're ready to restore data from them.
+
+There are a few files that are worth preserving in particular and having available to you
+during the deployment process:
+
+* Preserve copies of your system's `/etc/passwd`, `/etc/group`, and `/etc/shadow`. This
+  information will be used to populate the new system with the same users and groups
+  from the old system.
+* Create a dump of the Kive PostgreSQL database using `pg_dumpall`. As the upgrade may
+  involve moving to a newer version of PostgreSQL, we likely can't use the Barman
+  backups to migrate from; thus we must do it the "old-fashioned" way.
+* Preserve a copy of `/etc/kive/kive_apache.conf` and/or `/etc/kive/kive_purge.conf`.
+  These files contain the database password used by Kive (via `apache2`) to access PostgreSQL.
+  You can also just preserve this password and discard the files, as the files will be
+  recreated by Ansible.
+* Preserve a copy of the `barman` user's `.pgpass` file. This contains the passwords
+  used by the `barman` and `streaming_barman` users when connecting to PostgreSQL,
+  and keeping these makes it easier to get the database set back up after importing
+  the database from the old system. Likewise here you can also just preserve the passwords
+  and discard the file. (Note that this file will typically *not* be present in the `rsnapshot`
+  backups, as the Barman user's home directory is in `/var`, which is not backed up.)
+
+### Install Ubuntu and do basic network setup on the head node
+
+First, manually install Ubuntu Jammy on the head node using an Ubuntu live USB drive.
+At most points, follow the defaults. Some places where you need to fill in some details:
+
+- Create a user with username `ubuntu` when prompted during installation. This will be
+  our "bootstrap" user.
+- Choose an appropriate system name for the computer, e.g. "octomore".
+- Choose a root drive. As of the time of writing, there is a 120GB SSD on the system; this
+  is an appropriate choice for the root drive.
+- Manually set up the LAN-facing interface (probably `eno0`) with IP address 192.168.69.86,
+  subnet 192.168.68.0/23, gateway 192.168.68.1, and DHCP server 192.168.168.101.
+- Enable SSH when prompted. You don't need to import any identity at this point.
+
+Note that the completion screen isn't super obvious, so keep an eye out for a completion message
+at the top left of the screen at the end of the process. Once this is done, you can interact
+with the head node via SSH.
+
+Next, upload the contents of [initialization/head] to the server and run `head_configuration.bash`
+using `sudo`.
+This sets up the root user's SSH key and `/etc/hosts`, and installs Ansible on the head node.
+Accept the defaults whenever it asks which services should be restarted.
+Now that Ansible is available on the head node, most of the rest of the procedure will be done
+using Ansible playbooks defined in the [deployment] directory. Copy the `cluster-setup` directory
+to the head node using the `deploy_cluster_setup.bash` script, placing it in a sensible location
+with the appropriate permissions. If you make changes, you can also use the same script
+to keep them synchronized between your workstation and the head node.
+
+#### Prepare Ansible configuration
+
+Go to the `deployment/group_vars` directory and create an `all.yaml` file from the
+`octomore_template.yaml` file by copying and filling in some details.
+
+For the passwords, you can use a password generator to generate new passwords and secret keys;
+however, it makes sense to use the same PostgreSQL passwords as on the old system.
+These passwords are:
+
+* `kive_db_password`: this is the one preserved from `/etc/kive/kive_apache.conf`
+or `/etc/kive/kive_purge.conf`.
+* `barman_password`: this is in the `barman` user's `.pgpass` file.
+* `streaming_barman_password`: this is also in the `barman` user's `.pgpass` file.
+
+Some other notable settings that you may need to adjust:
+
+* `kive_allowed_hosts`: this is a JSON-formatted list of IP addresses/URLs that the
+web server will respond to requests on.
+* `kive_subject_prefix`: this will be prepended to the emails sent by the Kive system.
+It's a good idea to include some details on this system, e.g. "Kive server on Octomore",
+or "Kive server on developer workstation".
+* `kive_purge_start`: sets the threshold for the Kive purge task to perform file cleanup.
+* `kive_purge_stop`: sets the stopping threshold for this Kive purge task; that is, a
+purge will stop when the remaining files' total size is under this threshold.
+* `kive_log_level`: the logging level, as understood by [Django's logging utilities][DjangoLogging],
+used by the purge task.
+
+Then go to `deployment/` and create an `ansible.cfg` from one of the provided templates,
+probably `ansible_octomore.cfg`. These files will be necessary for Ansible to work.
+
+> Note: all playbooks should be run using `sudo`!
+
+[DjangoLogging]: https://docs.djangoproject.com/en/4.0/topics/logging/
+
+#### General preliminary setup
+
+The first playbook we will run sets up the `/data` partition, so the first thing we do
+is find the `/dev/disk/by-id/` entry that corresponds to the drive you want to use as `/data`
+and put the *basename* (i.e. the name of the soft link in the directory without the
+`/dev/disk/by-id/` part of the path) into `group_vars/all.yaml` as the lone entry in the
+`data_physical_volumes` list. (Or, if you wish to use several volumes combined into
+one logical volume, put all their names in this list.)
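+
+As a quick sketch of what this step looks like (the device name below is purely a
+hypothetical example; use whatever your own system reports), you can list the stable
+`by-id` names and then record the one you chose:
+
+    # Show the by-id soft links; ignore the "-partN" entries.
+    ls -l /dev/disk/by-id/
+
+Then, in `deployment/group_vars/all.yaml`:
+
+    # Hypothetical serial number -- substitute your own drive's by-id basename.
+    data_physical_volumes:
+      - ata-EXAMPLEDISK_Z1234567
+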
+> If any drives are already recognized by LVM from a previous system, you should
+> delete the logical volumes, volume groups, and physical volumes associated with them.
+> Details of how to do so may be found in [the LVM documentation][UbuntuLVMDocs].
+> If there are any [mdadm](https://raid.wiki.kernel.org/index.php/A_guide_to_mdadm)
+> RAID arrays on these drives, you may also need to shut those down first using
+> `mdadm --stop [array device]`.
+
+[UbuntuLVMDocs]: https://manpages.ubuntu.com/manpages/jammy/en/man8/lvm.8.html
+
+Now we can run the playbook `octomore_preliminary_setup.yaml`. This sets up the `/data` partition,
+prepares some other system stuff on the head node, and configures the internal-facing networking.
+With this in place, the playbook should set up an `ext4` volume at `/data` on the drive
+you specified.
+
+#### Set up your backup drive
+
+Next, set up a backup drive for your system. A sample of how this was done for Octomore
+is detailed in `create_backup_filesystem.yaml`. On another server you might use a
+NAS-based backup solution instead. The goal in the end is to have a backup drive mounted
+at the path specified in your `group_vars` as `kive_backup_path`; by default this would
+be `/media/backup`.
+
+### Install Ubuntu on the compute nodes
+
+At this point, go back into the server room and install Ubuntu Jammy on the compute nodes.
+These machines only have one hard drive, and their ethernet should automatically be set up
+by default (the head node provides NAT and DHCP), so this should be a very straightforward
+installation. Again, create a user with username `ubuntu` to be the bootstrap user.
+Fetch the SSH public key generated by the root user on the head node during the running of
+`head_configuration.bash` and place it in the [initialization/worker] directory on the
+head node as `head_node_root_id_ed25519.pub` (don't commit this file to source control;
+it isn't a security risk, but it isn't needed and might cause confusion later). Make an
+appropriate `/etc/hosts` file for the worker nodes and place it in [initialization/worker]
+as `cluster_hosts`; appropriate templates for both Octomore and Bulbasaur are in that
+directory as `cluster_hosts_octomore` and `cluster_hosts_bulbasaur` respectively, so you can
+copy one of those to `cluster_hosts` if you don't need anything customized.
-# Test Environment
+Copy the contents of the [initialization/worker] directory to each compute node,
+including the aforementioned SSH public key. Then, run `worker_configuration.bash` using
+`sudo`, which will install the necessary packages and set up the necessary SSH access for
+the node to be used with Ansible.
+
+### Annoying detour: reassign the bootstrap user's UID and GID
+
+At this point, your `ubuntu` user on all the machines likely has a UID and GID of 1000.
+This may conflict with one of the user accounts that will later be
+imported into this machine. If this is the case, you can run `reassign_bootstrap_user_uid.yaml`.
+You may need to create a *second* bootstrap user to do this, as running the playbook as `ubuntu`
+may fail because the user is currently being used (even if you use `sudo`). This second bootstrap
+user can be removed right after this playbook is done, and you can proceed again as the `ubuntu`
+user.
+
+### Import users and groups from the old system
+
+The next playbook to run imports users from the old system.
First, a YAML file must be prepared +using `export_users_and_groups.py` from the old system's `/etc/shadow`, `/etc/passwd`, and +`/etc/group`. (A Dockerfile and docker compose file are provided in this directory if you +need a simple environment with Python 3 to run the script.) Next, run + + sudo ansible-playbook --extra-vars "@[name of the produced YAML file]" import_users.yaml + +This will import user accounts into the head node. (These will later be synchronized to the +compute node as part of a subsequent playbook.) + +From here, you can lock and expire the `ubuntu` user and start using one of the just-imported accounts, +if you have one. Make sure that your uploaded `cluster-setup` directory is accessible by +the account you're using if you do so. The `lock_bootstrap_user.yaml` playbook can do this; +modify the `user_name` variable if necessary. + +### Get SSL credentials for the webserver + +Before you install Kive in the next step, you must get the SSL credentials for the server. +These must be acquired securely from IT or within the software group, and placed into the +[deployment] directory. *DO NOT* commit these files to source! + +The files needed are: + +* `DigiCertCA.crt`: the DigiCert certificate authority (CA) key, which specifies that DigiCert + issued the key. +* `star_cfe.crt`: the wildcard certificate issued by DigiCert, which certifies that this server + belongs to the `cfenet.ubc.ca` or `bccfe.ca` domain. +* `star_cfe.key`: our private signing key, used to issue a public key for HTTPS connections. + +These will then be used in the next step to configure Apache. + +### Set up network drives + +Our compute server also requires two network mounts, for `macdatafile` and `RAW_DATA`, in +order for MiCall to run. The playbook `mount_network_drives.yaml` sets these up; fill +in the required variables in `group_vars/all.yaml`; their names and dummy values are in +`group_vars/octomore_template.yaml`. + +*TO DO*: this playbook can be updated to use the `mount_network_drives` role. + +#### Changing network drive permissions + +Originally Octomore's network drives were mounted read-write on all nodes, and +Bulbasaur's network drives were mounted read-only. However, after discussion, +we decided it was likely best to mount them read-write only on Octomore's head node +and read-only on its compute nodes, as we believe only the head node needs to write to +these drives when it runs MiCall Watcher. + +It also makes sense to restrict all of Bulbasaur's nodes to read-only access, but if +Bulbasaur ever needs to be used for clinical work, we would need to change the permissions +(and install MiCall Watcher). + +If it turns out that we were incorrect, we would need to restore all of Octomore's +nodes to having read-write permissions on both network drives. + +The playbooks `network_drives_standard.yaml`, `network_drives_read_only.yaml`, +and `network_drives_read_write.yaml` were written to make these configuration changes +whenever necessary. + +### Install Kive + +With all of that table-setting in place, the main playbook to run is `kive_setup.yml`. This is +the "main" playbook, and will take longer to run. + +At this point, you should have a fresh, "empty" server, with Kive running. Several +`systemd`-based background tasks that perform Kive cleanup and backups should also be +in place. If that's your goal, then you can stop here. + +### Install FastTree + +Our Phylowatch service requires [FastTree] 2.1.9 to be installed on the cluster (at the time +of writing). 
This is an older version so the binaries are not directly available on the +FastTree website; rather, we must compile it from [the source code][FastTreeSourceCode]. +At the time of writing, the source code is available on their website, but if this ever +disappears, we maintain a vendored copy on macdatafile in the `Phylowatch` directory +as `FastTree-2.1.9.c`. + +[FastTree]: https://microbesonline.org/fasttree/ +[FastTreeSourceCode]: https://microbesonline.org/fasttree/FastTree-2.1.9.c + +Put this file into the `deployment` directory on the head node, and run the +`install_fasttree.yaml` playbook to compile and install FastTree. + +### Optional (but recommended): install smartmontools + +To install the `smartmontools` package, which provides `smartctl`, use the +`install_smartmontools.yaml` playbook (or simply install it using `apt`). + +## Restore from an old system + +If you are restoring an old system, make the backups available somewhere on +your system; e.g. at `/media/old_data` or a similar mount point. + +### Shut down Kive and backup services + +First, shut down the Kive purge tasks created in the previous step: + + sudo systemctl stop kive_purge.timer + sudo systemctl stop kive_purge_synch.timer + +Next, shut down the backup tasks that were created in the previous step: + + sudo systemctl stop barman_backup.timer + sudo systemctl stop rsnapshot_alpha.timer + sudo systemctl stop rsnapshot_beta.timer + sudo systemctl stop rsnapshot_gamma.timer + +Barman installs a cron job by default at the system level. For now, disable this +by commenting out the entry in `/etc/cron.d/barman`. + +Finally, shut down Kive itself by shutting down the PostgreSQL database and +webserver: + + sudo systemctl stop apache2 + sudo systemctl stop postgresql@14-main + +### Annoying detour 2: set the system locale to "Canada English" + +At this point in the Octomore migration, it was discovered that the old database +contents would not properly restore to the new database due to problems with the +database locale. The old database had as its locale `en_CA.UTF-8`, which was not +available on the newly-upgraded Octomore. + +To this end, the `set_locale_to_canada.yml` playbook was used to enable this +locale on all nodes, and the database then restored without issue. If this comes +up again, use this same playbook to correct the issue. + +### Restoring the database + +Now, restore the Kive data folders from the old backups. On our prod and dev +clusters this folder was `/data/kive`; use `rsync -avz` to copy this information +into place on your new server at wherever you set `kive_media_root` to in your +`group-vars` (by default, `/data/kive/media_root`). Assuming all has gone correctly +with importing users and groups, the ownership of the files should be as they were +on the old system. + +Next, move the just-created PostgreSQL "cluster" to a backup location (or simply +delete it if you're very confident). On a fresh install, the cluster is at +`/var/lib/postgresql/14/main`. Move this to, for example, `/var/lib/postgresql/14/main_backup`. +Create a fresh empty cluster in the original location using `initdb`: + + sudo -u postgres /usr/lib/postgresql/14/bin/initdb /var/lib/postgresql/14/main + +At the same time, we should also move (or delete) the Barman backups created to this point, +as they are inconsistent with the database that we are about to restore. Move the Barman +backup folder to a backup location, and create a fresh backup folder in the same location. 
+For example, if the backup folder was at `/media/backup/BarmanDBBackup`: + + sudo mv /media/backup/BarmanDBBackup /media/backup/BarmanDBBackup_original + sudo mkdir /media/backup/BarmanDBBackup + sudo chown barman:barman /media/backup/BarmanDBBackup + +Next you can restore the database using `psql` as the `postgres` user. Bring up the database +again (this time with the fresh empty cluster) and use `psql` to load the data: + +``` +sudo systemctl start postgresql@14-main +sudo -u postgres psql -f [dumped file from the old system] postgres +``` + +Note that in the `psql` command, we specified the database `postgres`. This must be +specified (it's a mandatory parameter to `psql`) but will actually be ignored. + +At this point, the database will have been restored to the old settings. If you didn't +use it before in your Ansible configuration (i.e. in `group_vars/all.yaml`), you should +now either specify the PostgreSQL passwords preserved from the old system in +`/etc/kive/kive_apache.conf`, `/etc/kive/kive_purge.conf`, and the `barman` user's +`.pgpass`, or reset the passwords using `psql` as the `postgres` user to the ones you +used in your Ansible settings. + +With the database running and restored, bring Apache back up with `sudo systemctl start apache2`. +If the test Kive website doesn't work, check the PostgreSQL logs for clues, and make sure +that Apache is able to reach the database. Make sure that the password in `/etc/kive/kive_apache.conf` +and `/etc/kive/kive_purge.conf` is correct and working. + +### Restore other old user data + +This can be done at the leisure of each user, so long as the old backups are mounted. +Use `rsync -avz` to move whatever user data back into place you like. + +### Finish setting up Barman + +At this point we can manually verify the last details that Barman needs to +run correctly. First, reactivate the Barman cron job by uncommenting +the entry you commented out before in `/etc/cron.d/barman`. Then check on the +`barman` configuration by running, as the `barman` user, + + barman check kive + +There may be problems with the configuration still. If so, the Barman log at +`/var/log/barman/barman.log` and the PostgreSQL logs at `/var/log/postgresql` +may be helpful in diagnosing the problems. Some that I experienced +while I was going through the process: + +* The `barman` and `streaming_barman` PostgreSQL user passwords may be incorrect, + resulting in the check showing failures for "PostgreSQL", "pg_basebackup compatible", + and "pg_receivexlog compatible". This happened because I didn't preserve these + passwords from before I wiped out the database, so I couldn't use the same passwords + for `barman` and `streaming_barman` in my Ansible configuration. + This can easily be remedied by changing these users' PostgreSQL passwords + in `psql` (as the `postgres` system user) with the command `\password [username]`; + use the passwords in the `barman` system user's `.pgpass` file. +* The "replication slot" entry in the `barman check kive` output may report a failure. + One possible reason for this is that `barman cron` has not run successfully yet, + as in the previous steps we had disabled the system-level cron job that runs this + every minute. This task is what invokes `barman receive-wal`. If this appears to + be the problem, you can manually invoke `barman cron` as the `barman` user. Or, + you can wait one minute for the cron job to run and see if this error clears up. 
+* The output will also indicate that there are not enough backups in place, which is
+  normal and expected at this point. These backups will be created by the
+  `barman_backup` systemd service.
+* The check may still report a failure for "WAL archive". This is normal, as the WAL
+  archiving must be verified for a fresh install, and will be handled below.
+
+Next, verify the WAL archiving. To do this, as the `barman` user, run
+
+    barman switch-wal --force --archive kive
+
+This may fail at first due to a timeout, but try again if so; it's likely to succeed
+eventually if all is configured well. Check the configuration again to confirm
+that things are ready to go. (Ignore the error caused by there not being enough
+backups in place.)
+
+### Restart Kive and backup services
+
+With everything in place, restart the regularly-scheduled backup `systemd` tasks
+and Kive purge tasks using `systemctl start` as the root user:
+
+* `barman_backup`
+* `rsnapshot_alpha`
+* `rsnapshot_beta`
+* `rsnapshot_gamma`
+* `kive_purge`
+* `kive_purge_synch`
+
+For example, run `sudo systemctl start barman_backup.timer` to start `barman_backup`, and
+similarly for the others.
+
+Lastly, bring Kive itself back up by bringing up:
+
+* `postgresql@14-main`
+* `apache2`
+
+[initialization/head]: initialization/head
+[initialization/worker]: initialization/worker
+[initialization]: initialization
+[deployment]: ./deployment
+
+## Test Environment
+
+We can use either Multipass or Vagrant to bring up a test environment for
+development purposes.
+
+### Multipass
+
+The [initialization] directory contains templates and scripts for generating cloud-init
+files to use when setting up a "head" VM and a "worker" VM.
+
+For the head configuration, you must supply a YAML file containing the names and IPs of
+the compute nodes in the same format as they appear in the Ansible `group_vars`; for example,
+simply copy `deployment/group_vars/default_template.yml` (these values are not hugely useful
+for this test deployment anyway). Specify this as a parameter to the `create_head_user_data.py`
+script and it will generate a `user_data` file suitable for use with Multipass:
+
+    multipass launch --name TestHead --cloud-init [user data file you generated] --mount [path to the cluster-setup directory]:/app
+
+For the worker configuration, you must put the SSH public key generated for the root user
+on the "head node" somewhere accessible to whoever will run `create_worker_user_data.py`,
+and specify it as the parameter. This creates a `user_data` file suitable for use with
+Multipass; similarly to the above,
+
+    multipass launch --name TestWorker --cloud-init [user data file you generated] --mount [path to the cluster-setup directory]:/app
+
+These commands launch the machines and also mount the `cluster-setup` directory at `/app`
+on both nodes. Now that both machines are online and have IP addresses, you can run
+`configure_hosts_file.bash` on the head node to configure its `/etc/hosts` file so that
+Ansible will know how to reach the worker node.
+
+### Vagrant
 This directory contains a Vagrantfile that describes two VMs (a head node
 and a worker node) that can be used to test Ansible playbooks or practice performing
@@ -11,56 +449,78 @@ cluster management tasks. Ansible is installed on the `head` node, and this dire
 is mounted at `/vagrant`. Playbooks can be edited from the host machine, but
 should be run from the `head` node.
-
-# Quickstart
-
-This will guide you through setting up your test environment and running your
-first Ansible commands. You'll need to have [Vagrant] and [VirtualBox] installed.
-
+You'll need to have [Vagrant] and [VirtualBox] or VMware installed.
 To begin, bring up the Vagrant VMs. This will create two VMs (`head` and `worker`)
 and install Ansible on `head`.
 
     vagrant up
 
-Next, log in to `head` and move into the test environment directory. This is where
-we'll do most of our testing and practice.
+On the head node, run (as root) `setup_ssh_keys.bash` and `setup_ssh_access.bash`; this will
+install some dummy keys to enable passwordless SSH from the root user to itself,
+which is necessary for Ansible.
-    vagrant ssh head
-    cd /vagrant/testenv
+On the compute node, run (as root) `setup_ssh_access.bash`, which will allow the head
+node's root user to SSH into the compute node without a password. This is also needed
+for Ansible.
-`ansible.cfg` contains the configuration for the test environment. Most
-importantly, it directs ansible to load its inventory from
-`testenv/inventory.ini` instead of from the default location under `/etc`.
+With both nodes running, you can use `configure_hosts_file.bash` on the head node,
+also as root, to fill in the head node's `/etc/hosts` file so that Ansible will know
+how to reach the compute node.
-From `./testenv`, you can run Ansible commands against the inventoried
-hosts (including the head node).
+At this point, you can log into the head node and work with the code in this directory
+at `/vagrant`. In particular, the Ansible scripts are located in `/vagrant/deployment`.
-This command runs the Ansible's `ping` module against all hosts, which checks that
-they can be accessed.
+To confirm that your Ansible configuration is correct, you can run this command:
 
     ansible -m ping all
 
+This command runs Ansible's `ping` module against all hosts, which checks that
+they can be accessed.
 [Vagrant]: https://www.vagrantup.com/downloads.html
 [VirtualBox]: https://www.virtualbox.org/wiki/Downloads
+## Using Ansible
+
+`ansible.cfg` contains the configuration for the test environment. Most
+importantly, it directs ansible to load its inventory from
+`deployment/inventory.ini` instead of from the default location under `/etc`.
+
+From `./deployment`, you can run Ansible commands against the inventoried
+hosts (including the head node).
-# Architecture (for lack of a better name)
+### Architecture (for lack of a better name)
 Ansible executes *tasks* against one or more managed machines. Tasks may also
-depend on *variables*, *files*, or *templates*. These can be grouped into *roles*.
+depend on *variables*, *files*, or *templates*. These can also be grouped into *roles*,
+which we make use of in this project to help organize our code.
+
+### Running playbooks
+
+Run playbooks using `ansible-playbook`, e.g.
-This project uses roles to configure servers (e.g. Slurm worker, Kive server).
+    ansible-playbook kive_setup.yml
+All of our playbooks are intended to be run with `sudo` as well.
-# Ansible Docs
+#### Debugging a single role
-Essential:
+Per [this](https://stackoverflow.com/questions/38350674/ansible-can-i-execute-role-from-command-line)
+stack overflow answer, a single role can be run with the following command:
+
+    ansible -m include_role -a name=
+
+This has more verbose output and can be run in isolation, making it suitable
+for development and debugging.
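+
+For example, a sketch of running only the `kive_node` role (one of the roles used by
+`kive_setup.yml`) against the hosts in the inventory's `workers` group might look like
+the following; substitute whichever role and host pattern you actually need:
+
+    sudo ansible workers -m include_role -a name=kive_node
+
+As with the playbooks, running it under `sudo` keeps the connection and privileges
+consistent with how the rest of this setup is driven.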
+ +### Ansible documentation + +#### Essential - [Concepts](https://docs.ansible.com/ansible/latest/user_guide/basic_concepts.html) - [Quickstart](https://docs.ansible.com/ansible/latest/user_guide/quickstart.html) -Thorough: +#### Thorough - [Playbooks](https://docs.ansible.com/ansible/2.3/playbooks.html) - [How to build your inventory](https://docs.ansible.com/ansible/latest/user_guide/intro_inventory.html#intro-inventory) @@ -69,7 +529,7 @@ Thorough: - [Best Practices](https://docs.ansible.com/ansible/latest/user_guide/playbooks_best_practices.html#playbooks-best-practices) - [Interpreter Discovery](https://docs.ansible.com/ansible/latest/reference_appendices/interpreter_discovery.html#interpreter-discovery) -Extended: +#### Extended - [Installation](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#installation-guide) - [Become (privesc)](https://docs.ansible.com/ansible/2.3/become.html) @@ -77,8 +537,7 @@ Extended: - [Asynchronous Actions and Polling](https://docs.ansible.com/ansible/2.3/playbooks_async.html) - [Vault](https://docs.ansible.com/ansible/2.3/playbooks_vault.html) - -# Useful modules +#### Useful modules - [copy](https://docs.ansible.com/ansible/latest/modules/copy_module.html#copy-module) - [user](https://docs.ansible.com/ansible/latest/modules/user_module.html#user-module) @@ -95,18 +554,4 @@ Extended: - [lineinfile](https://docs.ansible.com/ansible/latest/modules/lineinfile_module.html) - [blockinfile](https://docs.ansible.com/ansible/latest/modules/blockinfile_module.html#blockinfile-module) - [git](https://docs.ansible.com/ansible/latest/modules/git_module.html#git-module) -- [unarchive](https://docs.ansible.com/ansible/latest/modules/unarchive_module.html) - -# Applying a single role - -Per [this](https://stackoverflow.com/questions/38350674/ansible-can-i-execute-role-from-command-line) -stack overflow answer, a single role can be run with the following command: - - ansible -m include_role -a name= - -This has more verbose output and can be run in isolation, making it suitable -for development and debugging. - - - - +- [unarchive](https://docs.ansible.com/ansible/latest/modules/unarchive_module.html) \ No newline at end of file diff --git a/cluster-setup/Vagrantfile b/cluster-setup/Vagrantfile index 88484642b..b854872e0 100644 --- a/cluster-setup/Vagrantfile +++ b/cluster-setup/Vagrantfile @@ -1,69 +1,93 @@ -HEAD_IP = "192.168.45.10" -WORKER_IP = "192.168.45.11" +HEAD_IP = "192.168.56.10" +WORKER_IP = "192.168.56.11" # Copy the test SSH keys into `/home/vagrant/.ssh/`. Keys are copied manually # to allow easy SSH traffic between VMs. 
-def add_keys(vm) +def add_keys(vm, home_dir="/home/vagrant") vm.provision "file", source: "./setupfiles/vagrant_testkey", - destination: "/home/vagrant/.ssh/id_ed25519" + destination: "#{home_dir}/.ssh/id_ed25519" vm.provision "file", source: "./setupfiles/vagrant_testkey.pub", - destination: "/home/vagrant/.ssh/id_ed25519.pub" + destination: "#{home_dir}/.ssh/id_ed25519.pub" vm.provision "shell", inline: <<-EOS - chmod 600 /home/vagrant/.ssh/id_ed25519 - chmod 644 /home/vagrant/.ssh/id_ed25519.pub + chmod 600 "#{home_dir}/.ssh/id_ed25519" + chmod 644 "#{home_dir}/.ssh/id_ed25519.pub" EOS end # Enable SSH access by copying the test SSH public key into # `/home/vagrant/.ssh/authorized_keys` -def add_key_access(vm) +def add_key_access(vm, home_dir="/home/vagrant") vm.provision "file", source: "./setupfiles/vagrant_testkey.pub", destination: "/tmp/vagrant_testkey.pub" vm.provision "shell", inline: <<-EOS - cat /tmp/vagrant_testkey.pub >> /home/vagrant/.ssh/authorized_keys - chmod 600 /home/vagrant/.ssh/authorized_keys + cat /tmp/vagrant_testkey.pub >> "#{home_dir}/.ssh/authorized_keys" + chmod 600 "#{home_dir}/.ssh/authorized_keys" EOS end # Add a synced folder to emulate the cluster's network drive. def add_data_dir(vm) Dir.mkdir("./data") if not Dir.exists?("./data") - vm.synced_folder "./data", "/data", mount_options: ["dmode=777", "fmode=664"] + # vm.synced_folder "./data", "/data", mount_options: ["dmode=777", "fmode=664"] end Vagrant.configure("2") do |config| - config.vm.box = "geerlingguy/centos8" + config.vm.box = "bento/ubuntu-22.04" + + # Give preference to VMware when it's available. + config.vm.provider "vmware_desktop" do |vmw| + vmw.gui = false + vmw.memory = 8 * 1024 + vmw.cpus = 2 + end config.vm.provider "virtualbox" do |vb| vb.gui = false vb.memory = 8 * 1024 - vb.cpus = 4 + vb.cpus = 2 end config.vm.define :head do |head| head.vm.hostname = "head" - head.vm.network "private_network", ip: HEAD_IP + + # Static IPs are not supported by the VMware provider, + # but we can use them when using Virtualbox. + head.vm.network "private_network" + # head.vm.provider "virtualbox" do |vb, override| + # head.vm.network "private_network", ip: HEAD_IP + # end + add_keys(head.vm) add_key_access(head.vm) + # add_keys(head.vm, home_dir="/root") + # add_key_access(head.vm, home_dir="/root") add_data_dir(head.vm) head.vm.provision "shell", path: "./setupfiles/install-ansible.sh" end config.vm.define :worker do |worker| worker.vm.hostname = "worker" - worker.vm.network "private_network", ip: WORKER_IP + + # As for the head node we set up Virtualbox and VMware providers differently. + worker.vm.network "private_network" + # worker.vm.provider "virtualbox" do |vb, override| + # override.vm.network "private_network", ip: WORKER_IP + # end + add_key_access(worker.vm) + # add_key_access(worker.vm, home_dir="/root") worker.vm.provision "shell", inline: <<-EOS - dnf install -q -y python3 epel-release - dnf config-manager --set-enabled PowerTools + apt update + apt install -y python3 EOS - add_data_dir(worker.vm) end - config.vm.provision "shell", inline: <<-EOS - echo "#{HEAD_IP}\thead\n#{WORKER_IP}\tworker" >> /etc/hosts - EOS + # # Note: when using a VMware provider, these IP addresses won't be meaningful + # # and you'll have to manually configure /etc/hosts. 
+ # config.vm.provision "shell", inline: <<-EOS + # echo "#{HEAD_IP}\thead\n#{WORKER_IP}\tworker" >> /etc/hosts + # EOS end diff --git a/cluster-setup/compose.yaml b/cluster-setup/compose.yaml new file mode 100644 index 000000000..d5cb749f4 --- /dev/null +++ b/cluster-setup/compose.yaml @@ -0,0 +1,8 @@ +version: '3.9' + +services: + kive_export_clusters: + image: ${KIVE_EXPORT_USERS_IMAGE:-kive_export_users:latest} + restart: unless-stopped + volumes: + - ${KIVE_CLUSTER_SETUP_PATH:-/usr/local/share/Kive/cluster_setup}:/app diff --git a/cluster-setup/configure_hosts_file.bash b/cluster-setup/configure_hosts_file.bash new file mode 100644 index 000000000..d628587d6 --- /dev/null +++ b/cluster-setup/configure_hosts_file.bash @@ -0,0 +1,6 @@ +#! /usr/bin/env bash + +name=$1 +ip=$2 + +echo -e "${ip}\t${name}" >> /etc/hosts diff --git a/cluster-setup/deploy_cluster_setup.bash b/cluster-setup/deploy_cluster_setup.bash new file mode 100644 index 000000000..3e6ebabdc --- /dev/null +++ b/cluster-setup/deploy_cluster_setup.bash @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Use rsync to copy the cluster setup code to a target server. + +# Command-line parameters: +# prod|test +# e.g. +# ./deploy_cluster_setup.sh prod + +# If you need to override the default login/server or upload path, set +# the environment variables CLUSTER_SETUP_LOGIN and/or CLUSTER_SETUP_PATH. +# Check out the version of the code you want before running, as this script +# does not check out a fresh repository; we want this script to transfer over +# config files that would not be in a stock repo. + +# Make sure you have (or the account you log into the server with has) appropriate +# permissions on the deployment path. + +prod_or_test=$1 + +echo "Deployed tag/commit/branch $git_tag on $(date)." > deployment_notes.txt +echo 'Output of "git describe":' >> deployment_notes.txt +git describe --tags >> deployment_notes.txt +echo 'Output of "git show --format=oneline --no-patch":' >> deployment_notes.txt +git show --format=oneline --no-patch >> deployment_notes.txt + +if [ "$prod_or_test" == "prod" ]; then + server="kive-int.cfenet.ubc.ca" +else + server="testkive-int.cfenet.ubc.ca" +fi +server_login=${CLUSTER_SETUP_LOGIN:-"${USER}@${server}"} + +deployment_path=${CLUSTER_SETUP_PATH:-"/usr/local/src/cluster-setup"} + +rsync -avz --exclude-from deploy_exclude_list.txt -a ./ ${server_login}:${deployment_path} + +echo "... done." diff --git a/cluster-setup/deploy_exclude_list.txt b/cluster-setup/deploy_exclude_list.txt new file mode 100644 index 000000000..2a4b28dba --- /dev/null +++ b/cluster-setup/deploy_exclude_list.txt @@ -0,0 +1,4 @@ +deployment/group_vars/all.yml +deployment/group_vars/all.yaml +deployment/ansible.cfg +initialization/worker/head_node_root_id_ed25519.pub diff --git a/cluster-setup/deployment/ansible_bulbasaur.cfg b/cluster-setup/deployment/ansible_bulbasaur.cfg new file mode 100644 index 000000000..ad384c3be --- /dev/null +++ b/cluster-setup/deployment/ansible_bulbasaur.cfg @@ -0,0 +1,3 @@ +[defaults] +inventory = ./inventory_bulbasaur.ini +interpreter_python = /usr/bin/python3 diff --git a/cluster-setup/deployment/ansible_dev.cfg b/cluster-setup/deployment/ansible_dev.cfg new file mode 100644 index 000000000..ff114fed3 --- /dev/null +++ b/cluster-setup/deployment/ansible_dev.cfg @@ -0,0 +1,8 @@ +# The main Ansible configuration file. Copy this to "ansible.cfg" and +# fill in the appropriate inventory file to use. 
+# See the following for available sections and keys: +# https://docs.ansible.com/ansible/latest/reference_appendices/config.html + +[defaults] +inventory = ./inventory_dev.ini +interpreter_python = /usr/bin/python3 diff --git a/cluster-setup/deployment/ansible_octomore.cfg b/cluster-setup/deployment/ansible_octomore.cfg new file mode 100644 index 000000000..5b6db308d --- /dev/null +++ b/cluster-setup/deployment/ansible_octomore.cfg @@ -0,0 +1,3 @@ +[defaults] +inventory = ./inventory_octomore.ini +interpreter_python = /usr/bin/python3 diff --git a/cluster-setup/deployment/ansible_template.cfg b/cluster-setup/deployment/ansible_template.cfg new file mode 100644 index 000000000..ff114fed3 --- /dev/null +++ b/cluster-setup/deployment/ansible_template.cfg @@ -0,0 +1,8 @@ +# The main Ansible configuration file. Copy this to "ansible.cfg" and +# fill in the appropriate inventory file to use. +# See the following for available sections and keys: +# https://docs.ansible.com/ansible/latest/reference_appendices/config.html + +[defaults] +inventory = ./inventory_dev.ini +interpreter_python = /usr/bin/python3 diff --git a/cluster-setup/deployment/bulbasaur_preliminary_setup.yaml b/cluster-setup/deployment/bulbasaur_preliminary_setup.yaml new file mode 100644 index 000000000..725e12e09 --- /dev/null +++ b/cluster-setup/deployment/bulbasaur_preliminary_setup.yaml @@ -0,0 +1,46 @@ +--- + +- name: preliminary setup to run on the Octomore head node + hosts: head + become: true + roles: + - head_node_internal_interface + tasks: + - name: create /opt + file: + path: /opt + owner: root + group: root + mode: '0755' + state: directory + + - name: create the /data mount point + file: + path: /data + owner: root + group: root + mode: '0755' + state: directory + + - name: mount the already-existing filesystem + mount: + path: /data + src: /dev/data-vg/data-lv + fstype: ext4 + state: mounted + + - name: move the existing home folder to the side + command: mv /data/home /data/home_old + args: + creates: /data/home_old + removes: /data/home + + - name: move the existing Kive folder to the side + command: mv /data/kive /data/kive_old + args: + creates: /data/kive_old + removes: /data/kive + + - name: set up head node networking + include_role: + name: head_node_networking diff --git a/cluster-setup/deployment/copy_users_and_groups.yaml b/cluster-setup/deployment/copy_users_and_groups.yaml new file mode 100644 index 000000000..70fad8b84 --- /dev/null +++ b/cluster-setup/deployment/copy_users_and_groups.yaml @@ -0,0 +1,6 @@ +--- + +- name: copy users and groups from head to workers + hosts: workers + roles: + - copy_users_and_groups diff --git a/cluster-setup/deployment/create_backup_filesystem.yaml b/cluster-setup/deployment/create_backup_filesystem.yaml new file mode 100644 index 000000000..c8ab86f7d --- /dev/null +++ b/cluster-setup/deployment/create_backup_filesystem.yaml @@ -0,0 +1,57 @@ +--- + +- name: create backup filesystem + hosts: head +# vars: +# backup_physical_volumes: +# - "ata-ST10000NM001G-2MW103_ZS51H7QX" + # Replaced the above with the `backup_physical_volumes` variable in group_vars. 
+ tasks: + - name: create a single partition on each of the physical volumes + loop: "{{ backup_physical_volumes }}" + community.general.parted: + device: "/dev/disk/by-id/{{ item }}" + number: 1 + state: present + label: gpt + + - name: construct a list of the partition names + block: + - name: initialize the list as empty + set_fact: + partition_names: [ ] + - name: append names to the list + loop: "{{ backup_physical_volumes }}" + set_fact: + partition_names: "{{ partition_names + ['/dev/disk/by-id/' ~ item ~ '-part1'] }}" + + - name: create a volume group out of the partitions + lvg: + vg: backup-vg + pvs: "{{ partition_names | join(',') }}" + + - name: create a logical volume from the volume group + community.general.lvol: + vg: backup-vg + lv: backup-lv + size: 100%VG + + - name: create the filesystem + community.general.filesystem: + fstype: ext4 + dev: /dev/backup-vg/backup-lv + + - name: create the /media/backup mount point + file: + path: /media/backup + owner: root + group: root + mode: '0755' + state: directory + + - name: mount the filesystem + mount: + path: /media/backup + src: /dev/backup-vg/backup-lv + fstype: ext4 + state: mounted diff --git a/cluster-setup/deployment/group_vars/bulbasaur_template.yaml b/cluster-setup/deployment/group_vars/bulbasaur_template.yaml new file mode 100644 index 000000000..d7bfa94c4 --- /dev/null +++ b/cluster-setup/deployment/group_vars/bulbasaur_template.yaml @@ -0,0 +1,161 @@ +--- +# These are settings appropriate for Octomore. Copy this file to +# "all.yml" and fill in the required passwords when deploying. +# Most of the network information should be already set appropriately. + +# Variables needed to set up Kive. +kive_allowed_hosts: "[\"192.168.69.179\", \"testkive-int.cfenet.ubc.ca\"]" +kive_listen_port: 80 +update_kive_source: yes +kive_server_email: testkive-noreply@bccfe.ca +kive_admins: "[[\"kive\", \"kive@bccfe.ca\"]]" +kive_subject_prefix: "Kive server on Bulbasaur" +kive_backup_path: /media/backup +kive_version: v0.16.2 +kive_python_package: python3.7 +kive_python_executable: python3.7 + +# Settings used by the Kive purge tasks. +kive_purge_start: "4TB" +kive_purge_stop: "3.5TB" +kive_log_level: INFO + +# Slurm configuration. +slurmctlnode: bulbasaur +slurm_nodes: + - name: bulbasaur + memory: 80000 + cpus: 24 + sockets: 2 + cores_per_socket: 6 + threads_per_core: 2 + - name: b06 + memory: 80000 + cpus: 40 + sockets: 2 + cores_per_socket: 10 + threads_per_core: 2 + +# Settings for network services running on the head node, +# e.g. firewall, NFS, and PostgreSQL. +nfs_export_to_hosts: 192.168.1.0/255.255.255.0 +kive_db_host: 192.168.1.1 +kive_db_subnet: 192.168.1.0/24 + +# Internal network (i.e. the network that connects the head node and compute nodes) +# configuration. +internal_subnet: 192.168.1.0 +internal_netmask: 255.255.255.0 +internal_mask_bits: 24 +internal_broadcast: 192.168.1.255 +internal_dhcp_range: [192.168.1.100, 192.168.1.200] + +head_internal_interface: eth1 +head_internal_interface_mac: "00:1e:67:fe:fb:76" +head_external_interface: eno0 +head_internal_address: 192.168.1.1 +head_internal_mask_bits: 24 + +# This creates DHCP reservations for the compute nodes. +# Slow nodes: b01 through B03. The rest are "fast nodes". +# As of August 2020, b01 is broken and not spun up. 
+compute_nodes: + - name: b01 + ip: 192.168.1.2 + mac: "06:B3:ED:E6:A2:66" + # Alternative MACs: 00:1E:67:54:9B:(E4-E7) + - name: b02 + ip: 192.168.1.3 + mac: "00:1E:67:80:3C:B1" + # Alternative MACs: 00:1E:67:80:3C:(B2-B4) + - name: b03 + ip: 192.168.1.4 + mac: "00:1E:67:92:D5:2A" + # Alternative MACs: 00:1E:67:92:D5:(2B-2D) + - name: b04 + ip: 192.168.1.5 + mac: "5A:83:62:41:FD:14" + # Alternative MACs: 00:1E:67:A9:14:(8D-90) + - name: b05 + ip: 192.168.1.6 + mac: "00:1E:67:BC:32:F3" + # Alternative MACs: 00:1E:67:BC:32:(F4-F6) + - name: b06 + ip: 192.168.1.7 + mac: "A4:BF:01:02:4A:A2" + # Alternative MAC: A4:BF:01:02:4A:A3 + - name: b07a + ip: 192.168.1.8 + mac: "00:0A:CD:2D:2A:FB" + # Alternative MACs: AC:1F:6B:91:C3:(FE-FF) + - name: b07b + ip: 192.168.1.9 + mac: "00:0A:CD:2D:2B:F4" + # Alternative MACs: AC:1F:6B:91:C3:(E8-E9) + - name: b08a + ip: 192.168.1.10 + mac: "00:0A:CD:2D:2A:FE" + # Alternative MACs: AC:1F:6B:91:C4:(24-25) + - name: b08b + ip: 192.168.1.11 + mac: "00:0A:CD:2D:2A:EC" + # Alternative MACs: AC:1F:6B:91:C3:(F2-F3) + +# The following are sensitive, and should be kept secret for a production system. +kive_db_password: fixme-14mPdzu5vTOQG2DgtDG1inghQpMX0TBdUqEK6nVNHVo +kive_server_secret_key: fixme-kpXk1iKLbHn6-T7zieLHgADFA8ZSh5itd8k_Sp932fM +barman_password: fixme-barman +streaming_barman_password: fixme-streaming-barman + +# Slurm installation: +slurm_src_basename: "slurm-23.02.5" +slurm_tarball: "slurm-23.02.5.tar.bz2" +slurm_source_url: "https://download.schedmd.com/slurm/slurm-23.02.5.tar.bz2" +slurm_sha1_checksum: b3f06d7030bd771a3a94be06e3c0d58a2630a21e + +# mod_wsgi installation: +mod_wsgi_source_url: "https://files.pythonhosted.org/packages/fe/12/b3756f3b72ae3410a83a9b98862925aea64e854c93ef16c1e46b11e32d59/mod_wsgi-4.9.4.tar.gz" +mod_wsgi_tarball: "mod_wsgi-4.9.4.tar.gz" +mod_wsgi_basename: "mod_wsgi-4.9.4" +mod_wsgi_sha256_checksum: "8e762662ea5b01afc386bbcfbaa079748eb6203ab1d6d3a3dac9237f5666cfc9" + +# The following are defaults, and probably don't need to be changed. +# In a typical deployment, Kive, its virtualenv, and slurm are only "installed" +# by the head node and shared to the compute nodes via NFS, so kive_venv, +# kive_slurm_path, and kive_root should be paths that are on the shared partitions +# (typically /opt and /usr/local). If one would rather install Kive and slurm +# locally to each node, then these paths and/or the NFS shares should be reworked. 
+kive_venv: /opt/venv_kive +kive_slurm_path: /opt/venv_kive/bin +kive_db_name: kive +kive_db_user: kive +kive_media_root: /data/kive/media_root +kive_static_root: /var/www/html/kive/static +kive_root: /usr/local/share/Kive +# - httpd configuration +kive_httpd_user: kive +kive_httpd_group: kive + +copied_groups: + - kive + - sudo + +default_shell: /usr/bin/bash + +data_physical_volumes: + - ata-ST10000VN0008-2PJ103_ZLW0388G + +backup_physical_volumes: + - ata-ST10000VN000-3AK101_WP015CNV + +macdatafile_username: "[fill this in]" +macdatafile_password: "[fill this in]" +macdatafile_domain: "[fill this in]" +macdatafile_read_only: false +macdatafile_network_share: "[fill this in]" + +raw_data_username: "[fill this in]" +raw_data_password: "[fill this in]" +raw_data_domain: "[fill this in]" +raw_data_read_only: false # set to true for a dev/test system +raw_data_network_share: "[fill this in]" diff --git a/cluster-setup/deployment/group_vars/default_template.yml b/cluster-setup/deployment/group_vars/default_template.yml new file mode 100644 index 000000000..c8b2fe7d0 --- /dev/null +++ b/cluster-setup/deployment/group_vars/default_template.yml @@ -0,0 +1,87 @@ +--- +# These are default settings for a "testing" environment, e.g. using +# VMs to simulate the procedure. Copy this to "all.yml" and fill in +# appropriate values for networking and passwords etc. + +# Variables needed to set up Kive. +kive_allowed_hosts: "[\"*\"]" +kive_listen_port: 8080 +update_kive_source: yes +kive_server_email: kive-noreply@bccfe.ca +kive_admins: "[[\"kive\", \"kive@bccfe.ca\"]]" +kive_subject_prefix: "Kive (development) server" +kive_backup_path: /data/kive_db_backup +kive_python_package: python3.7 +kive_python_executable: python3.7 + +# Settings used by the Kive purge tasks; uncomment if you need to customize +# for your system (the defaults are likely good for a development system). +# kive_purge_start: "20GB" +# kive_purge_stop: "15GB" +# kive_log_level: WARN + +# Settings for network services running on the head node, +# e.g. firewall, NFS, and PostgreSQL. +nfs_export_to_hosts: 192.168.64.0/255.255.255.0 +kive_db_host: 192.168.64.9 +kive_db_subnet: 192.168.64.0/24 + +# Internal network (i.e. the network that connects the head node and compute nodes) +# configuration. +internal_subnet: 192.168.1.0 +internal_netmask: 255.255.255.0 +internal_mask_bits: 24 +internal_broadcast: 192.168.1.255 +internal_dhcp_range: [192.168.1.100, 192.168.1.200] + +head_internal_interface: eth1 +head_internal_interface_mac: "00:1e:67:fe:fb:76" +head_external_interface: eth0 +head_internal_address: 192.168.1.1 +head_internal_mask_bits: 24 + +compute_nodes: + - name: worker + ip: 192.168.1.2 + mac: "ab:cd:ef:01:23:45" + +# The following are sensitive, and should be kept secret for a production system. 
+kive_db_password: fixme-14mPdzu5vTOQG2DgtDG1inghQpMX0TBdUqEK6nVNHVo +kive_server_secret_key: fixme-kpXk1iKLbHn6-T7zieLHgADFA8ZSh5itd8k_Sp932fM +barman_password: fixme-barman +streaming_barman_password: fixme-streaming-barman + +# Slurm installation: +slurm_src_basename: "slurm-23.02.5" +slurm_tarball: "slurm-23.02.5.tar.bz2" +slurm_source_url: "https://download.schedmd.com/slurm/slurm-23.02.5.tar.bz2" +slurm_sha1_checksum: b3f06d7030bd771a3a94be06e3c0d58a2630a21e + +# mod_wsgi installation: +mod_wsgi_source_url: "https://files.pythonhosted.org/packages/fe/12/b3756f3b72ae3410a83a9b98862925aea64e854c93ef16c1e46b11e32d59/mod_wsgi-4.9.4.tar.gz" +mod_wsgi_tarball: "mod_wsgi-4.9.4.tar.gz" +mod_wsgi_basename: "mod_wsgi-4.9.4" +mod_wsgi_sha256_checksum: "8e762662ea5b01afc386bbcfbaa079748eb6203ab1d6d3a3dac9237f5666cfc9" + +# The following are defaults, and probably don't need to be changed. +# In a typical deployment, Kive, its virtualenv, and slurm are only "installed" +# by the head node and shared to the compute nodes via NFS, so kive_venv, +# kive_slurm_path, and kive_root should be paths that are on the shared partitions +# (typically /opt and /usr/local). If one would rather install Kive and slurm +# locally to each node, then these paths and/or the NFS shares should be reworked. +kive_venv: /opt/venv_kive +kive_slurm_path: /opt/venv_kive/bin +kive_db_name: kive +kive_db_user: kive +kive_media_root: /data/kive/media_root +kive_static_root: /var/www/html/kive/static +kive_root: /usr/local/share/Kive +# - httpd configuration +kive_httpd_user: kive +kive_httpd_group: kive + +copied_groups: + - kive + - sudo + +default_shell: /usr/bin/bash diff --git a/cluster-setup/deployment/group_vars/octomore_template.yaml b/cluster-setup/deployment/group_vars/octomore_template.yaml new file mode 100644 index 000000000..8a03ccb15 --- /dev/null +++ b/cluster-setup/deployment/group_vars/octomore_template.yaml @@ -0,0 +1,161 @@ +--- +# These are settings appropriate for Octomore. Copy this file to +# "all.yml" and fill in the required passwords when deploying. +# Most of the network information should be already set appropriately. + +# Variables needed to set up Kive. +kive_allowed_hosts: "[\"192.168.69.179\", \"kive-int.cfenet.ubc.ca\"]" +kive_listen_port: 80 +update_kive_source: yes +kive_server_email: kive-noreply@bccfe.ca +kive_admins: "[[\"kive\", \"kive@bccfe.ca\"]]" +kive_subject_prefix: "Kive server on Octomore" +kive_backup_path: /media/backup +kive_version: v0.16.2 +kive_python_package: python3.7 +kive_python_executable: python3.7 + +# Settings used by the Kive purge tasks. +kive_purge_start: "4TB" +kive_purge_stop: "3.5TB" +kive_log_level: INFO + +# Slurm configuration. +slurmctlnode: octomore +slurm_nodes: + - name: octomore + memory: 96000 + cpus: 48 + sockets: 2 + cores_per_socket: 12 + threads_per_core: 2 + - name: b05 + memory: 80000 + cpus: 40 + sockets: 2 + cores_per_socket: 10 + threads_per_core: 2 + +# Settings for network services running on the head node, +# e.g. firewall, NFS, and PostgreSQL. +nfs_export_to_hosts: 192.168.1.0/255.255.255.0 +kive_db_host: 192.168.1.1 +kive_db_subnet: 192.168.1.0/24 + +# Internal network (i.e. the network that connects the head node and compute nodes) +# configuration. 
+internal_subnet: 192.168.1.0 +internal_netmask: 255.255.255.0 +internal_mask_bits: 24 +internal_broadcast: 192.168.1.255 +internal_dhcp_range: [192.168.1.100, 192.168.1.200] + +head_internal_interface: eth1 +head_internal_interface_mac: "00:1e:67:fe:fb:76" +head_external_interface: eno0 +head_internal_address: 192.168.1.1 +head_internal_mask_bits: 24 + +# This creates DHCP reservations for the compute nodes. +# Slow nodes: b01 through B03. The rest are "fast nodes". +# As of August 2020, b01 is broken and not spun up. +compute_nodes: + - name: b01 + ip: 192.168.1.2 + mac: "06:B3:ED:E6:A2:66" + # Alternative MACs: 00:1E:67:54:9B:(E4-E7) + - name: b02 + ip: 192.168.1.3 + mac: "00:1E:67:80:3C:B1" + # Alternative MACs: 00:1E:67:80:3C:(B2-B4) + - name: b03 + ip: 192.168.1.4 + mac: "00:1E:67:92:D5:2A" + # Alternative MACs: 00:1E:67:92:D5:(2B-2D) + - name: b04 + ip: 192.168.1.5 + mac: "5A:83:62:41:FD:14" + # Alternative MACs: 00:1E:67:A9:14:(8D-90) + - name: b05 + ip: 192.168.1.6 + mac: "00:1E:67:BC:32:F3" + # Alternative MACs: 00:1E:67:BC:32:(F4-F6) + - name: b06 + ip: 192.168.1.7 + mac: "A4:BF:01:02:4A:A2" + # Alternative MAC: A4:BF:01:02:4A:A3 + - name: b07a + ip: 192.168.1.8 + mac: "00:0A:CD:2D:2A:FB" + # Alternative MACs: AC:1F:6B:91:C3:(FE-FF) + - name: b07b + ip: 192.168.1.9 + mac: "00:0A:CD:2D:2B:F4" + # Alternative MACs: AC:1F:6B:91:C3:(E8-E9) + - name: b08a + ip: 192.168.1.10 + mac: "00:0A:CD:2D:2A:FE" + # Alternative MACs: AC:1F:6B:91:C4:(24-25) + - name: b08b + ip: 192.168.1.11 + mac: "00:0A:CD:2D:2A:EC" + # Alternative MACs: AC:1F:6B:91:C3:(F2-F3) + +# The following are sensitive, and should be kept secret for a production system. +kive_db_password: fixme-14mPdzu5vTOQG2DgtDG1inghQpMX0TBdUqEK6nVNHVo +kive_server_secret_key: fixme-kpXk1iKLbHn6-T7zieLHgADFA8ZSh5itd8k_Sp932fM +barman_password: fixme-barman +streaming_barman_password: fixme-streaming-barman + +# Slurm installation: +slurm_src_basename: "slurm-23.02.5" +slurm_tarball: "slurm-23.02.5.tar.bz2" +slurm_source_url: "https://download.schedmd.com/slurm/slurm-23.02.5.tar.bz2" +slurm_sha1_checksum: b3f06d7030bd771a3a94be06e3c0d58a2630a21e + +# mod_wsgi installation: +mod_wsgi_source_url: "https://files.pythonhosted.org/packages/fe/12/b3756f3b72ae3410a83a9b98862925aea64e854c93ef16c1e46b11e32d59/mod_wsgi-4.9.4.tar.gz" +mod_wsgi_tarball: "mod_wsgi-4.9.4.tar.gz" +mod_wsgi_basename: "mod_wsgi-4.9.4" +mod_wsgi_sha256_checksum: "8e762662ea5b01afc386bbcfbaa079748eb6203ab1d6d3a3dac9237f5666cfc9" + +# The following are defaults, and probably don't need to be changed. +# In a typical deployment, Kive, its virtualenv, and slurm are only "installed" +# by the head node and shared to the compute nodes via NFS, so kive_venv, +# kive_slurm_path, and kive_root should be paths that are on the shared partitions +# (typically /opt and /usr/local). If one would rather install Kive and slurm +# locally to each node, then these paths and/or the NFS shares should be reworked. 
+kive_venv: /opt/venv_kive +kive_slurm_path: /opt/venv_kive/bin +kive_db_name: kive +kive_db_user: kive +kive_media_root: /data/kive/media_root +kive_static_root: /var/www/html/kive/static +kive_root: /usr/local/share/Kive +# - httpd configuration +kive_httpd_user: kive +kive_httpd_group: kive + +copied_groups: + - kive + - sudo + +default_shell: /usr/bin/bash + +data_physical_volumes: + - ata-ST10000NM001G-2MW103_ZS51H7QX + +backup_physical_volumes: + - ata-ST10000NM0016-1TT101_ZA286TWE + +macdatafile_username: "[fill this in]" +macdatafile_password: "[fill this in]" +macdatafile_domain: "[fill this in]" +macdatafile_read_only: false +macdatafile_network_share: "[fill this in]" + +raw_data_username: "[fill this in]" +raw_data_password: "[fill this in]" +raw_data_domain: "[fill this in]" +raw_data_read_only: false # set to true for a dev/test system +raw_data_network_share: "[fill this in]" \ No newline at end of file diff --git a/cluster-setup/deployment/import_users.yaml b/cluster-setup/deployment/import_users.yaml new file mode 100644 index 000000000..f7639058c --- /dev/null +++ b/cluster-setup/deployment/import_users.yaml @@ -0,0 +1,6 @@ +--- + +- name: import users + hosts: head + roles: + - import_users_and_groups diff --git a/cluster-setup/deployment/install_fasttree.yaml b/cluster-setup/deployment/install_fasttree.yaml new file mode 100644 index 000000000..46d995b39 --- /dev/null +++ b/cluster-setup/deployment/install_fasttree.yaml @@ -0,0 +1,49 @@ +--- + +- hosts: head + become: true + tasks: + - name: make a directory to hold the source code + file: + path: /usr/local/src/FastTree-2.1.9 + owner: root + group: root + mode: '0755' + state: directory + + - name: copy FastTree source code to the host + copy: + src: FastTree-2.1.9.c + dest: /usr/local/src/FastTree-2.1.9/FastTree.c + owner: root + group: root + + - name: compile single-precision FastTree + command: + chdir: /usr/local/src/FastTree-2.1.9 + cmd: gcc -O3 -finline-functions -funroll-loops -Wall -o FastTree.single FastTree.c -lm + creates: FastTree.single + + - name: compile double-precision FastTree + command: + chdir: /usr/local/src/FastTree-2.1.9 + cmd: gcc -DUSE_DOUBLE -O3 -finline-functions -funroll-loops -Wall -o FastTree.double FastTree.c -lm + creates: FastTree.double + + - name: install FastTree.single + copy: + src: /usr/local/src/FastTree-2.1.9/FastTree.single + dest: /usr/local/bin/FastTree.single + mode: "0755" + + - name: install FastTree.double + copy: + src: /usr/local/src/FastTree-2.1.9/FastTree.double + dest: /usr/local/bin/FastTree.double + mode: "0755" + + - name: make an alias to FastTree.single + file: + src: /usr/local/bin/FastTree.single + path: /usr/local/bin/FastTree + state: link diff --git a/cluster-setup/deployment/install_postgresql_12.yaml b/cluster-setup/deployment/install_postgresql_12.yaml new file mode 100644 index 000000000..c085d4388 --- /dev/null +++ b/cluster-setup/deployment/install_postgresql_12.yaml @@ -0,0 +1,23 @@ +--- + +- name: install PostgreSQL 12 on the head node + hosts: head + tasks: + - name: add the apt signing key for the PostgreSQL apt repository + ansible.builtin.apt_key: + url: https://www.postgresql.org/media/keys/ACCC4CF8.asc + state: present + + - name: add PostgreSQL apt repository + become: true + ansible.builtin.apt_repository: + repo: deb https://apt.postgresql.org/pub/repos/apt jammy-pgdg main + state: present + + - name: install PostgreSQL 12 + become: true + apt: + name: + - postgresql-12 + - postgresql-client-12 + diff --git 
a/cluster-setup/deployment/install_smartmontools.yaml b/cluster-setup/deployment/install_smartmontools.yaml new file mode 100644 index 000000000..3d12a9812 --- /dev/null +++ b/cluster-setup/deployment/install_smartmontools.yaml @@ -0,0 +1,9 @@ +--- + +- hosts: head + become: true + tasks: + - name: install smartmontools + apt: + name: + - smartmontools diff --git a/cluster-setup/deployment/inventory_bulbasaur.ini b/cluster-setup/deployment/inventory_bulbasaur.ini new file mode 100644 index 000000000..ef27ab212 --- /dev/null +++ b/cluster-setup/deployment/inventory_bulbasaur.ini @@ -0,0 +1,21 @@ +# This is an inventory.ini file appropriate for Bulbasaur. +# This defines all of the compute nodes we have, which may or may not +# actually be connected to Bulbasaur. + +# Documentation on this file: +# https://docs.ansible.com/ansible/latest/user_guide/intro_inventory.html#adding-variables-to-inventory + +[head] +localhost + +[workers] +# b01 +# b02 +# b03 +# b04 +# b05 +b06 +# b07a +# b07b +# b08a +# b08b diff --git a/cluster-setup/deployment/inventory_dev.ini b/cluster-setup/deployment/inventory_dev.ini new file mode 100644 index 000000000..4fc8b91a8 --- /dev/null +++ b/cluster-setup/deployment/inventory_dev.ini @@ -0,0 +1,11 @@ +# This is an inventory.ini file appropriate for a dev setup used for +# testing the Ansible playbooks. This defines a two-node cluster with +# one head node and one compute node ("worker"). + +# Documentation on this file: +# https://docs.ansible.com/ansible/latest/user_guide/intro_inventory.html#adding-variables-to-inventory + +head + +[workers] +worker diff --git a/cluster-setup/deployment/inventory_octomore.ini b/cluster-setup/deployment/inventory_octomore.ini new file mode 100644 index 000000000..18bd521d8 --- /dev/null +++ b/cluster-setup/deployment/inventory_octomore.ini @@ -0,0 +1,21 @@ +# This is an inventory.ini file appropriate for Octomore. +# This defines all of the compute nodes we have, which may or may not +# actually be connected to Octomore. 
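+# Nodes commented out below (e.g. b01) are defined in group_vars but are currently +# broken or not attached; uncomment a node once it is connected so that plays targeting +# the "workers" group include it. As an illustrative example, a playbook can be run +# against this inventory with `sudo ansible-playbook -i inventory_octomore.ini kive_setup.yml` +# if your ansible.cfg does not already select an inventory.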
+ +# Documentation on this file: +# https://docs.ansible.com/ansible/latest/user_guide/intro_inventory.html#adding-variables-to-inventory + +[head] +localhost + +[workers] +# b01 +b02 +b03 +# b04 +b05 +# b06 +b07a +b07b +b08a +b08b diff --git a/cluster-setup/deployment/kive_setup.yml b/cluster-setup/deployment/kive_setup.yml new file mode 100644 index 000000000..f18a0f06b --- /dev/null +++ b/cluster-setup/deployment/kive_setup.yml @@ -0,0 +1,33 @@ +--- + +- name: configure head node + hosts: head + tasks: + - name: build Slurm + include_role: + name: slurm_builder + - name: configure and start slurmctld and supporting services + include_role: + name: slurm_controller + - name: configure and start slurmd + include_role: + name: slurm_node + - name: set up Kive and supporting services on the head node + include_role: + name: kive_server + +- name: configure workers + hosts: workers + tasks: + - name: synchronize users and groups from the head node + include_role: + name: copy_users_and_groups + - name: set up worker node networking + include_role: + name: worker_node_networking + - name: configure and start slurmd + include_role: + name: slurm_node + - name: prepare node to work as a Kive node + include_role: + name: kive_node diff --git a/cluster-setup/deployment/lock_bootstrap_user.yaml b/cluster-setup/deployment/lock_bootstrap_user.yaml new file mode 100644 index 000000000..c73ceff37 --- /dev/null +++ b/cluster-setup/deployment/lock_bootstrap_user.yaml @@ -0,0 +1,12 @@ +--- + +- name: lock and expire the bootstrap user + hosts: all + vars: + user_name: ubuntu + tasks: + - name: lock and expire the user + user: + name: "{{ user_name }}" + password_lock: true + expires: 1 diff --git a/cluster-setup/deployment/mount_network_drives.yaml b/cluster-setup/deployment/mount_network_drives.yaml new file mode 100644 index 000000000..8caf1749c --- /dev/null +++ b/cluster-setup/deployment/mount_network_drives.yaml @@ -0,0 +1,100 @@ +--- + +- name: install CIFS utilities + hosts: all + tasks: + - name: install cifs-utils + become: true + apt: + name: + - cifs-utils + +- name: create CIFS credentials for the mounts + hosts: head + tasks: + - name: prepare credentials for macdatafile + become: true + vars: + cifs_username: "{{ macdatafile_username }}" + cifs_password: "{{ macdatafile_password }}" + cifs_domain: "{{ macdatafile_domain }}" + template: + src: templates/cifs_credentials.j2 + dest: /opt/smbcredentials + owner: root + group: root + mode: "0600" + + - name: prepare credentials for RAW_DATA + become: true + vars: + cifs_username: "{{ raw_data_username }}" + cifs_password: "{{ raw_data_password }}" + cifs_domain: "{{ raw_data_domain }}" + template: + src: templates/cifs_credentials.j2 + dest: /opt/smbcredentials-nextgen + owner: root + group: root + mode: "0600" + + +- name: mount the macdatafile CIFS network volume + hosts: all + tasks: + - name: create the macdatafile mount point + become: true + file: + path: /media/macdatafile + owner: root + group: root + mode: '0777' + state: directory + + - name: set mount options + set_fact: + macdatafile_mount_options: credentials=/opt/smbcredentials,x-systemd.requires-mounts-for=/opt,noperm,file_mode=0777,dir_mode=0777 + + - name: set read-only if configured + when: macdatafile_read_only | bool + set_fact: + macdatafile_mount_options: "{{ macdatafile_mount_options }},ro" + + - name: mount the macdatafile CIFS volume + become: true + mount: + path: /media/macdatafile + src: "{{ macdatafile_network_share }}" + opts: "{{ macdatafile_mount_options }}" + 
fstype: cifs + state: mounted + +- name: mount the RAW_DATA CIFS network volume + hosts: all + tasks: + - name: create the RAW_DATA mount point + become: true + file: + path: /media/RAW_DATA + owner: root + group: root + mode: '0777' + state: directory + + - name: set mount options + set_fact: + raw_data_mount_options: credentials=/opt/smbcredentials-nextgen,x-systemd.requires-mounts-for=/opt,noperm,file_mode=0777,dir_mode=0777 + + - name: set read-only if configured + when: raw_data_read_only | bool + set_fact: + raw_data_mount_options: "{{ raw_data_mount_options }},ro" + + - name: mount the RAW_DATA CIFS volume + become: true + mount: + path: /media/RAW_DATA + src: "{{ raw_data_network_share }}" + opts: "{{ raw_data_mount_options }}" + fstype: cifs + state: mounted diff --git a/cluster-setup/deployment/network_drives_read_only.yaml b/cluster-setup/deployment/network_drives_read_only.yaml new file mode 100644 index 000000000..9bf3e0390 --- /dev/null +++ b/cluster-setup/deployment/network_drives_read_only.yaml @@ -0,0 +1,27 @@ +--- + +- name: mount macdatafile and RAW_DATA read-only on all nodes + hosts: all + become: true + tasks: + - name: do the mounts on all the nodes + loop: + - mount_point: /media/macdatafile + network_share: "{{ macdatafile_network_share }}" + cifs_credentials: /opt/smbcredentials + cifs_credentials_volume: /opt + read_only: true + + - mount_point: /media/RAW_DATA + network_share: "{{ raw_data_network_share }}" + cifs_credentials: /opt/smbcredentials-nextgen + cifs_credentials_volume: /opt + read_only: true + include_role: + name: mount_network_drives + vars: + mount_point: "{{ item.mount_point }}" + network_share: "{{ item.network_share }}" + cifs_credentials: "{{ item.cifs_credentials }}" + cifs_credentials_volume: "{{ item.cifs_credentials_volume }}" + read_only: "{{ item.read_only }}" diff --git a/cluster-setup/deployment/network_drives_read_write.yaml b/cluster-setup/deployment/network_drives_read_write.yaml new file mode 100644 index 000000000..3be3965b4 --- /dev/null +++ b/cluster-setup/deployment/network_drives_read_write.yaml @@ -0,0 +1,27 @@ +--- + +- name: mount macdatafile and RAW_DATA read-write on all nodes + hosts: all + become: true + tasks: + - name: do the mounts on all the nodes + loop: + - mount_point: /media/macdatafile + network_share: "{{ macdatafile_network_share }}" + cifs_credentials: /opt/smbcredentials + cifs_credentials_volume: /opt + read_only: false + + - mount_point: /media/RAW_DATA + network_share: "{{ raw_data_network_share }}" + cifs_credentials: /opt/smbcredentials-nextgen + cifs_credentials_volume: /opt + read_only: false + include_role: + name: mount_network_drives + vars: + mount_point: "{{ item.mount_point }}" + network_share: "{{ item.network_share }}" + cifs_credentials: "{{ item.cifs_credentials }}" + cifs_credentials_volume: "{{ item.cifs_credentials_volume }}" + read_only: "{{ item.read_only }}" diff --git a/cluster-setup/deployment/network_drives_standard.yaml b/cluster-setup/deployment/network_drives_standard.yaml new file mode 100644 index 000000000..96fcc8398 --- /dev/null +++ b/cluster-setup/deployment/network_drives_standard.yaml @@ -0,0 +1,53 @@ +--- + +- name: "mount macdatafile and RAW_DATA read-write on the head node" + hosts: head + become: true + tasks: + - name: do the mounts on the head node + loop: + - mount_point: /media/macdatafile + network_share: "{{ macdatafile_network_share }}" + cifs_credentials: /opt/smbcredentials + cifs_credentials_volume: /opt + read_only: false + + - mount_point: /media/RAW_DATA 
+ network_share: "{{ raw_data_network_share }}" + cifs_credentials: /opt/smbcredentials-nextgen + cifs_credentials_volume: /opt + read_only: false + include_role: + name: mount_network_drives + vars: + mount_point: "{{ item.mount_point }}" + network_share: "{{ item.network_share }}" + cifs_credentials: "{{ item.cifs_credentials }}" + cifs_credentials_volume: "{{ item.cifs_credentials_volume }}" + read_only: "{{ item.read_only }}" + +- name: "mount macdatafile and RAW_DATA read-only on the worker nodes" + hosts: workers + become: true + tasks: + - name: do the mounts on the worker nodes + loop: + - mount_point: /media/macdatafile + network_share: "{{ macdatafile_network_share }}" + cifs_credentials: /opt/smbcredentials + cifs_credentials_volume: /opt + read_only: true + + - mount_point: /media/RAW_DATA + network_share: "{{ raw_data_network_share }}" + cifs_credentials: /opt/smbcredentials-nextgen + cifs_credentials_volume: /opt + read_only: true + include_role: + name: mount_network_drives + vars: + mount_point: "{{ item.mount_point }}" + network_share: "{{ item.network_share }}" + cifs_credentials: "{{ item.cifs_credentials }}" + cifs_credentials_volume: "{{ item.cifs_credentials_volume }}" + read_only: "{{ item.read_only }}" diff --git a/cluster-setup/testenv/notes-for-prod.md b/cluster-setup/deployment/notes-for-prod.md similarity index 100% rename from cluster-setup/testenv/notes-for-prod.md rename to cluster-setup/deployment/notes-for-prod.md diff --git a/cluster-setup/deployment/octomore_preliminary_setup.yaml b/cluster-setup/deployment/octomore_preliminary_setup.yaml new file mode 100644 index 000000000..34fe20b4e --- /dev/null +++ b/cluster-setup/deployment/octomore_preliminary_setup.yaml @@ -0,0 +1,18 @@ +--- + +- name: preliminary setup to run on the Octomore head node + hosts: head + roles: + - create_data_filesystem + - head_node_internal_interface + tasks: + - name: create /opt + file: + path: /opt + owner: root + group: root + mode: '0755' + state: directory + - name: set up head node networking + include_role: + name: head_node_networking diff --git a/cluster-setup/deployment/reassign_bootstrap_user_uid.yaml b/cluster-setup/deployment/reassign_bootstrap_user_uid.yaml new file mode 100644 index 000000000..1d4d51cc2 --- /dev/null +++ b/cluster-setup/deployment/reassign_bootstrap_user_uid.yaml @@ -0,0 +1,22 @@ +--- + +- name: assign a different UID and GID to the bootstrap user + hosts: all + vars: + user_name: ubuntu + group_name: ubuntu + old_gid: 1000 + new_uid: 1020 + new_gid: 1020 + tasks: + - name: change UID + user: + name: "{{ user_name }}" + uid: "{{ new_uid }}" + - name: change GID + group: + name: "{{ group_name }}" + gid: "{{ new_gid }}" + - name: change group perms of the home folders + become: true + command: find /home/{{ user_name }} -group {{ old_gid }} -exec chgrp -h {{ group_name }} {} \; diff --git a/cluster-setup/deployment/roles/copy_users_and_groups/defaults/main.yml b/cluster-setup/deployment/roles/copy_users_and_groups/defaults/main.yml new file mode 100644 index 000000000..126e48421 --- /dev/null +++ b/cluster-setup/deployment/roles/copy_users_and_groups/defaults/main.yml @@ -0,0 +1,3 @@ +--- + +copied_groups: [] diff --git a/cluster-setup/deployment/roles/copy_users_and_groups/tasks/main.yml b/cluster-setup/deployment/roles/copy_users_and_groups/tasks/main.yml new file mode 100644 index 000000000..819dfdcdd --- /dev/null +++ b/cluster-setup/deployment/roles/copy_users_and_groups/tasks/main.yml @@ -0,0 +1,83 @@ +--- + +- name: read users + delegate_to: 
localhost + register: user_list + community.general.read_csv: + path: /etc/passwd + delimiter: ":" + fieldnames: + - name + - passwdx + - uid + - gid + - info + - home + - shell + +- name: read groups + delegate_to: localhost + register: group_list + community.general.read_csv: + path: /etc/group + delimiter: ":" + fieldnames: + - name + - passwdx + - gid + - users + +- name: record group members + loop: "{{ group_list.list }}" + when: item.name in copied_groups + set_fact: + group_name: "{{ item.name }}" + group_members: "{{ item.users.split(',') }}" + register: system_groups + +- name: build user groups + with_subelements: + - "{{ system_groups.results }}" + - ansible_facts.group_members + when: item.1 != '' + set_fact: + user_groups: "{{ user_groups | default({}) | combine({ item.1: [item.0.ansible_facts.group_name] }, list_merge='append') }}" + +- name: read system passwords + delegate_to: localhost + register: shadow_dict + community.general.read_csv: + path: /etc/shadow + delimiter: ":" + key: name + fieldnames: + - name + - passwd + - lastchanged + - min + - max + - warn + - inactive + - expire + +- name: copy system groups + loop: "{{ group_list.list }}" + when: > + (item.name in copied_groups) or + (item.name in shadow_dict.dict and shadow_dict.dict[item.name]['passwd'].startswith("$")) + group: + gid: "{{ item.gid }}" + name: "{{ item.name }}" + +- name: copy system users + loop: "{{ user_list.list }}" + when: shadow_dict.dict[item.name]['passwd'].startswith("$") + user: + uid: "{{ item.uid }}" + create_home: no + name: "{{ item.name }}" + password: "{{ shadow_dict.dict[item.name]['passwd'] }}" + group: "{{ item.name }}" + groups: "{{ user_groups[item.name] | default([]) }}" + append: true + shell: "{{ default_shell }}" diff --git a/cluster-setup/deployment/roles/create_data_filesystem/tasks/main.yaml b/cluster-setup/deployment/roles/create_data_filesystem/tasks/main.yaml new file mode 100644 index 000000000..2b2ba4305 --- /dev/null +++ b/cluster-setup/deployment/roles/create_data_filesystem/tasks/main.yaml @@ -0,0 +1,50 @@ +--- + +- name: create a single partition on each of the physical volumes + loop: "{{ data_physical_volumes }}" + community.general.parted: + device: "/dev/disk/by-id/{{ item }}" + number: 1 + state: present + label: gpt + +- name: construct a list of the partition names + block: + - name: initialize the list as empty + set_fact: + data_partition_names: [] + - name: append names to the list + loop: "{{ data_physical_volumes }}" + set_fact: + data_partition_names: "{{ data_partition_names + ['/dev/disk/by-id/' ~ item ~ '-part1'] }}" + +- name: create a volume group out of the data partitions + lvg: + vg: data-vg + pvs: "{{ data_partition_names | join(',') }}" + +- name: create a logical volume from the volume group + community.general.lvol: + vg: data-vg + lv: data-lv + size: 100%VG + +- name: create the filesystem + community.general.filesystem: + fstype: ext4 + dev: /dev/data-vg/data-lv + +- name: create the /data mount point + file: + path: /data + owner: root + group: root + mode: '0755' + state: directory + +- name: mount the filesystem + mount: + path: /data + src: /dev/data-vg/data-lv + fstype: ext4 + state: mounted diff --git a/cluster-setup/deployment/roles/head_node_internal_interface/tasks/main.yaml b/cluster-setup/deployment/roles/head_node_internal_interface/tasks/main.yaml new file mode 100644 index 000000000..b56910779 --- /dev/null +++ b/cluster-setup/deployment/roles/head_node_internal_interface/tasks/main.yaml @@ -0,0 +1,15 @@ +--- + +- name: 
generate and install netplan configuration for the internal-facing interface + become: true + template: + src: 60-internal-interface.yaml.j2 + dest: /etc/netplan/60-internal-interface.yaml + owner: root + group: root + mode: "644" + +- name: apply the configuration + become: true + command: + argv: [netplan, apply] diff --git a/cluster-setup/deployment/roles/head_node_internal_interface/templates/60-internal-interface.yaml.j2 b/cluster-setup/deployment/roles/head_node_internal_interface/templates/60-internal-interface.yaml.j2 new file mode 100644 index 000000000..7191f05a8 --- /dev/null +++ b/cluster-setup/deployment/roles/head_node_internal_interface/templates/60-internal-interface.yaml.j2 @@ -0,0 +1,10 @@ +network: + ethernets: + internal0: + dhcp4: false + addresses: + - "{{ head_internal_address }}/{{ head_internal_mask_bits }}" + match: + macaddress: "{{ head_internal_interface_mac }}" + set-name: {{ head_internal_interface }} + version: 2 diff --git a/cluster-setup/deployment/roles/head_node_networking/tasks/main.yml b/cluster-setup/deployment/roles/head_node_networking/tasks/main.yml new file mode 100644 index 000000000..30e8f6dc4 --- /dev/null +++ b/cluster-setup/deployment/roles/head_node_networking/tasks/main.yml @@ -0,0 +1,163 @@ +--- + +- name: set timezone + block: + - name: change the timezone + community.general.timezone: + name: America/Vancouver + - name: restart cron to reflect the new timezone + systemd: + name: cron + state: restarted + +- name: check if original /home has been renamed + stat: path=/data/home + register: data_home + +- name: move /home to /data to make it accessible to workers + become: true + block: + - name: create /data + file: + path: /data + owner: root + group: root + mode: '0755' + state: directory + - name: move /home to /data/home + command: mv /home /data/home + when: not data_home.stat.exists + +- name: symbolic link for /home + become: true + file: + path: /home + src: /data/home + state: link + +- name: enable packet forwarding + become: true + blockinfile: + path: /etc/ufw/sysctl.conf + block: | + net/ipv4/ip_forward=1 + +- name: configure IP masquerading + become: true + blockinfile: + path: /etc/ufw/before.rules + block: | + *nat + :POSTROUTING ACCEPT [0:0] + -A POSTROUTING -s {{ internal_subnet }}/{{ internal_mask_bits }} -o {{ head_external_interface }} -j MASQUERADE + COMMIT + +- name: restart ufw to allow the new rules to take effect + become: true + systemd: + name: ufw + state: restarted + enabled: true + +- name: allow forwarded packets from the compute nodes to traverse the firewall + become: true + community.general.ufw: + rule: allow + route: true + interface_in: "{{ head_internal_interface }}" + interface_out: "{{ head_external_interface }}" + from: "{{ internal_subnet }}/{{ internal_mask_bits }}" + +- name: open port for SSH access + become: true + community.general.ufw: + rule: allow + port: ssh + protocol: tcp + +- name: open NFS ports + become: true + block: + - name: open TCP port + community.general.ufw: + rule: allow + port: nfs + proto: tcp + - name: open UDP port + community.general.ufw: + rule: allow + port: nfs + proto: udp + +- name: open port for workers to communicate with slurmctld + become: true + community.general.ufw: + rule: allow + port: 6817 + protocol: tcp + +- name: enable ufw + become: true + community.general.ufw: + state: enabled + +- name: install NFS server + become: true + apt: + name: + - nfs-kernel-server + state: present + +- name: start NFS service + systemd: + name: nfs-server + state: started 
+ enabled: true + +- name: set up NFS exports + become: true + register: nfs_exports_file + blockinfile: + path: /etc/exports + block: | + /data {{ nfs_export_to_hosts }}(rw,sync,no_all_squash,no_root_squash) + /usr/local {{ nfs_export_to_hosts }}(ro,sync,no_root_squash) + /opt {{ nfs_export_to_hosts }}(ro,sync,no_root_squash) + +- name: reload NFS exports + become: true + when: nfs_exports_file.changed + command: exportfs -r + +- name: install DHCP server + become: true + apt: + name: + - isc-dhcp-server + +- name: configure DHCP server + become: true + block: + - name: write conf file + template: + src: dhcpd.conf.j2 + dest: /etc/dhcp/dhcpd.conf + owner: root + group: root + mode: "644" + backup: true + - name: tell dhcpd which interface to serve DHCP requests on + template: + src: isc-dhcp-server.j2 + dest: /etc/default/isc-dhcp-server + owner: root + group: root + mode: "644" + backup: true + +- name: (re)start DHCP server + become: true + systemd: + name: isc-dhcp-server + state: restarted + enabled: true diff --git a/cluster-setup/deployment/roles/head_node_networking/templates/dhcpd.conf.j2 b/cluster-setup/deployment/roles/head_node_networking/templates/dhcpd.conf.j2 new file mode 100644 index 000000000..998ded0ec --- /dev/null +++ b/cluster-setup/deployment/roles/head_node_networking/templates/dhcpd.conf.j2 @@ -0,0 +1,21 @@ +# This dhcpd configuration managed by Ansible. Changes to this file will not be +# persisted if the Ansible playbooks used to set up this machine are rerun! + +default-lease-time 600; +max-lease-time 7200; + +option domain-name-servers 192.168.168.101; + +subnet {{ internal_subnet }} netmask {{ internal_netmask }} { + range {{ internal_dhcp_range[0] }} {{ internal_dhcp_range [1] }}; + option routers {{ head_internal_address }}; + option broadcast-address {{ internal_broadcast }}; +} + +{% for node in compute_nodes %} +host {{ node.name }} { + option host-name {{ node.name }}; + hardware ethernet {{ node.mac }}; + fixed-address {{ node.ip }}; +} +{% endfor %} diff --git a/cluster-setup/deployment/roles/head_node_networking/templates/isc-dhcp-server.j2 b/cluster-setup/deployment/roles/head_node_networking/templates/isc-dhcp-server.j2 new file mode 100644 index 000000000..3e5a3763d --- /dev/null +++ b/cluster-setup/deployment/roles/head_node_networking/templates/isc-dhcp-server.j2 @@ -0,0 +1,6 @@ +# This isc-dhcp-server configuration file is managed by Ansible. +# Changes to this file will not be persisted if the Ansible playbooks +# used to set up this machine are rerun! 
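+# +# As an illustration, with the example value head_internal_interface: eth1 from the +# group_vars template, this renders as INTERFACESv4="eth1" and leaves INTERFACESv6 +# empty, so DHCP is served only on the internal-facing interface.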
+ +INTERFACESv4="{{ head_internal_interface }}" +INTERFACESv6="" diff --git a/cluster-setup/deployment/roles/import_users_and_groups/tasks/main.yml b/cluster-setup/deployment/roles/import_users_and_groups/tasks/main.yml new file mode 100644 index 000000000..9f6af9225 --- /dev/null +++ b/cluster-setup/deployment/roles/import_users_and_groups/tasks/main.yml @@ -0,0 +1,29 @@ +--- + +- name: create groups + become: true + block: + - name: create imported users' primary groups + loop: "{{ primary_groups }}" + group: + gid: "{{ item.gid }}" + name: "{{ item.name }}" + - name: create all other imported groups + loop: "{{ other_groups }}" + group: + gid: "{{ item.gid }}" + name: "{{ item.name }}" + +- name: create users + become: true + loop: "{{ users }}" + user: + uid: "{{ item.uid }}" + create_home: yes + home: "{{ item.home }}" + name: "{{ item.name }}" + password: "{{ item.hashed_password }}" + group: "{{ item.primary_group }}" + groups: "{{ item.groups }}" + append: true + shell: "{{ default_shell }}" diff --git a/roles/kive_node/README.md b/cluster-setup/deployment/roles/kive_node/README.md similarity index 100% rename from roles/kive_node/README.md rename to cluster-setup/deployment/roles/kive_node/README.md diff --git a/roles/kive_node/meta/main.yml b/cluster-setup/deployment/roles/kive_node/meta/main.yml similarity index 100% rename from roles/kive_node/meta/main.yml rename to cluster-setup/deployment/roles/kive_node/meta/main.yml diff --git a/cluster-setup/deployment/roles/kive_node/tasks/main.yml b/cluster-setup/deployment/roles/kive_node/tasks/main.yml new file mode 100644 index 000000000..d49395bfe --- /dev/null +++ b/cluster-setup/deployment/roles/kive_node/tasks/main.yml @@ -0,0 +1,65 @@ +--- + +- name: add the deadsnakes PPA to get old versions of Python + become: true + apt_repository: + repo: ppa:deadsnakes/ppa + +- name: install the version of Python required by kive + become: true + apt: + name: + - "{{ kive_python_package }}" + - "{{ kive_python_package }}-distutils" + - "{{ kive_python_package }}-venv" + - "{{ kive_python_package }}-dev" + - "lib{{ kive_python_package }}-dev" + +- name: install pip for this version of Python + become: true + shell: "curl https://bootstrap.pypa.io/get-pip.py | sudo {{ kive_python_executable }}" + +- name: kive package dependencies + become: true + apt: + name: +# - python3-dev +# - python3-venv + - libsqlite3-dev + - wcanadian + - lsof + - graphviz + - libgraphviz-dev + +- name: install postgres database libraries + become: true + apt: + name: + - postgresql-client + +- name: configure mail service for error logging + block: + - name: install postfix + become: true + apt: + name: + - postfix + - name: start postfix daemon + systemd: + name: postfix + state: started + enabled: true + +- name: create kive group + become: true + group: + name: kive + gid: 762 # random gid in system uid range (200, 999); hard-coded for consistency across hosts + +- name: create kive user + become: true + user: + name: kive + system: yes + uid: 762 # random uid in system uid range (200, 999); hard-coded for consistency across hosts + group: kive diff --git a/roles/kive_server/README.md b/cluster-setup/deployment/roles/kive_server/README.md similarity index 89% rename from roles/kive_server/README.md rename to cluster-setup/deployment/roles/kive_server/README.md index 9d16ad651..1dfb7393b 100644 --- a/roles/kive_server/README.md +++ b/cluster-setup/deployment/roles/kive_server/README.md @@ -3,7 +3,7 @@ server. 
It also applies the [Slurm controller] and [Slurm worker] roles. [Kive node]: ../kive_node [Slurm controller]: ../slurm_controller -[Slurm worker]: ../slurm_worker +[Slurm worker]: ../worker_node_networking It includes: diff --git a/cluster-setup/deployment/roles/kive_server/files/001-kive-ssl.conf b/cluster-setup/deployment/roles/kive_server/files/001-kive-ssl.conf new file mode 100644 index 000000000..748a7d1cf --- /dev/null +++ b/cluster-setup/deployment/roles/kive_server/files/001-kive-ssl.conf @@ -0,0 +1,30 @@ +# SSL configuration for the Kive web portal. This file was created by copying +# and modifying `/etc/apache2/sites-available/default-ssl.conf`. That file +# has some helpful comments that may be useful to look at if you ever need +# to further adjust this file. + + + + ServerAdmin webmaster@localhost + + DocumentRoot /var/www/html + + ErrorLog ${APACHE_LOG_DIR}/error.log + CustomLog ${APACHE_LOG_DIR}/access.log combined + + SSLEngine on + + SSLCertificateFile /etc/ssl/certs/star_cfe.crt + SSLCertificateKeyFile /etc/ssl/private/star_cfe.key + SSLCertificateChainFile /etc/ssl/certs/DigiCertCA.crt + + #SSLOptions +FakeBasicAuth +ExportCertData +StrictRequire + + SSLOptions +StdEnvVars + + + SSLOptions +StdEnvVars + + + + diff --git a/roles/kive_server/files/barman_backup.timer b/cluster-setup/deployment/roles/kive_server/files/barman_backup.timer similarity index 100% rename from roles/kive_server/files/barman_backup.timer rename to cluster-setup/deployment/roles/kive_server/files/barman_backup.timer diff --git a/roles/kive_server/files/crontab_mail.py b/cluster-setup/deployment/roles/kive_server/files/crontab_mail.py similarity index 100% rename from roles/kive_server/files/crontab_mail.py rename to cluster-setup/deployment/roles/kive_server/files/crontab_mail.py diff --git a/roles/kive_server/files/kive_purge.service b/cluster-setup/deployment/roles/kive_server/files/kive_purge.service similarity index 100% rename from roles/kive_server/files/kive_purge.service rename to cluster-setup/deployment/roles/kive_server/files/kive_purge.service diff --git a/roles/kive_server/files/kive_purge.timer b/cluster-setup/deployment/roles/kive_server/files/kive_purge.timer similarity index 100% rename from roles/kive_server/files/kive_purge.timer rename to cluster-setup/deployment/roles/kive_server/files/kive_purge.timer diff --git a/roles/kive_server/files/kive_purge_synch.service b/cluster-setup/deployment/roles/kive_server/files/kive_purge_synch.service similarity index 100% rename from roles/kive_server/files/kive_purge_synch.service rename to cluster-setup/deployment/roles/kive_server/files/kive_purge_synch.service diff --git a/roles/kive_server/files/kive_purge_synch.timer b/cluster-setup/deployment/roles/kive_server/files/kive_purge_synch.timer similarity index 100% rename from roles/kive_server/files/kive_purge_synch.timer rename to cluster-setup/deployment/roles/kive_server/files/kive_purge_synch.timer diff --git a/roles/kive_server/files/purge_apache_logs b/cluster-setup/deployment/roles/kive_server/files/purge_apache_logs similarity index 54% rename from roles/kive_server/files/purge_apache_logs rename to cluster-setup/deployment/roles/kive_server/files/purge_apache_logs index 69bf4a03e..33d81bc7b 100644 --- a/roles/kive_server/files/purge_apache_logs +++ b/cluster-setup/deployment/roles/kive_server/files/purge_apache_logs @@ -1,5 +1,5 @@ #!/usr/bin/bash # This removes all but the 10 most recent error_log files. 
-ls -r /var/log/httpd/error_log*|tail -n+11|xargs rm -f +ls -r /var/log/apache2/error_log*|tail -n+11|xargs rm -f # This removes all but the 10 most recent access_log files. -ls -r /var/log/httpd/access_log*|tail -n+11|xargs rm -f +ls -r /var/log/apache2/access_log*|tail -n+11|xargs rm -f diff --git a/roles/kive_server/files/rsnapshot_alpha.timer b/cluster-setup/deployment/roles/kive_server/files/rsnapshot_alpha.timer similarity index 100% rename from roles/kive_server/files/rsnapshot_alpha.timer rename to cluster-setup/deployment/roles/kive_server/files/rsnapshot_alpha.timer diff --git a/roles/kive_server/files/rsnapshot_beta.timer b/cluster-setup/deployment/roles/kive_server/files/rsnapshot_beta.timer similarity index 100% rename from roles/kive_server/files/rsnapshot_beta.timer rename to cluster-setup/deployment/roles/kive_server/files/rsnapshot_beta.timer diff --git a/roles/kive_server/files/rsnapshot_gamma.timer b/cluster-setup/deployment/roles/kive_server/files/rsnapshot_gamma.timer similarity index 100% rename from roles/kive_server/files/rsnapshot_gamma.timer rename to cluster-setup/deployment/roles/kive_server/files/rsnapshot_gamma.timer diff --git a/cluster-setup/deployment/roles/kive_server/files/wsgi.load b/cluster-setup/deployment/roles/kive_server/files/wsgi.load new file mode 100644 index 000000000..d76d1d7a3 --- /dev/null +++ b/cluster-setup/deployment/roles/kive_server/files/wsgi.load @@ -0,0 +1 @@ +LoadModule wsgi_module /usr/lib/apache2/modules/mod_wsgi.so diff --git a/roles/kive_server/handlers/main.yml b/cluster-setup/deployment/roles/kive_server/handlers/main.yml similarity index 68% rename from roles/kive_server/handlers/main.yml rename to cluster-setup/deployment/roles/kive_server/handlers/main.yml index 8b42c90cb..66639bd0e 100644 --- a/roles/kive_server/handlers/main.yml +++ b/cluster-setup/deployment/roles/kive_server/handlers/main.yml @@ -2,10 +2,10 @@ become: true become_user: root systemd: - name: "postgresql-12" + name: "postgresql" state: restarted -- name: restart http server +- name: restart web server become: true systemd: - name: httpd + name: apache2 state: restarted diff --git a/roles/kive_server/meta/main.yml b/cluster-setup/deployment/roles/kive_server/meta/main.yml similarity index 100% rename from roles/kive_server/meta/main.yml rename to cluster-setup/deployment/roles/kive_server/meta/main.yml diff --git a/roles/kive_server/tasks/main.yml b/cluster-setup/deployment/roles/kive_server/tasks/main.yml similarity index 50% rename from roles/kive_server/tasks/main.yml rename to cluster-setup/deployment/roles/kive_server/tasks/main.yml index 599c34fd0..354590718 100644 --- a/roles/kive_server/tasks/main.yml +++ b/cluster-setup/deployment/roles/kive_server/tasks/main.yml @@ -1,23 +1,233 @@ --- +# NOTE(nknight): this is done with `file` instead of during user creation so that we +# can set the permissions explicitly. 
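+# (The symbolic mode "go-rx" below removes read/execute from group and other, so +# only the kive user itself can traverse its home directory.)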
+- name: create kive home directory + file: + path: /home/kive/ + state: directory + mode: "go-rx" + group: kive + owner: kive + + +- name: create kive app directories + become: true + loop: + - /etc/kive/ + - /var/kive/ + - /var/log/kive/ + - "{{ kive_media_root }}" + file: + path: "{{ item }}" + state: directory + mode: "2770" + owner: kive + group: kive + + +- name: kive environment configuration + become: true + become_user: kive + block: + - name: set kive environment variables and activate the virtualenv + blockinfile: + path: /home/kive/.bash_profile + block: | + export KIVE_DB_NAME={{ kive_db_name }} + export KIVE_DB_USER={{ kive_db_user }} + export KIVE_DB_HOST={{ kive_db_host }} + export KIVE_DB_PASSWORD={{ kive_db_password }} + + export KIVE_MEDIA_ROOT={{ kive_media_root }} + export KIVE_STATIC_ROOT={{ kive_static_root }} + export KIVE_SLURM_PATH={{ kive_slurm_path }} + source {{ kive_venv }}/bin/activate + create: true # create the file if it doesn't exist + backup: true + owner: kive + group: kive + - name: install package dependencies + become: true + apt: + update_cache: true + name: + - postgresql + - apache2 + - apache2-utils + - apache2-dev + # - libapache2-mod-wsgi-py3 + - barman + - barman-cli + - rsnapshot + +- name: fetch kive source code + become: true + git: + dest: "{{ kive_root }}" + repo: https://github.com/cfe-lab/Kive.git + version: "{{ kive_version | default('master') }}" + update: "{{ update_kive_source | default('no') }}" + +- name: set up the Kive Python virtualenv become: true block: - - name: install postgresql - dnf: - name: - - postgresql12-server - - httpd - - python3-mod_wsgi + - name: create directory for virtualenv + file: + path: "{{ kive_venv }}" + state: directory + - name: copy requirements file to track changes + register: kive_requirements + copy: + dest: "{{ kive_venv }}/requirements.txt" + src: "{{ kive_root }}/requirements.txt" + - name: kive python dependencies + when: kive_requirements.changed + pip: + requirements: "{{ kive_root }}/requirements.txt" + virtualenv_command: "{{ kive_python_executable }} -m venv" + virtualenv: "{{ kive_venv }}" +- name: install mod_wsgi from source + become: true + block: + - name: check if mod_wsgi source files are already downloaded + stat: + path: "/usr/local/src/{{ mod_wsgi_tarball }}" + register: mod_wsgi_download -- name: httpd configuration + - name: fetch mod_wsgi source files + become: true + get_url: + url: "{{ mod_wsgi_source_url }}" + dest: "/usr/local/src/{{ mod_wsgi_tarball }}" + checksum: "sha256:{{ mod_wsgi_sha256_checksum }}" + when: not mod_wsgi_download.stat.exists + + - name: decompress mod_wsgi tarball + unarchive: + remote_src: true + src: "/usr/local/src/{{ mod_wsgi_tarball }}" + dest: "/usr/local/src" + owner: root + group: root + + - name: make a link to the mod_wsgi source code directory + file: + src: "/usr/local/src/{{ mod_wsgi_basename }}" + dest: "/usr/local/src/mod_wsgi" + state: link + + - name: configure mod_wsgi build + command: + argv: + - "/usr/local/src/mod_wsgi/configure" + - "--with-python=/usr/bin/python3.7" + chdir: "/usr/local/src/mod_wsgi" + creates: "/usr/local/src/mod_wsgi/Makefile" + + - name: build and install mod_wsgi + make: + chdir: "/usr/local/src/mod_wsgi" + target: install + + - name: add wsgi to the "modules available" + copy: + src: wsgi.load + dest: /etc/apache2/mods-available + + - name: enable the wsgi module in apache + community.general.apache2_module: + state: present + name: wsgi + +# This is following the general instructions for Ubuntu SSL
support +# in `/usr/share/doc/apache2/README.Debian.gz`. +# For the certificates to be installed, they should be placed in the +# directory you're running Ansible in, with the desired names. +- name: enable Apache SSL support become: true block: - - name: httpd conf file + - name: enable the SSL module + community.general.apache2_module: + state: present + name: ssl + + - name: install SSL certificate + copy: + src: star_cfe.crt + dest: /etc/ssl/certs/star_cfe.crt + owner: root + group: root + + - name: install SSL key copy: - src: 001-kive.conf - dest: /etc/httpd/conf.d/ + src: star_cfe.key + dest: /etc/ssl/private/star_cfe.key + owner: root + group: root + mode: "0600" + + - name: install SSL certificate chain file + copy: + src: DigiCertCA.crt + dest: /etc/ssl/certs/DigiCertCA.crt + owner: root + group: root + + +- name: set up log purging + become: true + block: + - name: apache log purge script + copy: + src: purge_apache_logs + dest: /usr/sbin + mode: "755" + - name: purge service files + loop: + - kive_purge.service + - kive_purge.timer + - kive_purge_synch.service + - kive_purge_synch.timer + copy: + src: "{{ item }}" + dest: /etc/systemd/system + - name: purge config file + template: + src: kive_purge.conf.j2 + dest: /etc/kive/kive_purge.conf + owner: kive + group: kive + mode: "640" + - name: enable and start kive_purge timers + ignore_errors: "{{ ansible_check_mode }}" # Unit files not copied yet + loop: + - kive_purge.timer + - kive_purge_synch.timer + systemd: + name: "{{ item }}" + enabled: true + state: started + + +- name: web server configuration + become: true + block: + - name: Kive-specific configuration + block: + - name: install Kive-specific config file + template: + src: 001-kive.conf.j2 + dest: /etc/apache2/conf-available/001-kive.conf + owner: root + group: root + mode: "644" + - name: enable Kive-specific configuration + command: + cmd: "a2enconf 001-kive" + - name: kive web conf file template: src: kive_apache.conf.j2 @@ -25,161 +235,185 @@ owner: kive group: kive mode: "640" + - name: server environment variables via systemd blockinfile: - path: /etc/systemd/system/httpd.service.d/override.conf + path: /etc/systemd/system/apache2.service.d/override.conf create: true backup: true mode: "644" block: | [Service] EnvironmentFile=/etc/kive/kive_apache.conf - Environment=APACHE_RUN_USER=kive - Environment=APACHE_RUN_GROUP=kive - - name: update httpd.conf - loop: - - from: "Listen 80$" - to: "Listen {{ kive_listen_port }}" - - from: "User apache$" - to: "User {{ kive_httpd_user }}" - - from: "Group apache$" - to: "Group {{kive_httpd_group }}" - replace: - path: /etc/httpd/conf/httpd.conf - regexp: "{{ item.from }}" - replace: "{{ item.to }}" - - name: apache log purge settings - loop: - - from: 'ErrorLog "logs/error_log"' - to: ErrorLog "|/usr/sbin/rotatelogs -l -p /usr/sbin/purge_apache_logs /var/log/httpd/error_log.%Y-%m-%d-%H%M%S 15M - - from: 'CustomLog "logs/access_log" combined' - to: CustomLog "|/usr/sbin/rotatelogs -l -p /usr/sbin/purge_apache_logs /var/log/httpd/access_log.%Y-%m-%d-%H%M%S 15M" combined - replace: - path: /etc/httpd/conf/httpd.conf - regexp: "{{ item.from }}" - replace: "{{ item.to }}" - - name: enable httpd + + - name: update apache2 envvars + blockinfile: + path: /etc/apache2/envvars + backup: true + block: | + export APACHE_RUN_USER=kive + export APACHE_RUN_GROUP=kive + +# - name: update apache2 port +# blockinfile: +# path: /etc/apache2/ports.conf +# backup: true +# block: | +# Listen {{ kive_listen_port }} + + - name: set up rotating 
apache logs + block: + - name: add rotating logs configuration file + blockinfile: + path: /etc/apache2/conf-available/rotate-kive-logs.conf + create: true + backup: true + mode: "644" + block: | + ErrorLog "|/usr/bin/rotatelogs -l -p /usr/sbin/purge_apache_logs /var/log/apache2/error_log.%Y-%m-%d-%H%M%S 15M" + CustomLog "|/usr/bin/rotatelogs -l -p /usr/sbin/purge_apache_logs /var/log/apache2/access_log.%Y-%m-%d-%H%M%S 15M" combined + - name: activate rotating logs configuration + command: + cmd: "a2enconf rotate-kive-logs" + + - name: install the httpd site configuration for Kive + copy: + src: 001-kive-ssl.conf + dest: /etc/apache2/sites-available + owner: root + group: root + + - name: enable the httpd site configuration for Kive + command: + cmd: "a2ensite 001-kive-ssl" + + - name: enable and (re)start apache2 systemd: - name: httpd - state: started + name: apache2 + state: restarted enabled: true - - name: ensure firewalld is running + daemon_reload: true + + - name: ensure ufw (the firewall) is running systemd: - name: firewalld - state: started + name: ufw + state: restarted enabled: true - - name: firewall's internal interface - ansible.posix.firewalld: - zone: internal - interface: "{{ kive_internal_interface }}" - permanent: yes - immediate: yes - state: enabled - - name: firewall's external interface - ansible.posix.firewalld: - zone: external - interface: "{{ kive_external_interface }}" - permanent: yes - immediate: yes - state: enabled - - name: firewall's internal interface should ACCEPT - register: internal_accept - ansible.posix.firewalld: - zone: internal - target: ACCEPT - permanent: yes + +# - name: firewall's internal interface should ACCEPT +# register: internal_accept +# community.general.ufw: +# default: allow +# interface: "{{ head_internal_interface }}" +# direction: incoming +# state: enabled +# +# - name: reload internal interface firewall config +# when: internal_accept.changed +# community.general.ufw: +# interface: "{{ head_internal_interface }}" +# state: reloaded + + - name: permit access to Postgres on the internal interface + community.general.ufw: + interface: "{{ head_internal_interface }}" + direction: in + port: 5432 + protocol: tcp + rule: allow state: enabled - - name: reload firewall config - when: internal_accept.changed - command: firewall-cmd --reload + - name: permit https service through firewall - ansible.posix.firewalld: - zone: external - service: https - permanent: yes - immediate: yes + community.general.ufw: + interface: "{{ head_external_interface }}" + direction: in + port: https + protocol: tcp + rule: allow state: enabled + - name: permit http service through firewall - ansible.posix.firewalld: - zone: external - service: http - permanent: yes - immediate: yes + community.general.ufw: + interface: "{{ head_external_interface }}" + direction: in + port: http + protocol: tcp + rule: allow state: enabled +- name: enable and start database service + become: true + systemd: + name: "postgresql@14-main" + state: started + enabled: true - name: configure postgres server become: true become_user: postgres block: - - name: ensure database is initialized - become: true - become_user: root - command: - cmd: "/usr/pgsql-12/bin/postgresql-12-setup initdb" - creates: "/var/lib/pgsql/12/data/PG_VERSION" - - name: start database service - become: true - become_user: root - systemd: - name: "postgresql-12" - state: started - enabled: true + - name: add kive entries to pg_hba.conf block: - name: local connections notify: Restart postgresql service 
postgresql_pg_hba: contype: local - dest: "/var/lib/pgsql/12/data/pg_hba.conf" + dest: "/etc/postgresql/14/main/pg_hba.conf" databases: all users: kive method: "scram-sha-256" + - name: host connections notify: Restart postgresql service postgresql_pg_hba: contype: host - dest: "/var/lib/pgsql/12/data/pg_hba.conf" + dest: "/etc/postgresql/14/main/pg_hba.conf" databases: all users: kive - source: "{{kive_db_host}}/24" + source: "{{ kive_db_subnet }}" method: "scram-sha-256" + - name: barman connections notify: Restart postgresql service postgresql_pg_hba: contype: host - dest: "/var/lib/pgsql/12/data/pg_hba.conf" + dest: "/etc/postgresql/14/main/pg_hba.conf" databases: all users: barman source: 127.0.0.1/32 method: "scram-sha-256" + - name: barman IPv6 connections notify: Restart postgresql service postgresql_pg_hba: contype: host - dest: "/var/lib/pgsql/12/data/pg_hba.conf" + dest: "/etc/postgresql/14/main/pg_hba.conf" databases: all users: barman source: "::1/128" method: "scram-sha-256" + - name: streaming_barman connections notify: Restart postgresql service postgresql_pg_hba: contype: host - dest: "/var/lib/pgsql/12/data/pg_hba.conf" + dest: "/etc/postgresql/14/main/pg_hba.conf" databases: replication users: streaming_barman source: 127.0.0.1/32 method: "scram-sha-256" + - name: streaming_barman IPv6 connections notify: Restart postgresql service postgresql_pg_hba: contype: host - dest: "/var/lib/pgsql/12/data/pg_hba.conf" + dest: "/etc/postgresql/14/main/pg_hba.conf" databases: replication users: streaming_barman source: "::1/128" method: "scram-sha-256" + - name: local database settings in postgresql.conf notify: Restart postgresql service loop: @@ -197,16 +431,16 @@ value: 10 - option: max_replication_slots value: 10 - # Additionally, setting up standard WAL archiving. - - option: archive_mode - value: "on" - - option: archive_command - value: "'barman-wal-archive localhost kive %p'" +# # Additionally, setting up standard WAL archiving. +# - option: archive_mode +# value: "on" +# - option: archive_command +# value: "'barman-wal-archive localhost kive %p'" # Remove old option that was accidentally added. 
- option: archiver community.general.ini_file: backup: yes - path: /var/lib/pgsql/12/data/postgresql.conf + path: "/etc/postgresql/14/main/postgresql.conf" create: no section: null option: "{{ item.option }}" @@ -214,6 +448,10 @@ state: "{{ 'present' if item.value is defined else 'absent' }}" +- name: Flush handlers + meta: flush_handlers + + - name: kive installation block: - name: build kive API @@ -223,7 +461,7 @@ chdir: "{{ kive_root }}/api/" creates: "{{ kive_root }}/api/build/" - name: collect kive's static files - notify: restart http server + # notify: restart web server environment: KIVE_STATIC_ROOT: "{{ kive_static_root }}" community.general.django_manage: @@ -232,42 +470,6 @@ virtualenv: "{{ kive_venv }}" -- name: set up log purging - become: true - block: - - name: apache log purge script - copy: - src: purge_apache_logs - dest: /usr/sbin - mode: "755" - - name: purge service files - loop: - - kive_purge.service - - kive_purge.timer - - kive_purge_synch.service - - kive_purge_synch.timer - copy: - src: "{{ item }}" - dest: /etc/systemd/system - - name: purge config file - template: - src: kive_purge.conf.j2 - dest: /etc/kive/kive_purge.conf - owner: kive - group: kive - mode: "640" - - name: enable and start kive_purge timers - ignore_errors: "{{ ansible_check_mode }}" # Unit files not copied yet - loop: - - kive_purge.timer - - kive_purge_synch.timer - systemd: - name: "{{ item }}" - enabled: true - state: started - - - - name: set up kive database block: - name: create kive database @@ -284,6 +486,13 @@ name: kive password: "{{ kive_db_password }}" db: kive + - name: grant kive database user all privileges + become: true + become_user: postgres + postgresql_privs: + role: kive + db: kive + type: database priv: ALL - name: create barman database user become: true @@ -309,8 +518,8 @@ environment: KIVE_DB_NAME: "{{ kive_db_name }}" KIVE_DB_USER: "{{ kive_db_user }}" - KIVE_DB_HOST: "{{kive_db_host }}" - KIVE_DB_PASSWORD: "{{ kive_db_password}}" + KIVE_DB_HOST: "{{ kive_db_host }}" + KIVE_DB_PASSWORD: "{{ kive_db_password }}" become: true become_user: kive community.general.django_manage: @@ -318,22 +527,42 @@ app_path: "{{ kive_root }}/kive" virtualenv: "{{ kive_venv }}" -- name: database backup global config - loop: - - option: path_prefix - value: /usr/pgsql-12/bin/ - - option: archiver - value: "on" - community.general.ini_file: - backup: yes - path: /etc/barman.conf - create: no - section: barman - option: "{{ item.option }}" - value: "{{ item.value }}" +#- name: database backup global config +# loop: +# - option: path_prefix +# value: /usr/pgsql-12/bin/ +# - option: archiver +# value: "on" +# community.general.ini_file: +# backup: yes +# path: /etc/barman.conf +# create: no +# section: barman +# option: "{{ item.option }}" +# value: "{{ item.value }}" + +- name: create the Barman backup directory + file: + path: "{{ kive_backup_path }}/BarmanDBBackup" + state: directory + owner: barman + group: barman + +- name: barman passwords file + blockinfile: + path: /var/lib/barman/.pgpass + create: yes + owner: barman + group: barman + mode: u=rw,g=,o= + block: | + localhost:*:*:barman:{{ barman_password }} + localhost:*:*:streaming_barman:{{ streaming_barman_password }} - name: database backup kive config loop: + - option: path_prefix + value: /usr/lib/postgresql/14/bin - option: description value: Kive database - option: conninfo @@ -346,9 +575,9 @@ value: bzip2 # archiver = on is necessary for the "fallback" WAL backup that happens via # PostgreSQL's archive_command setting in 
postgresql.conf (which we have - # configured to use barman-wal-archive - - option: archiver - value: "on" + # configured to use barman-wal-archive) +# - option: archiver +# value: "on" - option: streaming_conninfo value: host=localhost user=streaming_barman dbname=kive - option: streaming_archiver @@ -367,6 +596,11 @@ option: "{{ item.option }}" value: "{{ item.value }}" +#- name: force a WAL switch to verify the WAL archiving process +# become: true +# become_user: barman +# command: barman switch-wal --force --archive kive + - name: SSH keys block: - name: Set key locations @@ -403,17 +637,8 @@ user: postgres key: "{{ lookup('file', '/var/lib/barman/.ssh/id_rsa.pub') }}" -- name: barman passwords file - blockinfile: - path: /var/lib/barman/.pgpass - create: yes - owner: barman - group: barman - mode: u=rw,g=,o= - block: | - localhost:*:*:barman:{{ barman_password }} - localhost:*:*:streaming_barman:{{ streaming_barman_password }} - +# This variable should be a JSON-formatted list of 2-lists, like +# [["User One", "userone@bccfe.ca"], ..., ["User N", "userN@bccfe.ca"]] - name: parse admin e-mails from json set_fact: mail_admins_list: "{{ kive_admins | from_json }}" @@ -421,7 +646,7 @@ - name: build admin e-mails list loop: "{{ mail_admins_list }}" set_fact: - mail_admins_emails: "{{ (mail_admins_emails | default([])) + [item.1] }}" + mail_admins_emails: "{{ (mail_admins_emails | default([])) + [item[1]] }}" # Copy crontab_mail.py out of the Kive source code, because it gets run by root. # This way, it can be locked down more than the Kive source code. @@ -432,10 +657,17 @@ owner: root group: root mode: u=rw,g=r,o=r + +- name: create the rsnapshot backup directory + file: + path: "{{ kive_backup_path }}/rsnapshot" + state: directory + - name: rsnapshot config file template: src: rsnapshot.conf.j2 dest: /etc/rsnapshot.conf + - name: list of scheduled services set_fact: scheduled_service_names: @@ -443,6 +675,7 @@ - rsnapshot_alpha - rsnapshot_beta - rsnapshot_gamma + - name: scheduled service files loop: "{{ scheduled_service_names }}" template: diff --git a/cluster-setup/deployment/roles/kive_server/templates/001-kive.conf.j2 b/cluster-setup/deployment/roles/kive_server/templates/001-kive.conf.j2 new file mode 100644 index 000000000..991574ea0 --- /dev/null +++ b/cluster-setup/deployment/roles/kive_server/templates/001-kive.conf.j2 @@ -0,0 +1,15 @@ +WSGIScriptAlias / {{ kive_root }}/kive/kive/wsgi.py +WSGIPythonPath {{ kive_root }}/kive:{{ kive_venv }}/lib/{{ kive_python_executable }}/site-packages + + + +Require all granted + + + +Alias /static {{ kive_static_root }} + + +Order deny,allow +Allow from all + diff --git a/roles/kive_server/templates/barman_backup.service.j2 b/cluster-setup/deployment/roles/kive_server/templates/barman_backup.service.j2 similarity index 82% rename from roles/kive_server/templates/barman_backup.service.j2 rename to cluster-setup/deployment/roles/kive_server/templates/barman_backup.service.j2 index 0a0bd07e4..de64f46d3 100644 --- a/roles/kive_server/templates/barman_backup.service.j2 +++ b/cluster-setup/deployment/roles/kive_server/templates/barman_backup.service.j2 @@ -7,7 +7,7 @@ User=barman ExecStart=/opt/venv_kive/bin/python /opt/crontab_mail.py \ --log /var/log/barman/kive.log \ --level ERROR \ - --subject {{ (kive_subject_prefix + "barman") | quote }} \ + --subject {{ (kive_subject_prefix + " barman") | quote }} \ --from {{ kive_server_email | quote }} \ {{ mail_admins_emails | join(',') | quote }} \ /bin/barman backup kive diff --git 
a/roles/kive_server/templates/kive_apache.conf.j2 b/cluster-setup/deployment/roles/kive_server/templates/kive_apache.conf.j2 similarity index 100% rename from roles/kive_server/templates/kive_apache.conf.j2 rename to cluster-setup/deployment/roles/kive_server/templates/kive_apache.conf.j2 diff --git a/roles/kive_server/templates/kive_purge.conf.j2 b/cluster-setup/deployment/roles/kive_server/templates/kive_purge.conf.j2 similarity index 72% rename from roles/kive_server/templates/kive_purge.conf.j2 rename to cluster-setup/deployment/roles/kive_server/templates/kive_purge.conf.j2 index 70b53edaf..aa9ad0eec 100644 --- a/roles/kive_server/templates/kive_purge.conf.j2 +++ b/cluster-setup/deployment/roles/kive_server/templates/kive_purge.conf.j2 @@ -8,7 +8,9 @@ KIVE_SERVER_EMAIL={{ kive_server_email | quote }} KIVE_ADMINS={{ kive_admins | quote }} KIVE_SUBJECT_PREFIX={{ kive_subject_prefix | quote }} -# Set these in /root/ansible-rundir/env_vars.yml if you don't like the defaults in settings.py +# The KIVE_PURGE_START, KIVE_PURGE_STOP, and KIVE_LOG_LEVEL variables +# can be set in Ansible prior to deployment if you don't like the defaults +# in settings.py. # KIVE_PURGE_START=20GB # KIVE_PURGE_STOP=15GB # KIVE_PURGE_DATASET_AGING=1.0 @@ -16,15 +18,15 @@ KIVE_SUBJECT_PREFIX={{ kive_subject_prefix | quote }} # KIVE_PURGE_CONTAINER_AGING=10.0 # KIVE_PURGE_WAIT='0 days, 1:00:00' # KIVE_PURGE_BATCH_SIZE=100 -# KIVE_LOG_LEVEL=WARN +# KIVE_LOG_LEVEL=WARNING {% if kive_purge_start is defined %} -KIVE_PURGE_START={{kive_purge_start}} +KIVE_PURGE_START={{ kive_purge_start }} {% endif %} {% if kive_purge_stop is defined %} -KIVE_PURGE_STOP={{kive_purge_stop}} +KIVE_PURGE_STOP={{ kive_purge_stop }} {% endif %} {% if kive_log_level is defined %} -KIVE_LOG_LEVEL={{kive_log_level}} +KIVE_LOG_LEVEL={{ kive_log_level }} {% endif %} # KIVE_LOG is set separately for each service in the .service files. 
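+# +# For example, with hypothetical group_vars of kive_purge_start: 20GB, +# kive_purge_stop: 15GB and kive_log_level: INFO, the rendered file would end with: +# KIVE_PURGE_START=20GB +# KIVE_PURGE_STOP=15GB +# KIVE_LOG_LEVEL=INFO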
diff --git a/roles/kive_server/templates/rsnapshot.conf.j2 b/cluster-setup/deployment/roles/kive_server/templates/rsnapshot.conf.j2 similarity index 100% rename from roles/kive_server/templates/rsnapshot.conf.j2 rename to cluster-setup/deployment/roles/kive_server/templates/rsnapshot.conf.j2 diff --git a/roles/kive_server/templates/rsnapshot_alpha.service.j2 b/cluster-setup/deployment/roles/kive_server/templates/rsnapshot_alpha.service.j2 similarity index 92% rename from roles/kive_server/templates/rsnapshot_alpha.service.j2 rename to cluster-setup/deployment/roles/kive_server/templates/rsnapshot_alpha.service.j2 index 4158512e4..fa2bfaee9 100644 --- a/roles/kive_server/templates/rsnapshot_alpha.service.j2 +++ b/cluster-setup/deployment/roles/kive_server/templates/rsnapshot_alpha.service.j2 @@ -8,4 +8,4 @@ ExecStart=/opt/venv_kive/bin/python /opt/crontab_mail.py \ --subject {{ (kive_subject_prefix + "rsnapshot alpha") | quote }} \ --from {{ kive_server_email | quote }} \ {{ mail_admins_emails | join(',') | quote }} \ - /bin/rsnapshot alpha + /usr/bin/rsnapshot alpha diff --git a/roles/kive_server/templates/rsnapshot_beta.service.j2 b/cluster-setup/deployment/roles/kive_server/templates/rsnapshot_beta.service.j2 similarity index 92% rename from roles/kive_server/templates/rsnapshot_beta.service.j2 rename to cluster-setup/deployment/roles/kive_server/templates/rsnapshot_beta.service.j2 index ed3f0f081..eab8867ec 100644 --- a/roles/kive_server/templates/rsnapshot_beta.service.j2 +++ b/cluster-setup/deployment/roles/kive_server/templates/rsnapshot_beta.service.j2 @@ -8,4 +8,4 @@ ExecStart=/opt/venv_kive/bin/python /opt/crontab_mail.py \ --subject {{ (kive_subject_prefix + "rsnapshot beta") | quote }} \ --from {{ kive_server_email | quote }} \ {{ mail_admins_emails | join(',') | quote }} \ - /bin/rsnapshot beta + /usr/bin/rsnapshot beta diff --git a/roles/kive_server/templates/rsnapshot_gamma.service.j2 b/cluster-setup/deployment/roles/kive_server/templates/rsnapshot_gamma.service.j2 similarity index 92% rename from roles/kive_server/templates/rsnapshot_gamma.service.j2 rename to cluster-setup/deployment/roles/kive_server/templates/rsnapshot_gamma.service.j2 index 3a9b3e90e..7f905c815 100644 --- a/roles/kive_server/templates/rsnapshot_gamma.service.j2 +++ b/cluster-setup/deployment/roles/kive_server/templates/rsnapshot_gamma.service.j2 @@ -8,4 +8,4 @@ ExecStart=/opt/venv_kive/bin/python /opt/crontab_mail.py \ --subject {{ (kive_subject_prefix + "rsnapshot gamma") | quote }} \ --from {{ kive_server_email | quote }} \ {{ mail_admins_emails | join(',') | quote }} \ - /bin/rsnapshot gamma + /usr/bin/rsnapshot gamma diff --git a/cluster-setup/deployment/roles/mount_network_drives/defaults/main.yml b/cluster-setup/deployment/roles/mount_network_drives/defaults/main.yml new file mode 100644 index 000000000..e717a8a4e --- /dev/null +++ b/cluster-setup/deployment/roles/mount_network_drives/defaults/main.yml @@ -0,0 +1,6 @@ +--- + +cifs_credentials: /opt/smbcredentials +cifs_credentials_volume: /opt +mount_point: /media/macdatafile +network_share: //localhost/macdatafile # this won't work, you have to customize this diff --git a/cluster-setup/deployment/roles/mount_network_drives/tasks/main.yml b/cluster-setup/deployment/roles/mount_network_drives/tasks/main.yml new file mode 100644 index 000000000..2dc84c253 --- /dev/null +++ b/cluster-setup/deployment/roles/mount_network_drives/tasks/main.yml @@ -0,0 +1,19 @@ +--- + +- name: set mount options for the network drive + set_fact: + mount_options: 
"credentials={{ cifs_credentials }},x-systemd.requires-mounts-for={{ cifs_credentials_volume }},noperm,file_mode=0777,dir_mode=0777" + +- name: set read-only if configured + when: read_only | bool + set_fact: + mount_options: "{{ mount_options }},ro" + +- name: mount macdatafile with the appropriate options + become: true + mount: + path: "{{ mount_point }}" + src: "{{ network_share }}" + opts: "{{ mount_options }}" + fstype: cifs + state: mounted diff --git a/roles/munge_node/README.md b/cluster-setup/deployment/roles/munge_node/README.md similarity index 100% rename from roles/munge_node/README.md rename to cluster-setup/deployment/roles/munge_node/README.md diff --git a/roles/munge_node/files/munge-test.key b/cluster-setup/deployment/roles/munge_node/files/munge-test.key similarity index 100% rename from roles/munge_node/files/munge-test.key rename to cluster-setup/deployment/roles/munge_node/files/munge-test.key diff --git a/roles/munge_node/tasks/main.yml b/cluster-setup/deployment/roles/munge_node/tasks/main.yml similarity index 87% rename from roles/munge_node/tasks/main.yml rename to cluster-setup/deployment/roles/munge_node/tasks/main.yml index 2dfdd84cf..e69c8efe3 100644 --- a/roles/munge_node/tasks/main.yml +++ b/cluster-setup/deployment/roles/munge_node/tasks/main.yml @@ -2,10 +2,10 @@ become: true block: - name: install munge - dnf: + apt: name: - munge - - munge-libs + - libmunge2 state: present - name: deploy munge testing key copy: @@ -17,5 +17,5 @@ - name: start munge service systemd: name: munge - state: started + state: restarted enabled: true diff --git a/roles/singularity_node/README.md b/cluster-setup/deployment/roles/singularity_node/README.md similarity index 100% rename from roles/singularity_node/README.md rename to cluster-setup/deployment/roles/singularity_node/README.md diff --git a/cluster-setup/deployment/roles/singularity_node/tasks/main.yml b/cluster-setup/deployment/roles/singularity_node/tasks/main.yml new file mode 100644 index 000000000..d32abed87 --- /dev/null +++ b/cluster-setup/deployment/roles/singularity_node/tasks/main.yml @@ -0,0 +1,9 @@ +--- + +# This role installs Singularity from the released .deb file. + +- name: install singularity + become: true + apt: + deb: https://github.com/sylabs/singularity/releases/download/v3.11.4/singularity-ce_3.11.4-jammy_amd64.deb + state: present diff --git a/cluster-setup/deployment/roles/slurm_builder/README.md b/cluster-setup/deployment/roles/slurm_builder/README.md new file mode 100644 index 000000000..dc4d0a025 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_builder/README.md @@ -0,0 +1,12 @@ +This role fetches the Slurm source code and builds +Slurm. It depends on the [slurm dependencies] role to install the dependencies +needed to build Slurm. + +It's used by the [slurm controller] role to build Slurm and place it on /usr/local, +which is shared via NFS with the worker nodes. The [slurm node] role fails if +`/usr/local/lib/systemd/system/slurmd.service` isn't present (it should be if you've +run this role on the head node). 
+ +[slurm node]: ../slurm_node +[slurm controller]: ../slurm_controller +[slurm dependencies]: ../slurm_dependencies diff --git a/cluster-setup/deployment/roles/slurm_builder/meta/main.yml b/cluster-setup/deployment/roles/slurm_builder/meta/main.yml new file mode 100644 index 000000000..0fda72054 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_builder/meta/main.yml @@ -0,0 +1,4 @@ +--- + +dependencies: + - slurm_dependencies diff --git a/cluster-setup/deployment/roles/slurm_builder/tasks/main.yml b/cluster-setup/deployment/roles/slurm_builder/tasks/main.yml new file mode 100644 index 000000000..d6b0ad4dd --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_builder/tasks/main.yml @@ -0,0 +1,58 @@ +--- + +# Installing packages used in the building of Slurm, as per +# https://slurm.schedmd.com/quickstart_admin.html +# Certain packages need to be installed when Slurm is compiled to enable +# support for certain features. In the first play we indicate with +# comments which packages are used for which features. + +- name: check if slurm source files are already downloaded + stat: + path: "/usr/local/src/{{ slurm_tarball }}" + register: slurm_download + +- name: fetch slurm source files + become: true + get_url: + url: "{{ slurm_source_url }}" + dest: "/usr/local/src/{{ slurm_tarball }}" + checksum: "sha1:{{ slurm_sha1_checksum }}" + when: not slurm_download.stat.exists + +- name: decompress Slurm tarball + become: true + unarchive: + remote_src: true + src: "/usr/local/src/{{ slurm_tarball }}" + dest: "/usr/local/src" + owner: root + group: root + +- name: make a link to the Slurm source code directory + become: true + file: + src: "/usr/local/src/{{ slurm_src_basename }}" + dest: "/usr/local/src/slurm" + state: link + +- name: configure Slurm build + become: true + command: + argv: + - "/usr/local/src/slurm/configure" + - "--sysconfdir=/usr/local/etc/slurm" + - "--with-systemdsystemunitdir=/usr/local/lib/systemd/system" + chdir: "/usr/local/src/slurm" + creates: "/usr/local/src/slurm/Makefile" + +- name: build and install Slurm + become: true + make: + chdir: "/usr/local/src/slurm" + target: install + +- name: make Slurm libraries accessible to the system + become: true + command: + cmd: "ldconfig -n /usr/local/lib" + diff --git a/cluster-setup/deployment/roles/slurm_configuration/README.md b/cluster-setup/deployment/roles/slurm_configuration/README.md new file mode 100644 index 000000000..f647bc908 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_configuration/README.md @@ -0,0 +1,2 @@ +This role sets up the `slurm` user on a host along with some system directories +required for `slurmd` or `slurmctld` (if they aren't already set up). 
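The `slurm_builder` tasks above download and unpack a release tarball using the `slurm_source_url`, `slurm_tarball`, `slurm_src_basename`, and `slurm_sha1_checksum` variables, which are expected to be supplied elsewhere (e.g. in `group_vars`). A hypothetical sketch of those entries; the version number, the `slurm_version` helper variable, and the checksum are placeholders, not values taken from this change:

```yaml
# Hypothetical group_vars entries consumed by the slurm_builder role.
# slurm_version is an illustrative helper variable; the checksum below is a
# placeholder and must be replaced with the SHA-1 of the tarball you fetch.
slurm_version: "23.02.6"
slurm_src_basename: "slurm-{{ slurm_version }}"
slurm_tarball: "{{ slurm_src_basename }}.tar.bz2"
slurm_source_url: "https://download.schedmd.com/slurm/{{ slurm_tarball }}"
slurm_sha1_checksum: "0000000000000000000000000000000000000000"
```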
diff --git a/cluster-setup/deployment/roles/slurm_configuration/tasks/main.yml b/cluster-setup/deployment/roles/slurm_configuration/tasks/main.yml new file mode 100644 index 000000000..3555c4c67 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_configuration/tasks/main.yml @@ -0,0 +1,22 @@ +--- + +- name: create slurm user on all hosts + become: true + user: + name: slurm + system: yes + create_home: no + uid: 9634 + register: slurm_user + +- name: create directories used by both slurmd and slurmctld + become: true + loop: + - /usr/local/etc/slurm + - /var/log/slurm + file: + path: "{{ item }}" + owner: slurm + group: slurm + mode: '0755' + state: directory diff --git a/cluster-setup/deployment/roles/slurm_controller/README.md b/cluster-setup/deployment/roles/slurm_controller/README.md new file mode 100644 index 000000000..91c8ecce3 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_controller/README.md @@ -0,0 +1,21 @@ +This role sets up `slurmctld` on the node it runs on. + +Note that this does *not* set up `slurmd`; a node that should function +as a Slurm compute node should also run the [slurm node] role to set up `slurmd`. + +Like the [slurm node] role, this node depends on: +- the [munge node] role to set up the MUNGE authentication service; +- the [slurm dependencies] role to install slurmctld's dependencies; and +- the [slurm configuration] role to create the `slurm` user and system directories + used by slurmctld. + +[slurm node]: ../slurm_node + +To set up the Slurm controller and database daemons, it will: + +- install and configure a MariaDB server; +- deploy required configuration files (including those needed for `slurmd`); and +- spin up `slurmctld`. + +Note that the config files deployed by this role are the ones required for +`slurmd`, and in our cluster compute nodes will use these files via NFS mounts. 
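The `slurm_controller` role renders `slurm.conf` from the `slurm_nodes` list. The defaults that follow appear to describe a small test topology; for a real cluster the list would normally be overridden in `group_vars`, and the template also accepts optional `cores_per_socket` and `threads_per_core` keys (each defaulting to 1). An illustrative override, with node names taken from `initialization/head/cluster_hosts` but made-up hardware figures:

```yaml
# Illustrative group_vars override; the memory and CPU figures are invented
# for the example and should be replaced with the real node specifications.
slurmctlnode: head
slurm_nodes:
  - name: head
    memory: 64000        # RealMemory in MB
    cpus: 16
    sockets: 2
    cores_per_socket: 8
    threads_per_core: 1
  - name: b01
    memory: 128000
    cpus: 32
    sockets: 2
    cores_per_socket: 8
    threads_per_core: 2
```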
diff --git a/cluster-setup/deployment/roles/slurm_controller/defaults/main.yml b/cluster-setup/deployment/roles/slurm_controller/defaults/main.yml new file mode 100644 index 000000000..68610077f --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_controller/defaults/main.yml @@ -0,0 +1,12 @@ +--- + +slurmctlnode: head +slurm_nodes: + - name: head + memory: 4000 + cpus: 2 + sockets: 2 + - name: worker + memory: 4000 + cpus: 2 + sockets: 2 diff --git a/roles/slurm_node/files/cgroup.conf b/cluster-setup/deployment/roles/slurm_controller/files/cgroup.conf similarity index 69% rename from roles/slurm_node/files/cgroup.conf rename to cluster-setup/deployment/roles/slurm_controller/files/cgroup.conf index 4b8e9192b..2b79b7410 100644 --- a/roles/slurm_node/files/cgroup.conf +++ b/cluster-setup/deployment/roles/slurm_controller/files/cgroup.conf @@ -5,6 +5,9 @@ # See man slurm.conf and man cgroup.conf for further # information on cgroup configuration parameters #-- -CgroupAutomount=no +CgroupAutomount=yes +# CgroupPlugin="cgroup/v2" +ConstrainCores=yes +ConstrainDevices=yes ConstrainRAMSpace=yes ConstrainSwapSpace=yes diff --git a/roles/slurm_controller/files/slurmdbd.conf b/cluster-setup/deployment/roles/slurm_controller/files/slurmdbd.conf similarity index 96% rename from roles/slurm_controller/files/slurmdbd.conf rename to cluster-setup/deployment/roles/slurm_controller/files/slurmdbd.conf index b2c402d9c..1722ca565 100644 --- a/roles/slurm_controller/files/slurmdbd.conf +++ b/cluster-setup/deployment/roles/slurm_controller/files/slurmdbd.conf @@ -31,7 +31,7 @@ SlurmUser=slurm DebugLevel=4 #DefaultQOS=normal,standby LogFile=/var/log/slurm/slurmdbd.log -PidFile=/var/run/slurm/slurmdbd.pid +PidFile=/var/run/slurmdbd.pid #PluginDir=/usr/lib/slurm #PrivateData=accounts,users,usage,jobs #TrackWCKey=yes diff --git a/roles/slurm_node/handlers/main.yml b/cluster-setup/deployment/roles/slurm_controller/handlers/main.yml similarity index 78% rename from roles/slurm_node/handlers/main.yml rename to cluster-setup/deployment/roles/slurm_controller/handlers/main.yml index bb5a524e8..5db8706ab 100644 --- a/roles/slurm_node/handlers/main.yml +++ b/cluster-setup/deployment/roles/slurm_controller/handlers/main.yml @@ -1,3 +1,4 @@ +# FIXME we may not need this - name: reconfigure slurm become: true become_user: root diff --git a/cluster-setup/deployment/roles/slurm_controller/meta/main.yml b/cluster-setup/deployment/roles/slurm_controller/meta/main.yml new file mode 100644 index 000000000..e8045539d --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_controller/meta/main.yml @@ -0,0 +1,6 @@ +--- + +dependencies: + - munge_node + - slurm_dependencies + - slurm_configuration diff --git a/cluster-setup/deployment/roles/slurm_controller/tasks/main.yml b/cluster-setup/deployment/roles/slurm_controller/tasks/main.yml new file mode 100644 index 000000000..c90b2cc22 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_controller/tasks/main.yml @@ -0,0 +1,100 @@ +--- + +- name: install and start mariadb + become: true + become_user: root + tags: slurmdb + block: + - name: install mariadb + apt: + name: + - mariadb-server + - libmariadb-dev + state: present + - name: start mariadb service + systemd: + name: mariadb + state: started + enabled: true + - name: create slurm database user + tags: slurmdb + block: + - mysql_db: + name: slurm_acct_db + login_unix_socket: /var/run/mysqld/mysqld.sock + check_implicit_admin: true + config_file: '' + state: present + - mysql_user: + name: slurm + login_unix_socket: 
/var/run/mysqld/mysqld.sock + check_implicit_admin: true + config_file: '' + priv: "slurm_acct_db.*:all" + +- name: add slurmctld config files + become: true + block: + - name: copy cgroup config file + notify: reconfigure slurm + copy: + src: cgroup.conf + dest: /usr/local/etc/slurm/ + owner: slurm + group: slurm + mode: "644" + - name: generate and copy slurm config file + notify: reconfigure slurm + template: + src: slurm.conf.j2 + dest: /usr/local/etc/slurm/slurm.conf + owner: slurm + group: slurm + mode: "644" + +- name: copy slurmdbd configuration + become: true + copy: + src: "slurmdbd.conf" + dest: /usr/local/etc/slurm/ + owner: slurm + group: slurm + mode: "600" + +- name: create directory for slurmctld spooling + become: true + file: + path: /var/spool/slurmctld + owner: slurm + group: slurm + mode: '0755' + state: directory + +- name: enable Slurm head-node-only services + become: true + block: + - name: check if the slurmctld service is in place + stat: path=/usr/local/lib/systemd/system/slurmctld.service + register: slurmctld_service + + - name: fail if the slurmctld service isn't there + fail: + msg: "slurmctld service is not installed." + when: not slurmctld_service.stat.exists + + - name: check if the slurmdbd service is in place + stat: path=/usr/local/lib/systemd/system/slurmdbd.service + register: slurmdbd_service + + - name: fail if the slurmdbd service isn't there + fail: + msg: "slurmdbd service is not installed." + when: not slurmdbd_service.stat.exists + + - loop: + - slurmdbd + - slurmctld + systemd: + name: "{{ item }}" + state: started + enabled: true diff --git a/roles/slurm_node/templates/slurm.conf.j2 b/cluster-setup/deployment/roles/slurm_controller/templates/slurm.conf.j2 similarity index 72% rename from roles/slurm_node/templates/slurm.conf.j2 rename to cluster-setup/deployment/roles/slurm_controller/templates/slurm.conf.j2 index 23847c750..3c731b7bc 100644 --- a/roles/slurm_node/templates/slurm.conf.j2 +++ b/cluster-setup/deployment/roles/slurm_controller/templates/slurm.conf.j2 @@ -1,34 +1,38 @@ -ControlMachine={{slurmctlnode}} -#ControlAddr= -#BackupController= -#BackupAddr= +# slurm.conf file generated by https://slurm.schedmd.com/configurator.html +# and manually edited to work as an Ansible template. + +# This file is generated by Ansible. If you edit it by hand your +# changes may be overwritten next time this cluster's playbook is run. +# +# To change the settings in this file, see the template at +# +# roles/slurm_node/templates/slurm.conf.j2 # -AuthType=auth/munge -CacheGroups=0 -#CheckpointType=checkpoint/none -CryptoType=crypto/munge +# To change the list of nodes, edit the `slurm_nodes` setting in this cluster's +# environment variables file. 
+ +ClusterName=kivecluster +SlurmctldHost={{ slurmctlnode }} + #DisableRootJobs=NO #EnforcePartLimits=NO #Epilog= #EpilogSlurmctld= #FirstJobId=1 -#MaxJobId=999999 +#MaxJobId=67043328 #GresTypes= #GroupUpdateForce=0 #GroupUpdateTime=600 -#JobCheckpointDir=/var/slurm/checkpoint -#JobCredentialPrivateKey= -#JobCredentialPublicCertificate= #JobFileAppend=0 #JobRequeue=1 -#JobSubmitPlugins=1 +#JobSubmitPlugins=lua #KillOnBadExit=0 #LaunchType=launch/slurm #Licenses=foo*4,bar #MailProg=/bin/mail -#MaxJobCount=5000 +#MaxJobCount=10000 #MaxStepCount=40000 -#MaxTasksPerNode=128 +#MaxTasksPerNode=512 MpiDefault=none #MpiParams=ports=#-# #PluginDir= @@ -43,21 +47,19 @@ ProctrackType=proctrack/cgroup #PropagateResourceLimitsExcept= #RebootProgram= ReturnToService=1 -#SallocDefaultCommand= -SlurmctldPidFile=/var/run/slurm/slurmctld.pid +SlurmctldPidFile=/var/run/slurmctld.pid SlurmctldPort=6817 -SlurmdPidFile=/var/run/slurm/slurmd.pid +SlurmdPidFile=/var/run/slurmd.pid SlurmdPort=6818 -SlurmdSpoolDir=/var/lib/slurm/slurmd +SlurmdSpoolDir=/var/spool/slurmd SlurmUser=slurm #SlurmdUser=root #SrunEpilog= #SrunProlog= -StateSaveLocation=/var/lib/slurm/slurmctld +StateSaveLocation=/var/spool/slurmctld SwitchType=switch/none #TaskEpilog= TaskPlugin=task/cgroup -#TaskPluginParam= #TaskProlog= #TopologyPlugin=topology/tree #TmpFS=/tmp @@ -90,11 +92,8 @@ Waittime=0 # SCHEDULING #DefMemPerCPU=0 #MaxMemPerCPU=0 -#SchedulerRootFilter=1 #SchedulerTimeSlice=30 SchedulerType=sched/builtin -SchedulerPort=7321 -#SelectType=select/linear SelectType=select/cons_res SelectTypeParameters=CR_CPU_Memory # @@ -116,17 +115,15 @@ SelectTypeParameters=CR_CPU_Memory # # LOGGING AND ACCOUNTING #AccountingStorageEnforce=0 -AccountingStorageHost={{slurmctlnode}} -#AccountingStorageLoc=/var/log/slurm/accounting +AccountingStorageHost={{ slurmctlnode }} #AccountingStoragePass= AccountingStoragePort=6819 AccountingStorageType=accounting_storage/slurmdbd #AccountingStorageUser= -AccountingStoreJobComment=YES -ClusterName=kivetestcluster -#DebugFlags= +AccountingStoreFlags=job_comment #JobCompHost= JobCompLoc=/var/log/slurm/job_completions +#JobCompParams= #JobCompPass= #JobCompPort= JobCompType=jobcomp/filetxt @@ -134,12 +131,13 @@ JobCompType=jobcomp/filetxt #JobContainerType=job_container/none JobAcctGatherFrequency=30 JobAcctGatherType=jobacct_gather/linux -SlurmctldDebug=3 +SlurmctldDebug=info SlurmctldLogFile=/var/log/slurm/slurmctld.log -SlurmdDebug=3 +SlurmdDebug=info SlurmdLogFile=/var/log/slurm/slurmd.log #SlurmSchedLogFile= #SlurmSchedLogLevel= +#DebugFlags= # # # POWER SAVE SUPPORT FOR IDLE NODES (optional) @@ -153,20 +151,10 @@ SlurmdLogFile=/var/log/slurm/slurmd.log #SuspendRate= #SuspendTime= # - -# NOTE(nknight): This file is generated by Ansible. If you edit it by hand your -# changes may be overwritten next time this cluster's playbook is run. # -# To change the settings in this file, see the template at -# -# roles/slurm_node/templates/slurm.conf.j2 -# -# To change the list of nodes, edit the `slurm_nodes` setting in this cluster's -# environment variables file. 
- # COMPUTE NODES {% for node in slurm_nodes %} -NodeName={{ node.name }} CPUs={{ node.cpus | default('1') }} Sockets={{ node.sockets | default('1') }} CoresPerSocket={{ node.cores_per_socket | default('1') }} ThreadsPerCore={{ node.threads_per_core | default('1') }} RealMemory={{ node.memory }} State=UNKNOWN +NodeName={{ node.name }} CPUs={{ node.cpus | default('1') }} RealMemory={{ node.memory }} Sockets={{ node.sockets | default('1') }} CoresPerSocket={{ node.cores_per_socket | default('1') }} ThreadsPerCore={{ node.threads_per_core | default('1') }} State=UNKNOWN {% endfor %} PartitionName=debug Priority=3500 Nodes=ALL Default=YES MaxTime=INFINITE State=UP diff --git a/cluster-setup/deployment/roles/slurm_dependencies/README.md b/cluster-setup/deployment/roles/slurm_dependencies/README.md new file mode 100644 index 000000000..a4ddae8fa --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_dependencies/README.md @@ -0,0 +1 @@ +This role installs system dependencies for Slurm using apt. diff --git a/cluster-setup/deployment/roles/slurm_dependencies/tasks/main.yml b/cluster-setup/deployment/roles/slurm_dependencies/tasks/main.yml new file mode 100644 index 000000000..bfc2d8667 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_dependencies/tasks/main.yml @@ -0,0 +1,56 @@ +--- + +# Installing packages used in the building and running of Slurm, as per +# https://slurm.schedmd.com/quickstart_admin.html +# Certain packages need to be installed when Slurm is compiled to enable +# support for certain features. In the first play we indicate with +# comments which packages are used for which features. + +- name: install Slurm dependencies + become: true + apt: + name: + # cgroups: + - libdbus-1-dev + - hwloc + - libhwloc-dev + # AMD GPU support: + # FIXME this does not compile correctly; can we find a proper dev package? + - rocm-device-libs + # HDF5 job profiling: + - libhdf5-dev + # To generate HTML man pages: + - man2html + # InfiniBand accounting: + - libibmad-dev + - libibumad-dev + # Intel GPU support: + - libvpl-dev + # IPMI energy consumption: + # FIXME this does not compile correctly; are all the required headers in place? + - libfreeipmi-dev + # lua support: + - liblua5.4-dev + # MUNGE support: + - libmunge-dev + # MariaDB support: + - libmariadb-dev + # NUMA affinity: + - libnuma-dev + # NVIDIA GPU support: + - libnvidia-ml-dev + # PAM support: + - libpam0g-dev + # PMIx support: + - libpmix-dev + # Readline support: + - libreadline-dev + # REST API: + - libhttp-parser-dev + - libjson-c-dev + - libyaml-dev + - libjwt-dev + # RRD external sensor data collection: + - librrd-dev + # sview: + - libgtk2.0-dev diff --git a/cluster-setup/deployment/roles/slurm_node/README.md b/cluster-setup/deployment/roles/slurm_node/README.md new file mode 100644 index 000000000..f46cc6860 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_node/README.md @@ -0,0 +1,13 @@ +This role confirms that configuration files required by slurmd are in place +and then spins up the slurmd service. It depends on: +- the [munge node] role to set up the MUNGE authentication service; +- the [slurm dependencies] role to install slurmd's dependencies; and +- the [slurm configuration] role to create the `slurm` user and system directories + used by slurmd. + +In a typical cluster configuration, the configuration files required will be mounted +via NFS, so we don't actually install them in this role or in the dependencies. 
+ +[munge node]: ../munge_node +[slurm dependencies]: ../slurm_dependencies +[slurm configuration]: ../slurm_configuration diff --git a/cluster-setup/deployment/roles/slurm_node/meta/main.yml b/cluster-setup/deployment/roles/slurm_node/meta/main.yml new file mode 100644 index 000000000..e8045539d --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_node/meta/main.yml @@ -0,0 +1,6 @@ +--- + +dependencies: + - munge_node + - slurm_dependencies + - slurm_configuration diff --git a/cluster-setup/deployment/roles/slurm_node/tasks/main.yml b/cluster-setup/deployment/roles/slurm_node/tasks/main.yml new file mode 100644 index 000000000..c938d9453 --- /dev/null +++ b/cluster-setup/deployment/roles/slurm_node/tasks/main.yml @@ -0,0 +1,45 @@ +--- + +- name: check if Slurm is ready to go + block: + - name: check if the Slurm systemd service exists + stat: path=/usr/local/lib/systemd/system/slurmd.service + register: slurmd_service + + - name: fail if the systemd service isn't there + fail: + msg: "slurmd systemd service is not in place." + when: not slurmd_service.stat.exists + + - name: check if slurm.conf exists + stat: path=/usr/local/etc/slurm/slurm.conf + register: slurm_conf + + - name: fail if the conf file isn't there + fail: + msg: "slurm.conf is not in place." + when: not slurm_conf.stat.exists + + - name: check if cgroup.conf exists + stat: path=/usr/local/etc/slurm/cgroup.conf + register: cgroup_conf + + - name: fail if cgroup.conf file isn't there + fail: + msg: "cgroup.conf is not in place." + when: not cgroup_conf.stat.exists + +- name: create directory for Slurm spooling + become: true + file: + path: /var/spool/slurmd + owner: slurm + group: slurm + mode: '0755' + state: directory + +- name: enable slurmd service + systemd: + name: slurmd + state: started + enabled: true diff --git a/cluster-setup/deployment/roles/worker_node_networking/README.md b/cluster-setup/deployment/roles/worker_node_networking/README.md new file mode 100644 index 000000000..cb4677d30 --- /dev/null +++ b/cluster-setup/deployment/roles/worker_node_networking/README.md @@ -0,0 +1,5 @@ +This role sets up the networking infrastructure used by worker nodes, such as: +- the NFS client is installed and NFS volumes are mounted; +- ports are opened for slurmd and ssh; +- the original `/home` directory is moved aside so that `/data/home` on the head node + will be used as the home directory. 
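The tasks that follow mount `/data`, `/opt`, and `/usr/local` from the head node over NFS using a `head_internal_address` variable, which is expected to be defined in the inventory or group variables. A minimal sketch, assuming the cluster-internal addressing from `initialization/head/cluster_hosts`:

```yaml
# Illustrative variable assumed by the worker_node_networking role:
# the head node's address on the cluster-internal network
# (192.168.1.1 per initialization/head/cluster_hosts).
head_internal_address: 192.168.1.1
```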
diff --git a/cluster-setup/deployment/roles/worker_node_networking/tasks/main.yml b/cluster-setup/deployment/roles/worker_node_networking/tasks/main.yml new file mode 100644 index 000000000..2802588a9 --- /dev/null +++ b/cluster-setup/deployment/roles/worker_node_networking/tasks/main.yml @@ -0,0 +1,107 @@ +--- + +- name: set timezone + block: + - name: change the timezone + community.general.timezone: + name: America/Vancouver + - name: restart cron to reflect the new timezone + systemd: + name: cron + state: restarted + +- name: install NFS client software + become: true + apt: + name: + - nfs-common + state: present + +- name: ensure ufw is running + become: true + systemd: + name: ufw + state: started + enabled: true + +- name: open port for SSH access + become: true + community.general.ufw: + rule: allow + port: ssh + protocol: tcp + +# Originally this task opened ports: +# - 6817-6819/tcp +# - 6817-6819/udp +# - 7321/tcp +- name: open port for slurmctld to communicate with slurmd + become: true + community.general.ufw: + rule: allow + port: 6818 + protocol: tcp + +- name: mount /data + become: true + block: + - name: create the mount point + file: + path: /data + state: directory + - name: mount the drive + ansible.posix.mount: + path: /data + src: "{{ head_internal_address }}:/data" + fstype: nfs + state: mounted + +- name: mount /opt + become: true + ansible.posix.mount: + path: /opt + src: "{{ head_internal_address }}:/opt" + fstype: nfs + state: mounted + opts: ro + +- name: mount /usr/local + become: true + ansible.posix.mount: + path: /usr/local + src: "{{ head_internal_address }}:/usr/local" + fstype: nfs + state: mounted + opts: ro + +- name: use /data/home as the home directory + block: + - name: check if /data/home exists + stat: path=/data/home + register: data_home + + - name: fail if /data/home isn't prepared + fail: + msg: "/data/home is not in place." + when: not data_home.stat.exists + + rescue: + - name: inform user to set up the head node first and propagate failure to stop the playbook + fail: + msg: "Before setting up this node, make sure the head node is configured first!" 
+ +- name: check if original /home has been renamed + stat: path=/original_home + register: home_backed_up + +- name: rename original /home + become: true + command: mv /home /original_home + when: not home_backed_up.stat.exists + +- name: symbolic link for /home + become: true + file: + path: /home + src: /data/home + state: link diff --git a/cluster-setup/deployment/set_locale_to_canada.yml b/cluster-setup/deployment/set_locale_to_canada.yml new file mode 100644 index 000000000..4d58a5f5f --- /dev/null +++ b/cluster-setup/deployment/set_locale_to_canada.yml @@ -0,0 +1,14 @@ +--- + +- name: change the default locale to Canada English + hosts: all + tasks: + - name: make the locale available + become: true + community.general.locale_gen: + name: "en_CA.UTF-8" + state: present + + - name: set the default locale + become: true + command: update-locale LANG=en_CA.UTF-8 diff --git a/cluster-setup/deployment/slurm_setup.yml b/cluster-setup/deployment/slurm_setup.yml new file mode 100644 index 000000000..da41d1c66 --- /dev/null +++ b/cluster-setup/deployment/slurm_setup.yml @@ -0,0 +1,30 @@ +--- + +- name: configure head node + hosts: head + tasks: + - name: set up head node networking + include_role: + name: head_node_networking + - name: build Slurm + include_role: + name: slurm_builder + - name: configure and start slurmctld and supporting services + include_role: + name: slurm_controller + - name: configure and start slurmd + include_role: + name: slurm_node + +- name: configure workers + hosts: workers + tasks: + - name: synchronize users and groups from the head node + include_role: + name: copy_users_and_groups + - name: set up worker node networking + include_role: + name: worker_node_networking + - name: configure and start slurmd + include_role: + name: slurm_node diff --git a/cluster-setup/deployment/templates/cifs_credentials.j2 b/cluster-setup/deployment/templates/cifs_credentials.j2 new file mode 100644 index 000000000..2cb02ed30 --- /dev/null +++ b/cluster-setup/deployment/templates/cifs_credentials.j2 @@ -0,0 +1,3 @@ +username={{ cifs_username }} +password={{ cifs_password }} +domain={{ cifs_domain }} diff --git a/cluster-setup/export_users_and_groups.py b/cluster-setup/export_users_and_groups.py new file mode 100644 index 000000000..f44a63168 --- /dev/null +++ b/cluster-setup/export_users_and_groups.py @@ -0,0 +1,294 @@ +#! 
/usr/bin/env python + +import csv +from typing import Optional, Iterable, Mapping, TypedDict +from collections.abc import Container +from dataclasses import dataclass, field, asdict +from io import TextIOBase + +import argparse +import yaml + + +@dataclass +class ShadowEntry: + name: str + hashed_password: str + last_changed: Optional[int] + min: Optional[int] + max: Optional[int] + warn: Optional[int] + inactive: Optional[int] + expire: Optional[int] + +def int_or_none(possible_int_string: str) -> Optional[int]: + try: + return int(possible_int_string) + except ValueError: + return None + +def parse_shadow(shadow_file: TextIOBase) -> dict[str, ShadowEntry]: + shadow_csv = csv.reader(shadow_file, delimiter=":") + shadow_entries: dict[str, ShadowEntry] = {} + for row in shadow_csv: + name: str = row[0] + shadow_entries[name] = ShadowEntry( + name=name, + hashed_password=row[1], + last_changed=int_or_none(row[2]), + min=int_or_none(row[3]), + max=int_or_none(row[4]), + warn=int_or_none(row[5]), + inactive=int_or_none(row[6]), + expire=int_or_none(row[7]), + ) + return shadow_entries + +@dataclass +class PasswdEntry: + name: str + passwdx: str + uid: int + gid: int + info: str + home: str + shell: str + +def parse_passwd(passwd_file: TextIOBase) -> dict[str, PasswdEntry]: + passwd_csv = csv.reader(passwd_file, delimiter=":") + passwd_entries: dict[str, PasswdEntry] = {} + for row in passwd_csv: + name: str = row[0] + passwd_entries[name] = PasswdEntry( + name=name, + passwdx=row[1], + uid=int(row[2]), + gid=int(row[3]), + info=row[4], + home=row[5], + shell=row[6], + ) + return passwd_entries + +@dataclass +class GroupEntry: + name: str + passwdx: str + gid: int + users: list[str] + +def parse_group(group_file: TextIOBase) -> dict[int, GroupEntry]: + group_csv = csv.reader(group_file, delimiter=":") + group_entries: dict[GroupEntry] = {} + for row in group_csv: + gid: int = int(row[2]) + group_entries[gid] = GroupEntry( + name=row[0], + passwdx=row[1], + gid=gid, + users=row[3].split(","), + ) + return group_entries + +def get_other_groups_by_user( + users_to_export: Iterable[str], + groups_to_export: Container[str], + passwd_entries: dict[int, PasswdEntry], + group_entries: dict[int, GroupEntry], + old_sudo: Optional[str], + new_sudo: Optional[str], +) -> dict[str, list[str]]: + """ + Assemble a mapping of username to the (non-primary) groups that this user belongs to. 
+ """ + if old_sudo is not None: + assert new_sudo is not None, "Either both old and new sudo group names must be specified or neither" + + groups_by_user: dict[str, list[str]] = {} + for user in users_to_export: + groups_by_user[user] = [] + + for gid, group_entry in group_entries.items(): + group_name: str = group_entry.name + + if old_sudo is not None and group_name == old_sudo: + for user in group_entry.users: + if user in groups_by_user: + groups_by_user[user].append(new_sudo) + + elif group_name in groups_to_export: + for user in group_entry.users: + if user in groups_by_user: + passwd_entry: PasswdEntry = passwd_entries[user] + if gid != passwd_entry.gid: # check if this is the primary group + groups_by_user[user].append(group_name) + + return groups_by_user + + +@dataclass +class User: + name: str + hashed_password: str + uid: int + home: str + primary_group: str + groups: list[str] = field(default_factory=list) + +def create_user( + name: str, + passwd_entries: dict[str, PasswdEntry], + shadow_entries: dict[str, ShadowEntry], + group_entries: dict[str, GroupEntry], + groups_by_user: dict[str, list[str]], +) -> User: + passwd_entry: PasswdEntry = passwd_entries[name] + return User( + name=name, + hashed_password=shadow_entries[name].hashed_password, + uid=passwd_entry.uid, + home=passwd_entry.home, + primary_group=group_entries[passwd_entry.gid].name, + groups=groups_by_user[name] + ) + + +def get_user_primary_groups( + users: Iterable[User], + group_entries: dict[int, GroupEntry], +) -> dict[str, GroupEntry]: + primary_groups: dict[str, GroupEntry] = {} + groups_by_name: dict[str, GroupEntry] = {} + for group_entry in group_entries.values(): + groups_by_name[group_entry.name] = group_entry + for user in users: + primary_groups[user.name] = groups_by_name[user.primary_group] + return primary_groups + + +@dataclass +class ExportedUsersAndGroups: + users: list[User] + primary_groups: list[GroupEntry] + other_groups: list[GroupEntry] + + +def exported_users_and_groups( + users_to_export: Iterable[str], + groups_to_export: Container[str], + passwd_entries: Mapping[str, PasswdEntry], + shadow_entries: Mapping[str, ShadowEntry], + group_entries: Mapping[int, GroupEntry], + old_sudo: Optional[str], + new_sudo: Optional[str], +) -> ExportedUsersAndGroups: + + other_groups_by_user: dict[str, list[str]] = get_other_groups_by_user( + users_to_export, + groups_to_export, + passwd_entries, + group_entries, + old_sudo, + new_sudo, + ) + + users: dict[str, User] = {} + for username in users_to_export: + users[username] = create_user( + username, + passwd_entries, + shadow_entries, + group_entries, + other_groups_by_user, + ) + + primary_groups: dict[int, GroupEntry] = get_user_primary_groups( + users.values(), + group_entries, + ) + other_groups: dict[int, GroupEntry] = {} + for gid, group_entry in group_entries.items(): + if gid in primary_groups or group_entry.name not in groups_to_export: + continue + other_groups[gid] = group_entry + + return ExportedUsersAndGroups( + list(users.values()), + list(primary_groups.values()), + list(other_groups.values()), + ) + + +class SudoGroup(TypedDict): + old: Optional[str] = None + new: Optional[str] = None + +def main(): + parser = argparse.ArgumentParser( + "Collate user and group information for recreating them on a new server" + ) + parser.add_argument( + "--passwd", + help="The passwd file (as it appears in /etc/passwd on the original server)", + default="/etc/passwd", + ) + parser.add_argument( + "--shadow", + help="The shadow file (as it appears in 
/etc/shadow on the original server)", + default="/etc/shadow", + ) + parser.add_argument( + "--group", + help="The group file (as it appears in /etc/group on the original server)", + default="/etc/group", + ) + parser.add_argument( + "--out", + help="File to write the output YAML to (default out.yaml)", + default="out.yaml", + ) + parser.add_argument( + "users_and_groups", + help="YAML file with `users` (list of usernames to export) and `groups` (list of group names to export)", + ) + args = parser.parse_args() + + with open(args.users_and_groups, "r") as f: + users_and_groups = yaml.safe_load(f) + + users_to_export: list[str] = users_and_groups["users"] + groups_to_export: list[str] = users_and_groups["groups"] + sudo_group: SudoGroup = SudoGroup() + if users_and_groups.get("sudo_group") is not None: + sudo_group["old"] = users_and_groups["sudo_group"]["old"] + sudo_group["new"] = users_and_groups["sudo_group"]["new"] + + with open(args.passwd, "r") as f: + passwd_entries: dict[str, PasswdEntry] = parse_passwd(f) + + with open(args.shadow, "r") as f: + shadow_entries: dict[str, ShadowEntry] = parse_shadow(f) + + with open(args.group, "r") as f: + group_entries: dict[str, GroupEntry] = parse_group(f) + + for_export: ExportedUsersAndGroups = exported_users_and_groups( + users_to_export, + groups_to_export, + passwd_entries=passwd_entries, + shadow_entries=shadow_entries, + group_entries=group_entries, + old_sudo=sudo_group["old"], + new_sudo=sudo_group["new"], + ) + serialized = { + "users": [asdict(x) for x in for_export.users], + "primary_groups": [asdict(x) for x in for_export.primary_groups], + "other_groups": [asdict(x) for x in for_export.other_groups], + } + with open(args.out, "w") as f: + yaml.dump(serialized, f) + + +if __name__ == "__main__": + main() diff --git a/cluster-setup/initialization/head/cluster_hosts b/cluster-setup/initialization/head/cluster_hosts new file mode 100644 index 000000000..f5e637fd7 --- /dev/null +++ b/cluster-setup/initialization/head/cluster_hosts @@ -0,0 +1,14 @@ +192.168.69.179 bulbasaur bulby +192.168.69.86 octomore octy + +192.168.1.1 octomore head +192.168.1.2 b01 +192.168.1.3 b02 +192.168.1.4 b03 +192.168.1.5 b04 +192.168.1.6 b05 +192.168.1.7 b06 +192.168.1.8 b07a +192.168.1.9 b07b +192.168.1.10 b08a +192.168.1.11 b08b diff --git a/cluster-setup/initialization/head/create_head_user_data.py b/cluster-setup/initialization/head/create_head_user_data.py new file mode 100644 index 000000000..9c15619ed --- /dev/null +++ b/cluster-setup/initialization/head/create_head_user_data.py @@ -0,0 +1,40 @@ +#! 
/usr/bin/env python + +import argparse +import textwrap + +import yaml + + +def main(): + parser = argparse.ArgumentParser("Create cloud-init user-data for the head node") + parser.add_argument( + "--template", + help="Template file to insert the host mappings into", + default="user-data.template", + ) + parser.add_argument( + "--output", + help="File to write the resulting user-data file to", + default="user-data", + ) + parser.add_argument( + "host_mapping_yaml", + help="YAML file containing the compute node details in `compute_nodes`", + ) + args = parser.parse_args() + + with open(args.host_mapping_yaml, "r") as f: + host_mappings = yaml.safe_load(f)["compute_nodes"] + + host_mapping_str: str = "\n".join( + [f'{hm["name"]}\t{hm["ip"]}' for hm in host_mappings] + ) + host_mapping_str = textwrap.indent(host_mapping_str, " ") + with open(args.template, "r") as template: + with open(args.output, "w") as output: + output.write(template.read().format(host_mappings=host_mapping_str)) + + +if __name__ == "__main__": + main() diff --git a/cluster-setup/initialization/head/head_configuration.bash b/cluster-setup/initialization/head/head_configuration.bash new file mode 100644 index 000000000..21cf04e66 --- /dev/null +++ b/cluster-setup/initialization/head/head_configuration.bash @@ -0,0 +1,12 @@ +#! /usr/bin/bash + +# Run this as root on a vanilla installation of Jammy. + +apt update -y +apt upgrade -y +apt install -y python3 python3-pip + +python3 -m pip install -r requirements.txt +ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -N "" +cat /root/.ssh/id_ed25519.pub >> /root/.ssh/authorized_keys +cat cluster_hosts >> /etc/hosts diff --git a/cluster-setup/requirements.txt b/cluster-setup/initialization/head/requirements.txt similarity index 100% rename from cluster-setup/requirements.txt rename to cluster-setup/initialization/head/requirements.txt diff --git a/cluster-setup/initialization/head/user-data.template b/cluster-setup/initialization/head/user-data.template new file mode 100644 index 000000000..0a3d5f727 --- /dev/null +++ b/cluster-setup/initialization/head/user-data.template @@ -0,0 +1,25 @@ +#cloud-config + +package_update: true + +packages: + - python3 + - python3-pip + +write_files: + - content: | + ansible==8.0.0 + PyMySQL==1.0.3 + psycopg2-binary==2.9.6 + path: /usr/local/src/requirements.txt + owner: root + permissions: '0644' + - content: | +{host_mappings} + path: /etc/hosts + append: true + +runcmd: + - [python3, -m, pip, install, -r, /usr/local/src/requirements.txt] + - [sudo, ssh-keygen, -t, ed25519, -f, /root/.ssh/id_ed25519, -N, ""] + - "sudo cat /root/.ssh/id_ed25519.pub >> /root/.ssh/authorized_keys" diff --git a/cluster-setup/initialization/worker/cluster_hosts_bulbasaur b/cluster-setup/initialization/worker/cluster_hosts_bulbasaur new file mode 100644 index 000000000..aa7ce4293 --- /dev/null +++ b/cluster-setup/initialization/worker/cluster_hosts_bulbasaur @@ -0,0 +1,14 @@ +192.168.69.179 bulbasaur bulby +192.168.69.86 octomore octy + +192.168.1.1 bulbasaur head +192.168.1.2 b01 +192.168.1.3 b02 +192.168.1.4 b03 +192.168.1.5 b04 +192.168.1.6 b05 +192.168.1.7 b06 +192.168.1.8 b07a +192.168.1.9 b07b +192.168.1.10 b08a +192.168.1.11 b08b diff --git a/cluster-setup/initialization/worker/cluster_hosts_octomore b/cluster-setup/initialization/worker/cluster_hosts_octomore new file mode 100644 index 000000000..f5e637fd7 --- /dev/null +++ b/cluster-setup/initialization/worker/cluster_hosts_octomore @@ -0,0 +1,14 @@ +192.168.69.179 bulbasaur bulby +192.168.69.86 octomore octy + 
+192.168.1.1 octomore head +192.168.1.2 b01 +192.168.1.3 b02 +192.168.1.4 b03 +192.168.1.5 b04 +192.168.1.6 b05 +192.168.1.7 b06 +192.168.1.8 b07a +192.168.1.9 b07b +192.168.1.10 b08a +192.168.1.11 b08b diff --git a/cluster-setup/initialization/worker/create_worker_user_data.py b/cluster-setup/initialization/worker/create_worker_user_data.py new file mode 100644 index 000000000..398689832 --- /dev/null +++ b/cluster-setup/initialization/worker/create_worker_user_data.py @@ -0,0 +1,33 @@ +#! /usr/bin/env python + +import argparse + + +def main(): + parser = argparse.ArgumentParser("Create cloud-init user-data.template for the worker nodes") + parser.add_argument( + "--template", + help="Template file to insert the root SSH public key into", + default="user-data.template", + ) + parser.add_argument( + "--output", + help="File to write the resulting user-data.template file to", + default="user-data", + ) + parser.add_argument( + "ssh_public_key", + help="SSH public key file to insert into the template" + ) + args = parser.parse_args() + + with open(args.ssh_public_key, "r") as f: + ssh_key: str = f.read().strip() + + with open(args.template, "r") as template: + with open(args.output, "w") as output: + output.write(template.read().format(root_ssh_public_key=ssh_key)) + + +if __name__ == "__main__": + main() diff --git a/cluster-setup/initialization/worker/user-data.template b/cluster-setup/initialization/worker/user-data.template new file mode 100644 index 000000000..03dba0edf --- /dev/null +++ b/cluster-setup/initialization/worker/user-data.template @@ -0,0 +1,11 @@ +#cloud-config + +package_update: true + +users: + - name: root + ssh_authorized_keys: + - {root_ssh_public_key} + +packages: + - python3 diff --git a/cluster-setup/initialization/worker/worker_configuration.bash b/cluster-setup/initialization/worker/worker_configuration.bash new file mode 100644 index 000000000..6277cc959 --- /dev/null +++ b/cluster-setup/initialization/worker/worker_configuration.bash @@ -0,0 +1,10 @@ +#! /usr/bin/bash + +# Run this as root on a vanilla installation of Jammy on the compute nodes. + +apt update -y +apt upgrade -y +apt install -y python3 + +cat head_node_root_id_ed25519.pub >> /root/.ssh/authorized_keys +cat cluster_hosts >> /etc/hosts diff --git a/cluster-setup/setup_ssh_access.bash b/cluster-setup/setup_ssh_access.bash new file mode 100644 index 000000000..ee514aeec --- /dev/null +++ b/cluster-setup/setup_ssh_access.bash @@ -0,0 +1,6 @@ +#! /usr/bin/env bash + +# Run this as root to set up passwordless SSH access. + +cat /vagrant/setupfiles/vagrant_testkey.pub >> /root/.ssh/authorized_keys +chmod 600 /root/.ssh/authorized_keys diff --git a/cluster-setup/setup_ssh_keys.bash b/cluster-setup/setup_ssh_keys.bash new file mode 100644 index 000000000..9f9b1c1da --- /dev/null +++ b/cluster-setup/setup_ssh_keys.bash @@ -0,0 +1,17 @@ +#! /usr/bin/env bash + +# Run this as root (using sudo) to install our "stock" SSH keys. 
+if [ -f /root/.ssh/id_ed25519 ] +then + cp /root/.ssh/id_ed25519 /root/.ssh/id_ed25519.bak +fi + +if [ -f /root/.ssh/id_ed25519.pub ] +then + cp /root/.ssh/id_ed25519.pub /root/.ssh/id_ed25519.pub.bak +fi + +cp /vagrant/setupfiles/vagrant_testkey /root/.ssh/id_ed25519 +cp /vagrant/setupfiles/vagrant_testkey.pub /root/.ssh/id_ed25519.pub +chmod 600 /root/.ssh/id_ed25519 +chmod 644 /root/.ssh/id_ed25519.pub diff --git a/cluster-setup/setupfiles/install-ansible.sh b/cluster-setup/setupfiles/install-ansible.sh index b005716e3..8080985f3 100644 --- a/cluster-setup/setupfiles/install-ansible.sh +++ b/cluster-setup/setupfiles/install-ansible.sh @@ -2,12 +2,14 @@ set -eu -o pipefail IFS=$'\t\n' -# Enable extra repositories -dnf install -q -y epel-release -dnf config-manager --set-enabled PowerTools +# # Enable extra repositories +# dnf install -q -y epel-release +# dnf config-manager --set-enabled PowerTools # Install Python3 -dnf install -q -y python3 +# dnf install -q -y python3 +apt update +apt install -y python3 python3-pip # Install Python packages python3 -m pip install -r /vagrant/requirements.txt \ No newline at end of file diff --git a/cluster-setup/testenv/ansible.cfg b/cluster-setup/testenv/ansible.cfg deleted file mode 100644 index 4a56a6b93..000000000 --- a/cluster-setup/testenv/ansible.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Ansible configuration for the test environment. -# See the following for available sections and keys: -# https://docs.ansible.com/ansible/latest/reference_appendices/config.html - -[defaults] -inventory = ./inventory.ini -interpreter_python = /usr/bin/python3.6 \ No newline at end of file diff --git a/cluster-setup/testenv/inventory.ini b/cluster-setup/testenv/inventory.ini deleted file mode 100644 index e38bf32f7..000000000 --- a/cluster-setup/testenv/inventory.ini +++ /dev/null @@ -1,7 +0,0 @@ -# Documentation on this file: -# https://docs.ansible.com/ansible/latest/user_guide/intro_inventory.html#adding-variables-to-inventory - -head - -[workers] -worker diff --git a/cluster-setup/testenv/kive_dev_vars.yml b/cluster-setup/testenv/kive_dev_vars.yml deleted file mode 100644 index b8e0761ad..000000000 --- a/cluster-setup/testenv/kive_dev_vars.yml +++ /dev/null @@ -1,26 +0,0 @@ ---- -# Variables needed to set up Kive. -kive_allowed_hosts: "[\"*\"]" -kive_listen_port: 8080 -update_kive_source: yes - -# The following are sensitive, and should be kept secret for a production system. -kive_db_password: fixme-14mPdzu5vTOQG2DgtDG1inghQpMX0TBdUqEK6nVNHVo -kive_server_secret_key: fixme-kpXk1iKLbHn6-T7zieLHgADFA8ZSh5itd8k_Sp932fM - -# The following are defaults, and probably don't need to be changed. -# - DJango app settings -kive_venv: /opt/venv_kive -kive_slurm_path: "{{ kive_venv }}/bin" -kive_db_name: kive -kive_db_user: kive -kive_db_host: head -kive_media_root: /data/kive/media_root -kive_static_root: /var/www/html/kive/static -kive_root: /usr/local/share/Kive -# - httpd configuration -kive_httpd_user: kive -kive_httpd_group: kive -# - package variables -slurmbuilddir: "/root" - diff --git a/kive/kive/settings.py b/kive/kive/settings.py index 9aa6dc8f9..21962a0ea 100644 --- a/kive/kive/settings.py +++ b/kive/kive/settings.py @@ -212,7 +212,7 @@ LOG_HANDLER_NAMES.append('console') if ADMINS: LOG_HANDLER_NAMES.append('mail_admins') -LOG_LEVEL = os.environ.get('KIVE_LOG_LEVEL', 'WARN') +LOG_LEVEL = os.environ.get('KIVE_LOG_LEVEL', 'WARNING') # See http://docs.djangoproject.com/en/dev/topics/logging for # more details on how to customize your logging configuration. 
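For the `export_users_and_groups.py` script introduced above: it expects a YAML file listing the `users` and `groups` to export, plus an optional `sudo_group` mapping with `old` and `new` keys for translating the old system's admin group name. A hypothetical input file; the account and group names are placeholders:

```yaml
# Hypothetical users_and_groups input for export_users_and_groups.py.
# The usernames and group names are placeholders, not real accounts.
users:
  - alice
  - bob
groups:
  - labmembers
  - developers
# Optional: map the old distribution's admin group onto the new one,
# e.g. "wheel" on the old server to "sudo" on Ubuntu.
sudo_group:
  old: wheel
  new: sudo
```

Run against the preserved `/etc/passwd`, `/etc/shadow`, and `/etc/group` files, the script writes an `out.yaml` collating the users and groups, presumably the input consumed by the `copy_users_and_groups` role referenced in `slurm_setup.yml`.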
diff --git a/roles/kive_node/tasks/main.yml b/roles/kive_node/tasks/main.yml deleted file mode 100644 index b06806060..000000000 --- a/roles/kive_node/tasks/main.yml +++ /dev/null @@ -1,130 +0,0 @@ ---- - -- name: create kive user - become: true - user: - name: kive - system: yes - uid: 762 # random uid in system uid range (200, 999); hard-coded for consistency across hosts - - -# NOTE(nknight): this is done with `file` instead of during user creation so that we -# can set the permissions explicitly. -- name: create kive home directory - file: - path: /home/kive/ - state: directory - mode: "go-rx" - group: kive - owner: kive - - -- name: create kive app directories - become: true - loop: - - /etc/kive/ - - /var/kive/ - - /var/log/kive/ - - "{{ kive_media_root }}" - file: - path: "{{ item }}" - state: directory - mode: "2770" - owner: kive - group: kive - - -- name: kive environment configuration - become: true - become_user: kive - block: - - name: set kive environment variables - blockinfile: - path: /home/kive/.bash_profile - block: | - export KIVE_DB_NAME={{ kive_db_name }} - export KIVE_DB_USER={{ kive_db_user }} - export KIVE_DB_HOST={{ kive_db_host }} - export KIVE_DB_PASSWORD={{ kive_db_password }} - - export KIVE_MEDIA_ROOT={{ kive_media_root }} - export KIVE_STATIC_ROOT={{ kive_static_root }} - export KIVE_SLURM_PATH={{ kive_slurm_path }} - create: true # create the file if it doesn't exist - backup: true - owner: kive - group: kive - - -- name: fetch kive source code - become: true - git: - dest: "{{ kive_root }}" - repo: https://github.com/cfe-lab/Kive.git - version: "{{ kive_version | default('master') }}" - update: "{{ update_kive_source | default('no') }}" - - -- name: kive package dependencies - become: true - dnf: - name: - - platform-python-devel - - sqlite-devel - - words - - lsof - - graphviz - - graphviz-devel - - -- name: install kive python dependencies - become: true - block: - - name: create directory for virtualenv - file: - path: "{{ kive_venv }}" - state: directory - - name: copy requirements file to track changes - register: kive_requirements - copy: - dest: "{{ kive_venv }}/requirements.txt" - src: "{{ kive_root }}/requirements.txt" - - name: kive python dependencies - when: kive_requirements.changed - pip: - requirements: "{{ kive_root }}/requirements.txt" - virtualenv: "{{ kive_venv }}" - - -- name: install postgres database libraries - become: true - block: - - name: add postgresql GPG key - rpm_key: - state: present - key: https://download.postgresql.org/pub/repos/yum/RPM-GPG-KEY-PGDG - - name: add postgresql package repository - dnf: - name: https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm - - name: check if postgres is disabled - register: is_builtin_postgresql_disabled - # Check mode means this never writes the line, just checks if it's there. 
- check_mode: yes - lineinfile: - name: /etc/dnf/modules.d/postgresql.module - line: state=disabled - - name: disable built-in postgres module - when: (is_builtin_postgresql_disabled is changed) or (is_builtin_postgresql_disabled is failed) - command: - cmd: dnf -qy module disable postgresql - warn: false # dnf module doesn't include this sub-command, so have to use command directly - - name: install client libraries - dnf: - update_cache: true - name: postgresql12 - -- name: configure mail service for error logging - systemd: - name: postfix - state: started - enabled: true \ No newline at end of file diff --git a/roles/kive_server/files/001-kive.conf b/roles/kive_server/files/001-kive.conf deleted file mode 100644 index aa8bbca78..000000000 --- a/roles/kive_server/files/001-kive.conf +++ /dev/null @@ -1,15 +0,0 @@ -WSGIScriptAlias / /usr/local/share/Kive/kive/kive/wsgi.py -WSGIPythonPath /usr/local/share/Kive/kive:/opt/venv_kive/lib/python3.6/site-packages - - - -Require all granted - - - -Alias /static/ /var/www/html/kive/static/ - - -Order deny,allow -Allow from all - diff --git a/roles/singularity_node/tasks/main.yml b/roles/singularity_node/tasks/main.yml deleted file mode 100644 index 01db83ad4..000000000 --- a/roles/singularity_node/tasks/main.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- - -# This role installs Singularity from dnf. - -- name: install singularity - become: true - dnf: - name: singularity-3.7.1 - state: present diff --git a/roles/slurm_controller/README.md b/roles/slurm_controller/README.md deleted file mode 100644 index b22837c1c..000000000 --- a/roles/slurm_controller/README.md +++ /dev/null @@ -1,10 +0,0 @@ -This role sets up a Slurm controller node. It builds on the [slurm node] -role. - -[slurm node]: ../slurm_node - -To set up the slurm controller and database daemons, it will: - -- Install and configure a MariaDB server -- Deploy additional configuration files -- Install the Slurm controller components diff --git a/roles/slurm_controller/meta/main.yml b/roles/slurm_controller/meta/main.yml deleted file mode 100644 index 4375d8034..000000000 --- a/roles/slurm_controller/meta/main.yml +++ /dev/null @@ -1,4 +0,0 @@ ---- -dependencies: - - role: slurm_rpms - - role: slurm_node \ No newline at end of file diff --git a/roles/slurm_controller/tasks/main.yml b/roles/slurm_controller/tasks/main.yml deleted file mode 100644 index 7c8970fdd..000000000 --- a/roles/slurm_controller/tasks/main.yml +++ /dev/null @@ -1,103 +0,0 @@ ---- - -- name: NFS exports file - register: nfs_exports_file - blockinfile: - path: /etc/exports - block: | - /data 192.168.1.0/255.255.255.0(rw,sync,no_all_squash,no_root_squash) - /usr/local 192.168.1.0/255.255.255.0(ro,sync,no_root_squash) - /opt 192.168.1.0/255.255.255.0(ro,sync,no_root_squash) -- name: reload NFS exports - when: nfs_exports_file.changed - command: exportfs -r -- name: start NFS service - systemd: - name: nfs-server - state: started - enabled: true - -- name: install and start mariadb - become: true - become_user: root - tags: slurmdb - block: - - name: install mariadb - dnf: - name: - - mariadb-server - - mariadb-devel - state: present - - name: start mariadb service - systemd: - name: mariadb - state: started - enabled: true - - name: create slurm database user - tags: slurmdb - block: - - mysql_db: - name: slurm_acct_db - - mysql_user: - name: slurm - priv: "slurm_acct_db.*:all" - - -- name: copy slurmdbd configuration - become: true - copy: - src: "slurmdbd.conf" - dest: /etc/slurm/ - owner: slurm - group: slurm - mode: "644" - 
- -- name: install slurm - become: true - block: - - name: install slurm runtime requirements - dnf: - name: - - hwloc - - libibmad - - libibumad - - lua - - man2html - - numactl - - openssl - - pam-devel - - perl-devel - - rpm-build - - rrdtool-devel - - name: install slurm from rpm files - dnf: - name: - - "{{ slurmbuilddir }}/rpmbuild/RPMS/x86_64/slurm-20.02.2-1.el8.x86_64.rpm" - - "{{ slurmbuilddir }}/rpmbuild/RPMS/x86_64/slurm-example-configs-20.02.2-1.el8.x86_64.rpm" - - "{{ slurmbuilddir }}/rpmbuild/RPMS/x86_64/slurm-slurmctld-20.02.2-1.el8.x86_64.rpm" - - "{{ slurmbuilddir }}/rpmbuild/RPMS/x86_64/slurm-slurmdbd-20.02.2-1.el8.x86_64.rpm" - - name: configure slurm tmpfiles - copy: - content: "d /var/run/slurm 0755 slurm slurm" - dest: /usr/lib/tmpfiles.d/slurm.conf - - block: - - name: fix slurmdbd pidfile path in systemd unit - replace: - path: /usr/lib/systemd/system/slurmdbd.service - regexp: /var/run/slurmdbd.pid - replace: /var/run/slurm/slurmdbd.pid - - name: fix slurmctld pidfile path in systemd unit - replace: - path: /usr/lib/systemd/system/slurmctld.service - regexp: /var/run/slurmctld.pid - replace: /var/run/slurm/slurmctld.pid - - name: enable slurm services - loop: - - slurmdbd - - slurmctld - systemd: - daemon_reload: true # necessary because we've edited the unit file - name: "{{ item }}" - state: started - enabled: true diff --git a/roles/slurm_node/README.md b/roles/slurm_node/README.md deleted file mode 100644 index d6c0bfdb7..000000000 --- a/roles/slurm_node/README.md +++ /dev/null @@ -1,14 +0,0 @@ -This role creates users and deploys files that are common to the [Slurm -Controller] and [Slurm Workers]. It uses the [munge node] role to set up -the Munge authentication service - -[munge node]: ../munge_node -[Slurm Controller]: ../slurm_controller -[SLurm Workers]: ../slurm_worker - -It will: - -- Create a user called `slurm` with a consistent UID -- Creates the directories that Slurm requires -- Put copies of the shared configuration files (that `slurmd` and - `slurmctld` both use) in the appropriate places \ No newline at end of file diff --git a/roles/slurm_node/meta/main.yml b/roles/slurm_node/meta/main.yml deleted file mode 100644 index f6707d8e4..000000000 --- a/roles/slurm_node/meta/main.yml +++ /dev/null @@ -1,4 +0,0 @@ ---- - -dependencies: - - munge_node \ No newline at end of file diff --git a/roles/slurm_node/tasks/main.yml b/roles/slurm_node/tasks/main.yml deleted file mode 100644 index e46d6ec74..000000000 --- a/roles/slurm_node/tasks/main.yml +++ /dev/null @@ -1,83 +0,0 @@ ---- - -- name: create slurm user - become: true - user: - name: slurm - system: yes - create_home: no - uid: 9634 - -- name: create slurm data directories - become: true - loop: - - /var/log/slurm - - /var/lib/slurm - - /etc/slurm - file: - path: "{{ item }}" - state: directory - owner: slurm - group: slurm - -- name: add slurm config files - become: true - block: - - name: copy cgroup config file - notify: reconfigure slurm - copy: - src: cgroup.conf - dest: /etc/slurm/ - owner: slurm - group: slurm - mode: "644" - - name: generate and copy slurm config file - notify: reconfigure slurm - template: - src: slurm.conf.j2 - dest: /etc/slurm/slurm.conf - owner: slurm - group: slurm - mode: "644" - - -- name: install slurmd - become: true - block: - - name: install slurm from rpm files - dnf: - disable_gpg_check: yes # We built these rpm files, so they're unsigned. 
- name: - - "{{ slurmbuilddir }}/rpmbuild/RPMS/x86_64/slurm-20.02.2-1.el8.x86_64.rpm" - - "{{ slurmbuilddir }}/rpmbuild/RPMS/x86_64/slurm-slurmd-20.02.2-1.el8.x86_64.rpm" - - name: fix slurmd pidfile path in systemd unit - replace: - path: /usr/lib/systemd/system/slurmd.service - regexp: /var/run/slurmd.pid - replace: /var/run/slurm/slurmd.pid - - name: Create /var/run/slurm folder - blockinfile: - path: /usr/lib/systemd/system/slurmd.service - insertafter: \[Service\] - block: | - RuntimeDirectory=slurm - - - name: enable slurmd service - systemd: - daemon_reload: true # necessary because we edited the unit file - name: slurmd - state: started - enabled: true - - -- name: network drive mounts - become: true - block: - - loop: "{{ network_mounts }}" - name: Load network drive mounts from env_vars.yml - ansible.posix.mount: - path: "{{ item.path }}" - src: "{{ item.src }}" - fstype: "{{ item.fstype }}" - state: "{{ item.state }}" - opts: "{{ item.opts }}" diff --git a/roles/slurm_rpms/README.md b/roles/slurm_rpms/README.md deleted file mode 100644 index 7879401be..000000000 --- a/roles/slurm_rpms/README.md +++ /dev/null @@ -1,8 +0,0 @@ -This role fetches the Slurm source code, installs its dependencies, and builds -RPMs that can install the Slurm controller or worker daemons. - -It's used by the [slurm node] and [slurm controller] roles to install -different components of Slurm. - -[slurm node]: ../slurm_node -[slurm controller]: ../slurm_controller \ No newline at end of file diff --git a/roles/slurm_rpms/tasks/main.yml b/roles/slurm_rpms/tasks/main.yml deleted file mode 100644 index 58eb3609c..000000000 --- a/roles/slurm_rpms/tasks/main.yml +++ /dev/null @@ -1,49 +0,0 @@ ---- - -- name: build slurm RPM files - become: true - block: - - name: install development tools - dnf: - name: "@Development Tools" - - name: install slurm build requirements - dnf: - name: - - hwloc - - hwloc-devel - - libibmad - - libibumad - - lua - - lua-devel - - man2html - - mariadb-server - - mariadb-devel - - munge-devel - - ncurses-devel - - numactl - - numactl-devel - - openssl - - openssl-devel - - pam-devel - - perl-devel - - readline-devel - - rpm-build - - rrdtool-devel - - name: create temporary build directory - file: - path: "{{ slurmbuilddir }}" - state: directory - - name: check if slurm source files are already downloaded - stat: - path: "{{ slurmbuilddir }}/slurm-20.02.2.tar.bz2" - register: slurm_download - - name: fetch slurm source files - get_url: - url: "https://download.schedmd.com/slurm/slurm-20.02.2.tar.bz2" - dest: "{{ slurmbuilddir }}/slurm-20.02.2.tar.bz2" - when: not slurm_download.stat.exists - - name: build slurm rpm file - command: - cmd: rpmbuild -ta slurm-20.02.2.tar.bz2 - chdir: "{{ slurmbuilddir }}" - creates: "{{ slurmbuilddir }}/rpmbuild/" \ No newline at end of file diff --git a/roles/slurm_worker/README.md b/roles/slurm_worker/README.md deleted file mode 100644 index 4bf3fd6b7..000000000 --- a/roles/slurm_worker/README.md +++ /dev/null @@ -1,5 +0,0 @@ -This role installs and enables the Slurm worker daemon, `slurmd`. It uses the -[slurm node] role to perform the tasks that are common between Slurm node types. -This role is not needed on the head node. 
- -[slurm node]: ../slurm_rpms diff --git a/roles/slurm_worker/meta/main.yml b/roles/slurm_worker/meta/main.yml deleted file mode 100644 index 4375d8034..000000000 --- a/roles/slurm_worker/meta/main.yml +++ /dev/null @@ -1,4 +0,0 @@ ---- -dependencies: - - role: slurm_rpms - - role: slurm_node \ No newline at end of file diff --git a/roles/slurm_worker/tasks/main.yml b/roles/slurm_worker/tasks/main.yml deleted file mode 100644 index 2bc199b04..000000000 --- a/roles/slurm_worker/tasks/main.yml +++ /dev/null @@ -1,127 +0,0 @@ ---- - -- become: true - block: - - name: ensure firewalld is running - systemd: - name: firewalld - state: started - enabled: true - - name: open slurm ports - loop: - - 6817-6819/tcp - - 6817-6819/udp - - 7321/tcp - ansible.posix.firewalld: - port: "{{ item }}" - state: enabled - permanent: true - immediate: true - - name: mount /data - ansible.posix.mount: - path: /data - src: "{{kive_db_host}}:/data" - fstype: nfs - state: mounted - - - name: mount /opt - ansible.posix.mount: - path: /opt - src: "{{kive_db_host}}:/opt" - fstype: nfs - state: mounted - opts: ro - - - name: mount /usr/local - ansible.posix.mount: - path: /usr/local - src: "{{kive_db_host}}:/usr/local" - fstype: nfs - state: mounted - opts: ro - - - name: check if original /home has been renamed - stat: path=/original_home - register: home_backed_up - - - name: rename original /home - command: mv /home /original_home - when: not home_backed_up.stat.exists - - - name: symbolic link for /home - file: - path: /home - src: /data/home - state: link - - - name: read system users - delegate_to: localhost - register: user_list - community.general.read_csv: - path: /etc/passwd - delimiter: ":" - fieldnames: - - name - - passwdx - - uid - - gid - - info - - home - - shell - - name: read system groups - delegate_to: localhost - register: group_list - community.general.read_csv: - path: /etc/group - delimiter: ":" - fieldnames: - - name - - passwdx - - gid - - users - - name: record group members - loop: "{{ group_list.list }}" - when: item.name in copied_groups - set_fact: - group_name: "{{ item.name }}" - group_members: "{{ item.users.split(',') }}" - register: system_groups - - name: build user groups - with_subelements: - - "{{ system_groups.results }}" - - ansible_facts.group_members - when: item.1 != '' - set_fact: - user_groups: "{{ user_groups | default({}) | combine({ item.1: [item.0.ansible_facts.group_name] }, list_merge='append') }}" - - name: read system passwords - delegate_to: localhost - register: shadow_dict - community.general.read_csv: - path: /etc/shadow - delimiter: ":" - key: name - fieldnames: - - name - - passwd - - lastchanged - - min - - max - - warn - - inactive - - expire - - name: copy system groups - loop: "{{ group_list.list }}" - when: > - (item.name in copied_groups) or - (item.name in shadow_dict.dict and shadow_dict.dict[item.name]['passwd'].startswith("$")) - group: - gid: "{{ item.gid }}" - name: "{{ item.name }}" - - name: copy system users - loop: "{{ user_list.list }}" - when: shadow_dict.dict[item.name]['passwd'].startswith("$") - user: - uid: "{{ item.uid }}" - create_home: no - name: "{{ item.name }}" - password: "{{ shadow_dict.dict[item.name]['passwd'] }}"