Skip to content

Commit

Permalink
Further progress: the "kive_setup" playbook runs on Octomore, but Kiv…
Browse files Browse the repository at this point in the history
…e still isn't working.
  • Loading branch information
Richard Liang committed Oct 26, 2023
1 parent 59aa7f0 commit 401b668
Show file tree
Hide file tree
Showing 17 changed files with 131 additions and 71 deletions.
9 changes: 9 additions & 0 deletions cluster-setup/cloud-init/worker/worker_configuration.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#! /usr/bin/bash

# Run this as root on a vanilla installation of Jammy on the compute nodes.
#
# Brings the node up to date, installs python3, and authorizes the head
# node's root SSH key for root logins.
# Expects head_node_root_id_ed25519.pub in the current working directory.

# Stop at the first failing step instead of carrying on with a half-configured node.
set -euo pipefail

# apt-get is the interface intended for scripts; suppress interactive prompts.
export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get upgrade -y
apt-get install -y python3

# A vanilla install may not have root's .ssh directory yet.
mkdir -p /root/.ssh
chmod 700 /root/.ssh
touch /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys

# Append the key only if it is not already present, so re-running this
# script does not pile up duplicate entries.
if ! grep -qxFf head_node_root_id_ed25519.pub /root/.ssh/authorized_keys; then
    cat head_node_root_id_ed25519.pub >> /root/.ssh/authorized_keys
fi
8 changes: 8 additions & 0 deletions cluster-setup/deployment/ansible_dev.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# The main Ansible configuration file. Copy this to "ansible.cfg" and
# fill in the appropriate inventory file to use.
# See the following for available sections and keys:
# https://docs.ansible.com/ansible/latest/reference_appendices/config.html

[defaults]
# Inventory file read when no inventory is given on the command line.
inventory = ./inventory_dev.ini
# Fixed path of the Python interpreter Ansible uses on the managed hosts.
interpreter_python = /usr/bin/python3
3 changes: 3 additions & 0 deletions cluster-setup/deployment/ansible_octomore.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Ansible configuration pointing at the Octomore inventory.
# NOTE(review): presumably meant to be copied to "ansible.cfg" before
# running the playbooks -- confirm.
[defaults]
# Inventory file read when no inventory is given on the command line.
inventory = ./inventory_octomore.ini
# Fixed path of the Python interpreter Ansible uses on the managed hosts.
interpreter_python = /usr/bin/python3
4 changes: 2 additions & 2 deletions cluster-setup/deployment/group_vars/default_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ kive_server_email: [email protected]
kive_admins: "[[\"kive\", \"[email protected]\"]]"
kive_subject_prefix: "Kive server"
kive_backup_path: /data/kive_db_backup
kive_python_package: python3.7
kive_python_executable: python3.7

# Settings for network services running on the head node,
# e.g. firewall, NFS, and PostgreSQL.
kive_internal_interface: 192.168.64.9
kive_external_interface: 192.168.64.9
nfs_export_to_hosts: 192.168.64.0/255.255.255.0
kive_db_host: 192.168.64.9
kive_db_subnet: 192.168.64.0/24
Expand Down
7 changes: 5 additions & 2 deletions cluster-setup/deployment/group_vars/octomore_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ kive_server_email: [email protected]
kive_admins: "[[\"kive\", \"[email protected]\"]]"
kive_subject_prefix: "Kive server"
kive_backup_path: /media/backup/
kive_version: v0.16.2
kive_python_package: python3.7
kive_python_executable: python3.7

# Settings for network services running on the head node,
# e.g. firewall, NFS, and PostgreSQL.
kive_internal_interface: 192.168.1.1
kive_external_interface: 192.168.69.86
nfs_export_to_hosts: 192.168.1.0/255.255.255.0
kive_db_host: 192.168.1.1
kive_db_subnet: 192.168.1.0/24
Expand Down Expand Up @@ -112,5 +113,7 @@ copied_groups:
- kive
- sudo

default_shell: /usr/bin/bash

data_physical_volumes:
- /dev/sdb
18 changes: 9 additions & 9 deletions cluster-setup/deployment/inventory_octomore.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
localhost

[workers]
b01
b02
b03
b04
# b01
# b02
# b03
# b04
b05
b06
b07a
b07b
b08a
b08b
# b06
# b07a
# b07b
# b08a
# b08b
3 changes: 0 additions & 3 deletions cluster-setup/deployment/kive_setup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
- name: configure head node
hosts: head
tasks:
- name: set up head node networking
include_role:
name: head_node_networking
- name: build Slurm
include_role:
name: slurm_builder
Expand Down
6 changes: 3 additions & 3 deletions cluster-setup/deployment/octomore_preliminary_setup.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
vars:
data_physical_volumes:
- /dev/sdb
# roles:
# - create_data_filesystem
# - head_node_internal_interface
roles:
- create_data_filesystem
- head_node_internal_interface
tasks:
- name: create /opt
file:
Expand Down
22 changes: 22 additions & 0 deletions cluster-setup/deployment/reassign_bootstrap_user_uid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---

# Moves the bootstrap user and its group to new numeric IDs on every host.
#
# Variables:
#   user_name / group_name: the account and group to renumber.
#   old_gid: the GID the group has before this play runs; used to find
#            files in the home directory that still carry it.
#   new_uid / new_gid: the IDs to assign.
#
# NOTE(review): changing a UID normally fails while that user has running
# processes, so this presumably must not be run over a connection made as
# user_name -- confirm.
- name: assign a different UID and GID to the bootstrap user
  hosts: all
  vars:
    user_name: ubuntu
    group_name: ubuntu
    old_gid: 1000
    new_uid: 1020
    new_gid: 1020
  tasks:
    # Every task edits system account data, so each one escalates privileges.
    - name: change UID
      become: true
      user:
        name: "{{ user_name }}"
        uid: "{{ new_uid }}"

    - name: change GID
      become: true
      group:
        name: "{{ group_name }}"
        gid: "{{ new_gid }}"

    # Changing the group's GID does not touch existing files, so anything in
    # the home directory still tagged with the old GID is re-grouped here.
    # -h makes chgrp act on symlinks themselves rather than their targets.
    # -print lists each re-grouped path so the task only reports "changed"
    # when something was actually modified.
    - name: change group perms of the home folders
      become: true
      command:
        argv:
          - find
          - "/home/{{ user_name }}"
          - "-group"
          - "{{ old_gid }}"
          - "-exec"
          - chgrp
          - "-h"
          - "{{ group_name }}"
          - "{}"
          - ";"
          - "-print"
      register: regrouped_paths
      changed_when: regrouped_paths.stdout | length > 0
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
default-lease-time 600;
max-lease-time 7200;

option domain-name-servers 192.168.168.101;

subnet {{ internal_subnet }} netmask {{ internal_netmask }} {
range {{ internal_dhcp_range[0] }} {{ internal_dhcp_range [1] }};
option routers {{ head_internal_address }};
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@
group: "{{ item.primary_group }}"
groups: "{{ item.groups }}"
append: true
shell: "{{ default_shell }}"
28 changes: 26 additions & 2 deletions cluster-setup/deployment/roles/kive_node/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,28 @@
---

# The deadsnakes PPA provides Python versions older than the distribution's.
- name: add the deadsnakes PPA to get old versions of Python
  become: true
  apt_repository:
    repo: ppa:deadsnakes/ppa

# kive_python_package is the apt package name (e.g. python3.7); the
# -distutils and -venv companions are installed alongside the interpreter.
- name: install the version of Python required by kive
  become: true
  apt:
    name:
      - "{{ kive_python_package }}"
      - "{{ kive_python_package }}-distutils"
      - "{{ kive_python_package }}-venv"

# Downloads get-pip.py and feeds it to the Kive interpreter on stdin.
# pipefail plus "curl -f" make a failed download fail the task; without them
# the interpreter would read empty input and the task would look successful.
# The task already runs as root via become, so no nested sudo is needed.
# NOTE(review): bootstrap.pypa.io serves end-of-life interpreters from
# versioned paths (e.g. /pip/3.7/get-pip.py) -- confirm the URL still suits
# the configured kive_python_executable.
- name: install pip for this version of Python
  become: true
  shell:
    cmd: >-
      set -o pipefail &&
      curl -fsSL https://bootstrap.pypa.io/get-pip.py
      | {{ kive_python_executable }}
    executable: /bin/bash

- name: kive package dependencies
become: true
apt:
name:
- python3-dev
- python3-venv
# - python3-dev
# - python3-venv
- libsqlite3-dev
- wcanadian
- lsof
Expand All @@ -31,9 +48,16 @@
state: started
enabled: true

- name: create kive group
become: true
group:
name: kive
gid: 762 # random gid in system uid range (200, 999); hard-coded for consistency across hosts

- name: create kive user
become: true
user:
name: kive
system: yes
uid: 762 # random uid in system uid range (200, 999); hard-coded for consistency across hosts
group: kive
15 changes: 0 additions & 15 deletions cluster-setup/deployment/roles/kive_server/files/001-kive.conf

This file was deleted.

42 changes: 23 additions & 19 deletions cluster-setup/deployment/roles/kive_server/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
when: kive_requirements.changed
pip:
requirements: "{{ kive_root }}/requirements.txt"
virtualenv_command: "python3 -m venv"
virtualenv_command: "{{ kive_python_executable }} -m venv"
virtualenv: "{{ kive_venv }}"


Expand Down Expand Up @@ -128,9 +128,12 @@
- name: Kive-specific configuration
block:
- name: install Kive-specific config file
copy:
src: 001-kive.conf
dest: /etc/apache2/conf-available
template:
src: 001-kive.conf.j2
dest: /etc/apache2/conf-available/001-kive.conf
owner: root
group: root
mode: "644"
- name: enable Kive-specific configuration
command:
cmd: "a2enconf 001-kive"
Expand All @@ -145,7 +148,7 @@

- name: server environment variables via systemd
blockinfile:
path: /etc/systemd/system/httpd.service.d/override.conf
path: /etc/systemd/system/apache2.service.d/override.conf
create: true
backup: true
mode: "644"
Expand All @@ -161,12 +164,12 @@
export APACHE_RUN_USER=kive
export APACHE_RUN_GROUP=kive
- name: update apache2 port
blockinfile:
path: /etc/apache2/ports.conf
backup: true
block: |
Listen {{ kive_listen_port }}
# - name: update apache2 port
# blockinfile:
# path: /etc/apache2/ports.conf
# backup: true
# block: |
# Listen {{ kive_listen_port }}

- name: set up rotating apache logs
block:
Expand All @@ -186,32 +189,33 @@
- name: enable apache2
systemd:
name: apache2
state: started
state: restarted
enabled: true
daemon_reload: true

- name: ensure ufw (the firewall) is running
systemd:
name: ufw
state: started
state: restarted
enabled: true

# - name: firewall's internal interface should ACCEPT
# register: internal_accept
# community.general.ufw:
# default: allow
# interface: "{{ kive_internal_interface }}"
# interface: "{{ head_internal_interface }}"
# direction: incoming
# state: enabled
#
# - name: reload internal interface firewall config
# when: internal_accept.changed
# community.general.ufw:
# interface: "{{ kive_internal_interface }}"
# interface: "{{ head_internal_interface }}"
# state: reloaded

- name: permit access to Postgres on the internal interface
community.general.ufw:
interface: "{{ kive_internal_interface }}"
interface: "{{ head_internal_interface }}"
direction: in
port: 5432
protocol: tcp
Expand All @@ -220,7 +224,7 @@

- name: permit https service through firewall
community.general.ufw:
interface: "{{ kive_external_interface }}"
interface: "{{ head_external_interface }}"
direction: in
port: https
protocol: tcp
Expand All @@ -229,7 +233,7 @@

- name: permit http service through firewall
community.general.ufw:
interface: "{{ kive_external_interface }}"
interface: "{{ head_external_interface }}"
direction: in
port: http
protocol: tcp
Expand Down Expand Up @@ -355,7 +359,7 @@
chdir: "{{ kive_root }}/api/"
creates: "{{ kive_root }}/api/build/"
- name: collect kive's static files
notify: restart http server
notify: restart web server
environment:
KIVE_STATIC_ROOT: "{{ kive_static_root }}"
community.general.django_manage:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Kive-specific Apache configuration, rendered from a Jinja2 template.
# Mounts the Kive WSGI application at the site root and serves the
# collected static files directly from disk under /static.
WSGIScriptAlias / {{ kive_root }}/kive/kive/wsgi.py
# Make both the Kive source tree and the virtualenv's packages importable.
WSGIPythonPath {{ kive_root }}/kive:{{ kive_venv }}/lib/{{ kive_python_executable }}/site-packages

# Only the WSGI entry point itself is exposed from the source directory.
<Directory {{ kive_root }}/kive/kive>
    <Files wsgi.py>
        Require all granted
    </Files>
</Directory>

Alias /static {{ kive_static_root }}

# Apache 2.4 access control; replaces the 2.2-style Order/Allow pair so the
# file uses one authorization syntax throughout.
<Directory {{ kive_static_root }}>
    Require all granted
</Directory>
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@
- name: mount the drive
ansible.posix.mount:
path: /data
src: "{{ kive_internal_interface }}:/data"
src: "{{ head_internal_address }}:/data"
fstype: nfs
state: mounted

- name: mount /opt
become: true
ansible.posix.mount:
path: /opt
src: "{{ kive_internal_interface }}:/opt"
src: "{{ head_internal_address }}:/opt"
fstype: nfs
state: mounted
opts: ro
Expand All @@ -59,7 +59,7 @@
become: true
ansible.posix.mount:
path: /usr/local
src: "{{ kive_internal_interface }}:/usr/local"
src: "{{ head_internal_address }}:/usr/local"
fstype: nfs
state: mounted
opts: ro
Expand All @@ -75,15 +75,6 @@
msg: "/data/home is not in place."
when: not data_home.stat.exists

- name: check if SSH configuration is in place
stat: path=/data/home/vagrant/.ssh
register: ssh_configuration

- name: fail if SSH configuration isn't prepared
fail:
msg: "SSH configuration is not present in the shared home directory."
when: not ssh_configuration.stat.exists

rescue:
- name: inform user to set up the head node first and propagate failure to stop the playbook
fail:
Expand Down

0 comments on commit 401b668

Please sign in to comment.