This repository has been archived by the owner on Dec 12, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 41
/
install_everything.yml
223 lines (220 loc) · 8.31 KB
/
install_everything.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
---
# This Playbook sets up Prometheus on an OpenTLC Cluster
# Play: build the in-memory `infranodes` and `masters` groups that the later
# plays target, by matching host names found in the `nodes` inventory group.
# NOTE(review): these tasks carry no tags, so a run restricted with --tags
# skips them; the groups then stay empty unless the inventory defines them.
- hosts: localhost
  gather_facts: false
  tasks:
    # Find out which nodes are Infranodes and add them to a new group: infranodes
    # `match` is applied as a Jinja2 test (`is match`). The filter spelling
    # (`item | match(...)`) was deprecated in Ansible 2.5 and removed in 2.9.
    - name: Add Infranodes to a new infranodes Group
      add_host:
        name: "{{ item }}"
        groups: infranodes
      with_items: "{{ groups['nodes'] }}"
      when:
        - 'item is match("^infranode.*")'

    # Same approach for the masters: every host whose name starts with
    # "master" is added to a new group: masters
    - name: Add Masters to a new masters Group
      add_host:
        name: "{{ item }}"
        groups: masters
      with_items: "{{ groups['nodes'] }}"
      when:
        - 'item is match("^master.*")'
# Play: prepare the first infranode. Opens the router metrics port and creates
# the host directory that is used for the Prometheus data.
# NOTE(review): the stock EC2 login is usually `ec2-user` (hyphen); confirm
# that `ec2_user` really exists on these hosts.
- hosts: infranodes[0]
  remote_user: ec2_user
  become: true
  become_user: root
  tasks:
    # OpenShift Routers expose /metrics on port 1936. Therefore we need to open
    # the port for both future and current sessions so that Prometheus can
    # access the router metrics.
    # Open Firewall Port 1936 for future sessions by adding the rule to
    # the iptables file.
    - name: Open Firewall port 1936 for future sessions
      lineinfile:
        dest: /etc/sysconfig/iptables
        insertafter: '-A FORWARD -j REJECT --reject-with icmp-host-prohibited'
        line: '-A OS_FIREWALL_ALLOW -p tcp -m state --state NEW -m tcp --dport 1936 -j ACCEPT'
        state: present
      tags:
        - prometheus

    # Open Firewall Port 1936 for current session by adding the rule to the
    # current iptables configuration. We won't need to restart the iptables
    # service - which will ensure all OpenShift rules stay in place.
    - name: Open Firewall Port 1936 for current session
      iptables:
        action: insert
        protocol: tcp
        destination_port: "1936"
        state: present
        chain: OS_FIREWALL_ALLOW
        jump: ACCEPT
      tags:
        - prometheus

    # Create Directory /var/lib/prometheus-data with correct permissions.
    # Make sure the directory has SELinux Type svirt_sandbox_file_t otherwise
    # there is a permissions problem trying to mount it into the pod.
    # This play targets infranodes[0] only, so even when there are several
    # infranodes the directory is created on the first one alone.
    # The mode is quoted so it reaches the module as the literal string "0777"
    # instead of depending on the YAML parser's octal handling.
    - name: Create directory /var/lib/prometheus-data
      file:
        path: /var/lib/prometheus-data
        state: directory
        group: root
        owner: root
        mode: "0777"
        setype: svirt_sandbox_file_t
      tags:
        - prometheus
# Configure all Nodes (including Infranodes and Masters) for monitoring:
# open the Node Exporter port, create the textfile collector directory and
# install the dockerinfo script together with its cron job.
# NOTE(review): the stock EC2 login is usually `ec2-user` (hyphen); confirm
# that `ec2_user` really exists on these hosts.
- hosts: nodes
  remote_user: ec2_user
  become: true
  become_user: root
  tasks:
    # Node Exporters on all Nodes listen on port 9100.
    # Open Firewall Port 9100 for future sessions by adding the rule to
    # the iptables file.
    - name: Open Firewall port 9100 for future sessions
      lineinfile:
        dest: /etc/sysconfig/iptables
        insertafter: '-A FORWARD -j REJECT --reject-with icmp-host-prohibited'
        line: '-A OS_FIREWALL_ALLOW -p tcp -m state --state NEW -m tcp --dport 9100 -j ACCEPT'
        state: present
      tags:
        - prometheus

    # Open Firewall Port 9100 for current session by adding the rule to the
    # current iptables configuration. We won't need to restart the iptables
    # service - which will ensure all OpenShift rules stay in place.
    - name: Open Firewall Port 9100 for current session
      iptables:
        action: insert
        protocol: tcp
        destination_port: "9100"
        state: present
        chain: OS_FIREWALL_ALLOW
        jump: ACCEPT
      tags:
        - prometheus

    # The Node Exporter reads information from the Nodes. In addition it can
    # read arbitrary information from a (properly formatted) text file.
    # We have a shell script that puts information about Docker into a textfile
    # to be read by the Node Exporter. Therefore we need to create the
    # directory where the text file is to be written.
    # File modes in this play are quoted so they reach the modules as literal
    # strings instead of depending on the YAML parser's octal handling.
    - name: Create textfile_collector directory
      file:
        path: /var/lib/node_exporter/textfile_collector
        state: directory
        owner: root
        group: root
        mode: "0775"
      tags:
        - prometheus

    # Copy the shell script to the nodes to collect docker information and
    # write to a text file.
    # NOTE(review): the script is fetched from the moving `master` branch
    # without a checksum; consider pinning a commit or adding `checksum:`.
    - name: Copy dockerinfo to node
      get_url:
        url: https://raw.githubusercontent.com/wkulhanek/openshift-prometheus/master/node-exporter/dockerinfo/dockerinfo.sh
        dest: /usr/local/bin/dockerinfo.sh
        owner: root
        group: root
        mode: "0755"
      tags:
        - prometheus

    # Create a cron job to run the dockerinfo shell script periodically.
    - name: Copy cron.d/docker_info.cron to node
      get_url:
        url: https://raw.githubusercontent.com/wkulhanek/openshift-prometheus/master/node-exporter/dockerinfo/dockerinfo.cron
        dest: /etc/cron.d/dockerinfo.cron
        owner: root
        group: root
        mode: "0644"
      tags:
        - prometheus

    # Restart crond service to pick up new cron job.
    # This task is unconditional, so crond is restarted on every run.
    - name: Restart crond service on node
      systemd:
        name: crond
        state: restarted
      tags:
        - prometheus
# Finally create all the necessary OpenShift objects. This happens via the
# oc binary on the (first) master host.
# Every deployment task is guarded by the result of the project check, so the
# objects are only created when the prometheus project does not exist yet.
# NOTE(review): the stock EC2 login is usually `ec2-user` (hyphen); confirm
# that `ec2_user` really exists on these hosts.
- hosts: masters[0]
  remote_user: ec2_user
  become: true
  become_user: root
  vars:
    # The Web Hook URL for the Alert Manager to send alerts to Rocket Chat
    # The default is #gpte-devops-prometheus channel on the Red Hat Chat instance
    # NOTE(review): this URL appears to embed a webhook token; consider passing
    # it with -e or from Ansible Vault instead of committing it.
    webhook_url: 'https://chat.consulting.redhat.com/hooks/LTtLntjbTBNvij6br/Rfa6WZSANJJBB8QDsu5nhynQ2sJG2wrSL3BLzbxYJqit3EGk'
  tasks:
    # Add label "prometheus-host=true" to the first infranode
    - name: Label Infranodes with prometheus-host=true
      shell: oc label node {{ groups['infranodes'][0] }} prometheus-host=true --overwrite
      tags:
        - prometheus

    # Check if there is already a prometheus project.
    # The task carries every tag used by the tasks below: they all read
    # `prometheus_project_present`, which would be undefined if a --tags run
    # skipped this check. A failing `oc get` is the expected "not there yet"
    # signal, hence ignore_errors; the check itself never changes anything.
    - name: Check for prometheus project
      command: oc get project prometheus
      register: prometheus_project_present
      ignore_errors: true
      changed_when: false
      tags:
        - prometheus
        - alertmanager
        - grafana

    # Create the Prometheus Project if it's not there yet.
    # `failed` is used as a Jinja2 test (`is failed`); the filter spelling
    # (`| failed`) was deprecated in Ansible 2.5 and removed in 2.9.
    - name: Create Prometheus Project
      shell: oc new-project prometheus --display-name="Prometheus Monitoring"
      when: prometheus_project_present is failed
      tags:
        - prometheus

    - name: Set Node Selectors to empty on Prometheus Project
      shell: oc annotate namespace prometheus openshift.io/node-selector=""
      when: prometheus_project_present is failed
      tags:
        - prometheus

    # Read STATS_PASSWORD from the router deployment config in the default
    # project; it is handed to the Prometheus template below.
    - name: Determine Router Password
      shell: oc set env dc router -n default --list|grep STATS_PASSWORD|awk -F"=" '{print $2}'
      when: prometheus_project_present is failed
      register: router_password
      tags:
        - prometheus

    # The password is passed through the `quote` filter so that shell
    # metacharacters in it cannot break or alter the command line.
    - name: Deploy Prometheus
      shell: >-
        oc new-app
        -f https://raw.githubusercontent.com/wkulhanek/openshift-prometheus/master/prometheus.yaml
        --param ROUTER_PASSWORD={{ router_password.stdout | quote }}
      when: prometheus_project_present is failed
      tags:
        - prometheus

    - name: Grant privileged SCC to Prometheus Service account
      shell: oc adm policy add-scc-to-user privileged system:serviceaccount:prometheus:prometheus
      when: prometheus_project_present is failed
      tags:
        - prometheus

    - name: Grant privileged SCC to default service account for Node Exporter
      shell: oc adm policy add-scc-to-user privileged system:serviceaccount:prometheus:default
      when: prometheus_project_present is failed
      tags:
        - prometheus

    - name: Deploy Node Exporter Daemon Set
      shell: >-
        oc new-app
        -f https://raw.githubusercontent.com/wkulhanek/openshift-prometheus/master/node-exporter/node-exporter.yaml
      when: prometheus_project_present is failed
      tags:
        - prometheus

    # The webhook URL is shell-quoted for the same reason as the router
    # password above.
    - name: Deploy Alertmanager
      shell: >-
        oc new-app
        -f https://raw.githubusercontent.com/wkulhanek/openshift-prometheus/master/alertmanager/alertmanager.yaml
        -p WEBHOOK_URL={{ webhook_url | quote }}
      when: prometheus_project_present is failed
      tags:
        - prometheus
        - alertmanager

    # Patch the deployment config with the nodeSelector env=infra.
    - name: Move Alertmanager to an Infranode
      command: >-
        oc patch dc alertmanager --patch
        '{ "spec": { "template": { "spec": { "nodeSelector": { "env":"infra"}}}}}'
      when: prometheus_project_present is failed
      tags:
        - prometheus
        - alertmanager

    - name: Deploy Grafana
      shell: >-
        oc new-app
        -f https://raw.githubusercontent.com/wkulhanek/docker-openshift-grafana/master/grafana.yaml
      when: prometheus_project_present is failed
      tags:
        - prometheus
        - grafana

    # Patch the deployment config with the nodeSelector env=infra.
    - name: Move Grafana to an Infranode
      command: >-
        oc patch dc grafana --patch
        '{ "spec": { "template": { "spec": { "nodeSelector": { "env":"infra"}}}}}'
      when: prometheus_project_present is failed
      tags:
        - prometheus
        - grafana