diff --git a/.drone.yml b/.drone.yml
index 4ad799e..11e2896 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -1,19 +1,19 @@
----
-# kind: pipeline
-# type: exec
-# name: default
-
-# platform:
-# os: linux
-# arch: amd64
-
-
-# clone:
-# # убрано так как сейчас не тестим ничего предварительно а сразу тянем в директорию
-# disable: true
-
-# steps:
-# - name: pull into the folduh
-# commands:
-# - cd /etc/ansible
+---
+# kind: pipeline
+# type: exec
+# name: default
+
+# platform:
+# os: linux
+# arch: amd64
+
+
+# clone:
+# # убрано так как сейчас не тестим ничего предварительно а сразу тянем в директорию
+# disable: true
+
+# steps:
+# - name: pull into the folduh
+# commands:
+# - cd /etc/ansible
# - git pull origin some-kind-of-lobster
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index dcb25f8..f203b47 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
-.vaulto
+.vaulto
asdf
\ No newline at end of file
diff --git a/ansible.cfg b/ansible.cfg
index e820ee7..37213da 100644
--- a/ansible.cfg
+++ b/ansible.cfg
@@ -1,44 +1,45 @@
-#### export ANSIBLE_CONFIG=./ansible.cfg
-
-[defaults]
-gathering = smart
-fact_caching = jsonfile
-fact_caching_connection = /tmp/facts_cache
-# two hours timeout
-fact_caching_timeout = 7200
-
-
-interpreter_python = auto_silent
-ansible_python_interpreter = auto_silent
-# Use the YAML callback plugin.
-stdout_callback = yaml
-# Use the stdout_callback when running ad-hoc commands.
-bin_ansible_callbacks = True
-
-host_key_checking = false
-
-#vault_password_file = /etc/ansible/.vaulto
-vault_password_file = /tmp/.vaulto
-
-# callback_plugins = /etc/ansible/plugins/callback
-# callback_whitelist = telegram
-# callbacks_enabled = telegram
-
-strategy_plugins = mitogen-0.3.9/ansible_mitogen/plugins/strategy
-strategy = mitogen_linear
-
-#### TODO чому-то не делается
-roles_path = roles:internal_roles
-# # [callback_telegram]
-# # tg_token = 6472915685:AAHPvgrQoqG7DxtfbnHWPe3Lfild-CGJ1j8
-# # tg_chat_id = -4023350326
-
-
-# добавление юзера
-# useradd -m hogweed1 -s /usr/bin/bash
-# passwd hogweed1
-# sudo adduser hogweed1 sudo
-
-[ssh_connection]
-# Enable pipelining, requires disabling requiretty in sudoers
+#### export ANSIBLE_CONFIG=./ansible.cfg
+
+[defaults]
+gathering = smart
+fact_caching = jsonfile
+fact_caching_connection = /tmp/facts_cache
+# two hours timeout
+fact_caching_timeout = 7200
+
+
+interpreter_python = auto_silent
+ansible_python_interpreter = auto_silent
+# Use the YAML callback plugin.
+stdout_callback = yaml
+# Use the stdout_callback when running ad-hoc commands.
+bin_ansible_callbacks = True
+
+host_key_checking = false
+
+#vault_password_file = /etc/ansible/.vaulto
+#vault_password_file = /tmp/.vaulto
+vault_password_file = /usr/share/.vaulto
+
+# callback_plugins = /etc/ansible/plugins/callback
+# callback_whitelist = telegram
+# callbacks_enabled = telegram
+
+strategy_plugins = mitogen-0.3.9/ansible_mitogen/plugins/strategy
+strategy = mitogen_linear
+
+#### TODO: for some reason this does not work
+roles_path = roles:internal_roles
+# # [callback_telegram]
+# # tg_token = 6472915685:AAHPvgrQoqG7DxtfbnHWPe3Lfild-CGJ1j8
+# # tg_chat_id = -4023350326
+
+
+# adding a user
+# useradd -m hogweed1 -s /usr/bin/bash
+# passwd hogweed1
+# sudo adduser hogweed1 sudo
+
+[ssh_connection]
+# Enable pipelining, requires disabling requiretty in sudoers
pipelining = True
\ No newline at end of file
diff --git a/environments/base/group_vars/all/ssh-creds.yml b/environments/base/group_vars/all/ssh-creds.yml
index bcdb961..e0b62bf 100644
--- a/environments/base/group_vars/all/ssh-creds.yml
+++ b/environments/base/group_vars/all/ssh-creds.yml
@@ -1,10 +1,10 @@
-$ANSIBLE_VAULT;1.1;AES256
-31363137313338616231343430646133386434313864323835633839353631313262313365396662
-6430623831346630336138613735653333386565346461300a326639383234383063646366343039
-32396436373561376431383338643464313131336135333864336530636164616661616261363930
-3662306533383232660a386464396437653835356564333032393063386532346463376332626536
-35373439633936396539383163396632313462626336363164353038643664633734326136356135
-36656235616231363234323632393833323035313739363565393932326535643834633464303361
-63643531643430336164336261653539353236346533653030336634383031663535383264383365
-32653235386436303133623233653235356131643633643937373630333166373063633731353661
-36393539333435366439313364633735326339646264626262633063633664626461
+$ANSIBLE_VAULT;1.1;AES256
+31363137313338616231343430646133386434313864323835633839353631313262313365396662
+6430623831346630336138613735653333386565346461300a326639383234383063646366343039
+32396436373561376431383338643464313131336135333864336530636164616661616261363930
+3662306533383232660a386464396437653835356564333032393063386532346463376332626536
+35373439633936396539383163396632313462626336363164353038643664633734326136356135
+36656235616231363234323632393833323035313739363565393932326535643834633464303361
+63643531643430336164336261653539353236346533653030336634383031663535383264383365
+32653235386436303133623233653235356131643633643937373630333166373063633731353661
+36393539333435366439313364633735326339646264626262633063633664626461
diff --git a/environments/base/hosts.yml b/environments/base/hosts.yml
index 7d094b6..eeac53f 100644
--- a/environments/base/hosts.yml
+++ b/environments/base/hosts.yml
@@ -1,18 +1,18 @@
----
-all: # keys must be unique, i.e. only one 'hosts' per group
- hosts:
- semyon-0x01.guaranteedstruggle.host:
- semyon-0x02.guaranteedstruggle.host:
- semyon-0x03.guaranteedstruggle.host:
- semyon-0x04.guaranteedstruggle.host:
- semyon-0x05.guaranteedstruggle.host:
-
- samehost-zero.guaranteedstruggle.host:
-
-puppets: # keys must be unique, i.e. only one 'hosts' per group
- hosts:
- semyon-0x01.guaranteedstruggle.host:
- semyon-0x02.guaranteedstruggle.host:
- semyon-0x03.guaranteedstruggle.host:
- semyon-0x04.guaranteedstruggle.host:
- semyon-0x05.guaranteedstruggle.host:
+---
+all: # keys must be unique, i.e. only one 'hosts' per group
+ hosts:
+ semyon-0x01.guaranteedstruggle.host:
+ semyon-0x02.guaranteedstruggle.host:
+ semyon-0x03.guaranteedstruggle.host:
+ semyon-0x04.guaranteedstruggle.host:
+ semyon-0x05.guaranteedstruggle.host:
+
+ samehost-zero.guaranteedstruggle.host:
+
+puppets: # keys must be unique, i.e. only one 'hosts' per group
+ hosts:
+ semyon-0x01.guaranteedstruggle.host:
+ semyon-0x02.guaranteedstruggle.host:
+ semyon-0x03.guaranteedstruggle.host:
+ semyon-0x04.guaranteedstruggle.host:
+ semyon-0x05.guaranteedstruggle.host:
diff --git a/environments/just-created/group_vars/all/ssh-creds.yml b/environments/just-created/group_vars/all/ssh-creds.yml
index 322f857..8b7deef 100644
--- a/environments/just-created/group_vars/all/ssh-creds.yml
+++ b/environments/just-created/group_vars/all/ssh-creds.yml
@@ -1,5 +1,5 @@
----
-ansible_ssh_user: root
-ansible_ssh_pass: admin
-ansible_sudo_pass: admin
+---
+ansible_ssh_user: root
+ansible_ssh_pass: admin
+ansible_sudo_pass: admin
ansible_ssh_private_key_file: '/home/hogweed1/id25519.key'
\ No newline at end of file
diff --git a/environments/just-created/group_vars/lxc/ssh-creds.yml b/environments/just-created/group_vars/lxc/ssh-creds.yml
new file mode 100644
index 0000000..8b7deef
--- /dev/null
+++ b/environments/just-created/group_vars/lxc/ssh-creds.yml
@@ -0,0 +1,5 @@
+---
+ansible_ssh_user: root
+ansible_ssh_pass: admin
+ansible_sudo_pass: admin
+ansible_ssh_private_key_file: '/home/hogweed1/id25519.key'
\ No newline at end of file
diff --git a/environments/just-created/group_vars/lxc.yml b/environments/just-created/host_vars/192.168.0.32.yml
similarity index 60%
rename from environments/just-created/group_vars/lxc.yml
rename to environments/just-created/host_vars/192.168.0.32.yml
index e907b26..a0cc394 100644
--- a/environments/just-created/group_vars/lxc.yml
+++ b/environments/just-created/host_vars/192.168.0.32.yml
@@ -1,5 +1,5 @@
----
-ansible_ssh_user: hogweed1
-ansible_ssh_pass: coloredhorses
-ansible_sudo_pass: coloredhorses
-ansible_ssh_private_key_file: '/home/hogweed1/id25519.key'
\ No newline at end of file
+---
+ansible_ssh_user: hogweed1
+ansible_ssh_pass: coloredhorses
+ansible_sudo_pass: coloredhorses
+#ansible_ssh_private_key_file: '/home/hogweed1/id25519.key'
\ No newline at end of file
diff --git a/environments/just-created/host_vars/k3s-rancher.guaranteedstruggle.host.yml b/environments/just-created/host_vars/k3s-rancher.guaranteedstruggle.host.yml
new file mode 100644
index 0000000..a0cc394
--- /dev/null
+++ b/environments/just-created/host_vars/k3s-rancher.guaranteedstruggle.host.yml
@@ -0,0 +1,5 @@
+---
+ansible_ssh_user: hogweed1
+ansible_ssh_pass: coloredhorses
+ansible_sudo_pass: coloredhorses
+#ansible_ssh_private_key_file: '/home/hogweed1/id25519.key'
\ No newline at end of file
diff --git a/environments/just-created/hosts.yml b/environments/just-created/hosts.yml
index a37d47c..fe6872f 100644
--- a/environments/just-created/hosts.yml
+++ b/environments/just-created/hosts.yml
@@ -1,11 +1,24 @@
----
-# all: # keys must be unique, i.e. only one 'hosts' per group
-# hosts:
-# #nexus.guaranteedstruggle.host:
-# #printing-slut.guaranteedstruggle.host:
-# harbor.guaranteedstruggle.host:
-
-lxc: # keys must be unique, i.e. only one 'hosts' per group
- hosts:
- ### but its a vm wtf
- harbor.guaranteedstruggle.host:
\ No newline at end of file
+---
+all: # keys must be unique, i.e. only one 'hosts' per group
+ hosts:
+ #k3s-rancher.guaranteedstruggle.host:
+# #nexus.guaranteedstruggle.host:
+# #printing-slut.guaranteedstruggle.host:
+# harbor.guaranteedstruggle.host:
+
+ #192.168.0.26
+ #192.168.0.32:
+lxc: # keys must be unique, i.e. only one 'hosts' per group
+ hosts:
+ ### but its a vm wtf
+ #harbor.guaranteedstruggle.host:
+
+ #etcd.guaranteedstruggle.host:
+ #prometheus.guaranteedstruggle.host:
+ # 192.168.0.240
+ #192.168.0.251
+ #192.168.0.40
+ #192.168.0.88
+ #192.168.0.52
+ #192.168.0.113
+ #recording-slut.guaranteedstruggle.host:
\ No newline at end of file
diff --git a/environments/proxmoxes/group_vars/all/all.yml b/environments/proxmoxes/group_vars/all/all.yml
index 48b125b..2168980 100644
--- a/environments/proxmoxes/group_vars/all/all.yml
+++ b/environments/proxmoxes/group_vars/all/all.yml
@@ -1,2 +1,2 @@
-# отключаем ворнинг с митогена - https://github.com/mitogen-hq/mitogen/issues/740#issuecomment-731513058
+# отключаем ворнинг с митогена - https://github.com/mitogen-hq/mitogen/issues/740#issuecomment-731513058
ansible_python_interpreter: /usr/bin/python3
\ No newline at end of file
diff --git a/environments/proxmoxes/group_vars/all/ssh-creds.yml b/environments/proxmoxes/group_vars/all/ssh-creds.yml
index b5cc034..c65882d 100644
--- a/environments/proxmoxes/group_vars/all/ssh-creds.yml
+++ b/environments/proxmoxes/group_vars/all/ssh-creds.yml
@@ -1,4 +1,4 @@
----
-ansible_ssh_user: hogweed1
-ansible_ssh_pass: coloredhorses
+---
+ansible_ssh_user: hogweed1
+ansible_ssh_pass: coloredhorses
ansible_sudo_pass: coloredhorses
\ No newline at end of file
diff --git a/environments/proxmoxes/hosts.yml b/environments/proxmoxes/hosts.yml
index d8edfc3..2b36858 100644
--- a/environments/proxmoxes/hosts.yml
+++ b/environments/proxmoxes/hosts.yml
@@ -1,55 +1,71 @@
----
-physical_machines:
- hosts:
- cyberbully.guaranteedstruggle.host:
- gpu-slut.guaranteedstruggle.host:
- children:
- proxmoxes:
-
-proxmoxes: # keys must be unique, i.e. only one 'hosts' per group
- hosts:
- king-albert.guaranteedstruggle.host:
- children:
- semyons:
-
-semyons: # keys must be unique, i.e. only one 'hosts' per group
- hosts:
- semyon-0x01.guaranteedstruggle.host:
- semyon-0x02.guaranteedstruggle.host:
- semyon-0x03.guaranteedstruggle.host:
- semyon-0x04.guaranteedstruggle.host:
- semyon-0x05.guaranteedstruggle.host:
-vms:
- children:
- printer:
- kubernetes:
- docker:
-
-docker:
- hosts:
- swarm-node1.guaranteedstruggle.host:
- swarm-node2.guaranteedstruggle.host:
- swarm-node3.guaranteedstruggle.host:
-
- harbor.guaranteedstruggle.host:
-
-kubernetes:
- hosts:
- rke2-master1.guaranteedstruggle.host:
- rke2-master2.guaranteedstruggle.host:
- rke2-master3.guaranteedstruggle.host:
- rke2-worker1.guaranteedstruggle.host:
- rke2-worker2.guaranteedstruggle.host:
- rke2-worker3.guaranteedstruggle.host:
- rke2-worker4.guaranteedstruggle.host:
- rke2-worker5.guaranteedstruggle.host:
-
- k3s-rancher.guaranteedstruggle.host:
- k3s-awx.guaranteedstruggle.host:
-
-printer:
- hosts:
- printing-slut.guaranteedstruggle.host:
-
-#### TODO
-# lxc:
+---
+physical_machines:
+ hosts:
+ cyberbully.guaranteedstruggle.host:
+ #
+ gpu-slut.guaranteedstruggle.host:
+ children:
+ proxmoxes:
+
+proxmoxes: # keys must be unique, i.e. only one 'hosts' per group
+ hosts:
+ king-albert.guaranteedstruggle.host:
+ children:
+ semyons:
+
+semyons: # keys must be unique, i.e. only one 'hosts' per group
+ hosts:
+ semyon-0x01.guaranteedstruggle.host:
+ semyon-0x02.guaranteedstruggle.host:
+ semyon-0x03.guaranteedstruggle.host:
+ semyon-0x04.guaranteedstruggle.host:
+ semyon-0x05.guaranteedstruggle.host:
+vms:
+ hosts:
+ #recording-slut.guaranteedstruggle.host:
+ #192.168.0.26
+ children:
+ printer:
+ kubernetes:
+ docker:
+
+
+docker:
+ hosts:
+ swarm-node1.guaranteedstruggle.host:
+ swarm-node2.guaranteedstruggle.host:
+ swarm-node3.guaranteedstruggle.host:
+
+ harbor.guaranteedstruggle.host:
+
+kubernetes:
+ hosts:
+ rke2-master1.guaranteedstruggle.host:
+ rke2-master2.guaranteedstruggle.host:
+ rke2-master3.guaranteedstruggle.host:
+ rke2-worker1.guaranteedstruggle.host:
+ rke2-worker2.guaranteedstruggle.host:
+ rke2-worker3.guaranteedstruggle.host:
+ rke2-worker4.guaranteedstruggle.host:
+ rke2-worker5.guaranteedstruggle.host:
+
+ k3s-rancher.guaranteedstruggle.host:
+ k3s-awx.guaranteedstruggle.host:
+
+printer:
+ hosts:
+ printing-slut.guaranteedstruggle.host:
+
+#### TODO
+lxc:
+ hosts:
+ ### but its a vm wtf
+ #harbor.guaranteedstruggle.host:
+ #etcd.guaranteedstruggle.host:
+ prometheus.guaranteedstruggle.host:
+ recording-slut.guaranteedstruggle.host:
+
+ pg.just-for-me.internal:
+ grafana.just-for-me.internal:
+ price-loader.just-for-me.internal:
+
diff --git a/files/alertmanager/alertmanager.service b/files/alertmanager/alertmanager.service
new file mode 100644
index 0000000..b63d8fe
--- /dev/null
+++ b/files/alertmanager/alertmanager.service
@@ -0,0 +1,21 @@
+[Unit]
+Description=Prometheus alertmanager
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+User=prometheus
+Group=prometheus
+EnvironmentFile=-/etc/sysconfig/alertmanager
+ExecStart=/usr/sbin/alertmanager \
+ --config.file=/etc/alertmanager/alertmanager.yaml \
+ --storage.path=/base/alertmanager \
+ --web.config.file=/etc/prometheus/web-config.yaml
+
+ExecReload=/bin/kill -HUP $MAINPID
+KillMode=process
+Restart=always
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/files/alertmanager/alertmanager.yaml b/files/alertmanager/alertmanager.yaml
new file mode 100644
index 0000000..d0fa591
--- /dev/null
+++ b/files/alertmanager/alertmanager.yaml
@@ -0,0 +1,50 @@
+global:
+ resolve_timeout: 5m
+
+route:
+ group_by: [ 'alertname', 'job' ]
+ group_wait: 30s
+ group_interval: 5m
+ repeat_interval: 1h
+ receiver: what-went-wrong
+
+# routes:
+
+receivers:
+# /dev/null receiver: defined but has no notification integrations
+- name: 'blackhole'
+
+# Telegram group chat; this is the receiver named by the top-level route
+- name: 'what-went-wrong'
+ telegram_configs:
+ - send_resolved: true
+ bot_token: '6472915685:AAHPvgrQoqG7DxtfbnHWPe3Lfild-CGJ1j8'  # NOTE(review): live secret committed in plaintext - rotate it; consider bot_token_file if the deployed Alertmanager supports it
+ chat_id: -4023350326
+ message: '{{ template "teletempl" . }}'
+ api_url: https://api.telegram.org
+ parse_mode: HTML
+# - name: 'vdk2ch'
+# telegram_configs:
+# - send_resolved: true
+# bot_token: '5724991559:AAEuLvpLsgP6LHRGMSyFtQLlR5qPQUO4b_w'
+# chat_id: -1001355646177
+# message: '{{ template "teletempl" . }}'
+# api_url: https://api.telegram.org
+# parse_mode: HTML
+
+# A list of inhibition rules.
+#inhibit_rules:
+
+templates:
+ - '/etc/alertmanager/templates/my.tmpl'
+
+# A list of time intervals for muting/activating routes.
+# time_intervals:
+# - name: business_hours
+# time_intervals:
+# - weekdays: ['monday:friday']
+# times:
+# # Начало в 10:00 Asia/Vladivostok
+# - start_time: '00:00'
+# # Заканчивается в 19:00 Asia/Vladivostok
+# end_time: '09:00'
diff --git a/files/alertmanager/simple_telegram.tmpl b/files/alertmanager/simple_telegram.tmpl
new file mode 100644
index 0000000..0adc0ce
--- /dev/null
+++ b/files/alertmanager/simple_telegram.tmpl
@@ -0,0 +1,33 @@
+{{ define "teletempl" }}
+{{ .CommonLabels.alertname }} :
+{{- if eq .Status "firing" -}}
+ {{ .Status | toUpper}} 🔥
+{{- end -}}
+{{- if eq .Status "resolved" -}}
+ {{ .Status | toUpper}} ✅
+{{- end -}}
+{{ $alerts_count := len .Alerts }}
+{{ if eq $alerts_count 1 -}} {{/* Single alert block */}}
+{{ .CommonAnnotations.summary }}
+
+Host: {{ .CommonLabels.host }}
+Instance: {{ .CommonLabels.instance }}
+Job: {{ .CommonLabels.job }}
+
+Details:
+{{ .CommonAnnotations.description }}
+
+Alert started: [ {{ .CommonAnnotations.alert_started_vl_time }} ]
+
+{{- else -}} {{/* Grouped alert block */}}
+{{ .CommonAnnotations.summary }}
+
+Job: {{ .CommonLabels.job }}
+
+
+Instances:
+{{- range .Alerts }}
+{{ .Labels.instance }} [ {{ .Annotations.alert_started_vl_time }} ]
+{{- end }}
+{{ end }}
+{{ end }}
diff --git a/files/prometheus/alerts.yaml b/files/prometheus/alerts.yaml
new file mode 100644
index 0000000..1d7f6df
--- /dev/null
+++ b/files/prometheus/alerts.yaml
@@ -0,0 +1,188 @@
+---
+# more example rules can be taken from here:
+# https://awesome-prometheus-alerts.grep.to/rules.html
+
+
+groups:
+
+
+ - name: standard
+
+ rules:
+ - alert: _plchldr
+ expr: up == -999
+ for: 999m
+ labels:
+ severity: info
+ annotations: &anno
+ alert_started_vl_time: "{{ with $b := printf `ALERTS_FOR_STATE{job=\"%s\",instance=\"%s\"} + 36000` $labels.job $labels.instance | query }}{{if $b}}{{ with $a := $b | first | value | humanizeTimestamp }}{{- slice $a 0 19 -}}{{end}}{{end}}{{end}}"
+
+
+ - alert: jackbot failed
+ expr: node_systemd_unit_state{ name="jack_bot.service", state="active" } != 1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ <<: *anno
+ summary: "PIPISA IS DOWN!"
+ description: "Pipisa on {{ $labels.instance }} does not working!"
+
+ - alert: jackbot failed  # critical tier: same expression as the 1m warning rule, held for 5m
+ expr: node_systemd_unit_state{ name="jack_bot.service", state="active" } != 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ <<: *anno
+ summary: "PIPISA IS DOWN!"
+ description: "Pipisa on {{ $labels.instance }} does not working!"
+
+
+ ### obsolete: there are no miners any more.
+ # - alert: MAINER JACK KURWA!!
+ # expr: node_load15 > 2
+ # for: 20m
+ # labels:
+ # severity: cricical
+ # annotations:
+ # <<: *anno
+ # summary: "It THAT shit again!"
+ # description: "Kill fucking mainer processes!"
+
+
+
+ - alert: Uptime
+ expr: floor((time() - node_boot_time_seconds)) < 3600
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ <<: *anno
+ summary: "Uptime less than 1 hour"
+ description: "Uptime on {{ $labels.instance }} is less than 1 hour"
+
+ - alert: LoadAverage
+ expr: (node_load5{}) > ( instance:node_cpus:count{} )
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ <<: *anno
+ summary: "High LoadAverage5"
+ description: |
+ {{ $labels.host }} [{{ printf `instance:node_cpus:count{host='%s', instance='%s'}` .Labels.host .Labels.instance | query | first | value }} CPU] LA: {{ printf `node_load1{host='%s', instance='%s'}` .Labels.host .Labels.instance | query | first | value }} {{ printf `node_load5{host='%s', instance='%s'}` .Labels.host .Labels.instance | query | first | value }} {{ printf `node_load15{host='%s', instance='%s'}` .Labels.host .Labels.instance | query | first | value }}
+
+ - alert: LoadAverage
+ expr: (node_load15{}) > ( instance:node_cpus:count{} )
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ <<: *anno
+ summary: "High LoadAverage15"
+ description: |
+ {{ $labels.host }} [{{ printf `instance:node_cpus:count{host='%s', instance='%s'}` .Labels.host .Labels.instance | query | first | value }} CPU] LA: {{ printf `node_load1{host='%s', instance='%s'}` .Labels.host .Labels.instance | query | first | value }} {{ printf `node_load5{host='%s', instance='%s'}` .Labels.host .Labels.instance | query | first | value }} {{ printf `node_load15{host='%s', instance='%s'}` .Labels.host .Labels.instance | query | first | value }}
+
+ - alert: RAM
+ expr: node_memory_MemAvailable_bytes{ } / node_memory_MemTotal_bytes * 100 < 10
+ for: 10m
+ labels:
+ severity: warning
+ annotations:
+ <<: *anno
+ summary: "Low available memory"
+ description: "Free RAM: {{ printf `%.2f` $value }}% Свободно {{ printf `node_memory_MemAvailable_bytes{instance='%s'}` .Labels.instance | query | first | value | humanize1024 }} из {{ printf `node_memory_MemTotal_bytes{instance='%s'}` .Labels.instance | query | first | value | humanize1024 }}"
+
+ - alert: RAM
+ expr: node_memory_MemAvailable_bytes{ } / node_memory_MemTotal_bytes * 100 < 5
+ for: 10m
+ labels:
+ severity: critical
+ annotations:
+ <<: *anno
+ summary: "Low available memory"
+ description: "Free RAM: {{ printf `%.2f` $value }}% Свободно {{ printf `node_memory_MemAvailable_bytes{instance='%s'}` .Labels.instance | query | first | value | humanize1024 }} из {{ printf `node_memory_MemTotal_bytes{instance='%s'}` .Labels.instance | query | first | value | humanize1024 }}"
+
+ - alert: iNodes  # warning tier: fewer than 10% of inodes free for 10m
+ expr: (node_filesystem_files_free{fstype!~"rootfs|fuse.lxcfs|squashfs",mountpoint!~"/boot|/boot/efi|/backup|/swap"} / node_filesystem_files) * 100 < 10
+ for: 10m
+ labels:
+ severity: warning
+ annotations:
+ <<: *anno
+ summary: "[WARN] Low available inodes"
+ description: "Available i-nodes: {{ printf `%.2f` $value }}%\n"
+
+ - alert: iNodes  # critical tier: fewer than 5% of inodes free for 10m
+ expr: (node_filesystem_files_free{fstype!~"rootfs|fuse.lxcfs|squashfs",mountpoint!~"/boot|/boot/efi|/backup|/swap"} / node_filesystem_files) * 100 < 5
+ for: 10m
+ labels:
+ severity: critical
+ annotations:
+ <<: *anno
+ summary: "[CRIT] Host out of inodes"
+ description: "Available i-nodes: {{ printf `%.2f` $value }}%\n"
+
+
+ - alert: DiskUsage  # fires when less than 10% of a filesystem is available for 5m
+ expr: ( node_filesystem_avail_bytes{mountpoint!~"/boot|/boot/efi|/backup|/swap", fstype!~"rootfs|fuse.lxcfs|squashfs"}/ node_filesystem_size_bytes ) * 100 < 10
+ for: 5m
+ labels:
+ severity: info
+ annotations:
+ <<: *anno
+ summary: "Disk usage is more than 90%"
+ description: |
+ {{ $labels.device }} ({{ $labels.mountpoint }}): {{ printf `node_filesystem_avail_bytes{mountpoint='%s', device='%s', instance='%s'}` .Labels.mountpoint .Labels.device .Labels.instance | query | first | value | humanize1024 }} / {{ printf `node_filesystem_size_bytes{mountpoint='%s', device='%s', instance='%s'}` .Labels.mountpoint .Labels.device .Labels.instance | query | first | value | humanize1024 }}
+ Свободного места: {{ printf `%.2f` $value }}%
+
+ - alert: DiskUsagePredict  # under 10% available AND the 1h linear trend reaches zero within 4h
+ expr: |
+ (node_filesystem_avail_bytes{mountpoint!~"/boot|/boot/efi|/backup", fstype!~"rootfs|fuse.lxcfs|squashfs"}/ node_filesystem_size_bytes) * 100 < 10
+ and
+ predict_linear(node_filesystem_avail_bytes{fstype!~"rootfs|fuse.lxcfs|squashfs"}[1h], 4 * 3600) < 0
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ <<: *anno
+ summary: "Disk usage is more than 90% and will fill soon"
+ description: "{{ $labels.mountpoint }} usage is more than 90% and will fill soon on {{ $labels.instance }}"
+
+ - name: Prometheus
+ rules:
+ - alert: PrometheusAlertmanagerNotificationFailing  # any failed notification in the last minute
+ expr: rate(alertmanager_notifications_failed_total[1m]) > 0
+ for: 0m
+ labels:
+ severity: critical
+ annotations:
+ <<: *anno
+ summary: Prometheus AlertManager notification failing (instance {{ $labels.instance }})
+ description: "Alertmanager is failing sending notifications on {{ $labels.host }}"
+
+ - alert: PrometheusConfigurationReloadFailure
+ expr: prometheus_config_last_reload_successful != 1
+ for: 0m
+ labels:
+ severity: warning
+ annotations:
+ <<: *anno
+ summary: Prometheus configuration reload failure (instance {{ $labels.instance }})
+ description: "Prometheus configuration reload error on {{ $labels.host }}"
+
+ - alert: PrometheusConsulServiceDiscoveryError
+ expr: increase(prometheus_sd_consul_rpc_failures_total[15m]) > 0
+ for: 0m
+ labels:
+ severity: critical
+ annotations:
+ <<: *anno
+ summary: Prometheus consul_sd many failures (instance {{ $labels.instance }})
+ description: "Prometheus consul_sd many failures on {{ $labels.host }}"
+
+
+
+
+
\ No newline at end of file
diff --git a/files/prometheus/prometheus.service b/files/prometheus/prometheus.service
new file mode 100644
index 0000000..e67ca33
--- /dev/null
+++ b/files/prometheus/prometheus.service
@@ -0,0 +1,30 @@
+[Unit]
+Description=Prometheus
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+User=prometheus
+Group=prometheus
+Type=simple
+Restart=always
+OOMScoreAdjust=-1000
+LimitNOFILE=16384
+ExecStart=/usr/sbin/prometheus \
+ --config.file /etc/prometheus/prometheus.yaml \
+ --web.config.file=/etc/prometheus/web-config.yaml \
+ --storage.tsdb.path /prometheus-data/ \
+ --storage.tsdb.retention.time 180d \
+ --storage.tsdb.max-block-duration=2h \
+ --storage.tsdb.min-block-duration=2h \
+ --web.enable-remote-write-receiver \
+ --web.console.templates=/etc/prometheus/consoles \
+ --web.console.libraries=/etc/prometheus/console_libraries \
+ --web.enable-admin-api \
+ --query.max-samples=50000000
+
+ExecReload=/usr/bin/kill -s HUP $MAINPID
+ExecStop=/usr/bin/kill -s QUIT $MAINPID
+
+[Install]
+WantedBy=multi-user.target
diff --git a/files/prometheus/prometheus.yaml b/files/prometheus/prometheus.yaml
new file mode 100644
index 0000000..9e6595c
--- /dev/null
+++ b/files/prometheus/prometheus.yaml
@@ -0,0 +1,179 @@
+# my global config
+global:
+ scrape_interval: 10s # Scrape targets every 10 seconds. Default is every 1 minute.
+ evaluation_interval: 60s # Evaluate rules every 60 seconds. The default is every 1 minute.
+ #external_labels:
+
+ # scrape_timeout is set to the global default (10s).
+
+# Alertmanager configuration
+alerting:
+ alertmanagers:
+ - scheme: https
+ static_configs:
+ - targets: ['alertmanager.guaranteedstruggle.host']
+
+# Writing data to remote long-term storage (VictoriaMetrics)
+# remote_write:
+# - url:
+
+# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
+rule_files:  # NOTE(review): rules.yaml (records instance:node_cpus:count, used by the LoadAverage alerts) is not listed - confirm it is deployed under service_alerts/
+ - '/etc/prometheus/alerts.yaml'
+ - '/etc/prometheus/service_alerts/*.yaml'
+
+# Scrape configurations.
+# The active jobs below cover node exporters (hosts and VMs), the IPMI exporter and Proxmox.
+scrape_configs:
+
+
+ #### TODO: move exporter scrape targets into templates generated from machine roles
+
+ - job_name: 'node-exporters'
+ scheme: http
+ static_configs:
+ - targets:
+ - 'semyon-0x01:9100'
+ - 'semyon-0x02:9100'
+ - 'semyon-0x03:9100'
+ - 'semyon-0x04:9100'
+ - 'semyon-0x05:9100'
+
+ - 'king-albert:9100'
+ - 'gpu-slut:9100'
+ # relabel_configs:
+ # - target_label: instance
+ # replacement: 'cyberbully:9100'
+ # - target_label: host
+ # replacement: cyberbully
+
+ - job_name: 'node-exporters-vms'
+ scheme: http
+ static_configs:
+ - targets:
+ - 'printing-slut:9100'
+
+ - 'swarm-node1:9100'
+ - 'swarm-node2:9100'
+ - 'swarm-node3:9100'
+
+ - 'harbor:9100'
+
+ - 'rke2-master1:9100'
+ - 'rke2-master2:9100'
+ - 'rke2-master3:9100'
+ - 'rke2-worker1:9100'
+ - 'rke2-worker2:9100'
+ - 'rke2-worker3:9100'
+ - 'rke2-worker4:9100'
+ - 'rke2-worker5:9100'
+
+ - 'k3s-rancher:9100'
+ - 'k3s-awx:9100'
+
+ # # - job_name: 'node-exporters-lxc'
+ # # scheme: http
+ # # static_configs:
+ # # - targets:
+
+
+ - job_name: 'impi-exporters'
+ scheme: http
+ static_configs:
+ - targets:
+ #- 'cyberbully:9290'
+ - 'king-albert:9290'
+ # - 'semyon-0x01:9290'
+ # - 'semyon-0x02:9290'
+ # - 'semyon-0x03:9290'
+ # - 'semyon-0x04:9290'
+ # - 'semyon-0x05:9290'
+ # - 'gpu-slut:9290'
+
+ # пиписа-экспортер
+ # # - job_name: 'vdk2ch-pipisa-exporter'
+ # # scheme: http
+ # # static_configs:
+ # # - targets:
+ # # - '192.168.0.55:9992'
+ # # relabel_configs:
+ # # - target_label: instance
+ # # replacement: 'cyberbully:9992'
+ # # - target_label: host
+ # # replacement: cyberbully
+
+ # vLLM exporter
+ # - job_name: 'vllm-exporter'
+ # scheme: http
+ # static_configs:
+ # - targets:
+ # - '192.168.0.4:8000'
+ # relabel_configs:
+ # - target_label: instance
+ # replacement: 'new-computer-home:8000'
+ # - target_label: host
+ # replacement: new-computer-home
+
+
+ #
+ # # - job_name: 'nginx-vts-metrics'
+ # # scheme: http
+ # # metrics_path: /status/format/prometheus
+ # # static_configs:
+ # # - targets:
+ # # - '192.168.0.55:9042'
+ # # relabel_configs:
+ # # - target_label: instance
+ # # replacement: 'cyberbully:9042'
+ # - target_label: host
+ # replacement: cyberbully
+
+ # Windows exporter on the laptop, reached over home Wi-Fi
+ # # - job_name: 'i-programmed-my-home-computer'
+ # # scheme: http
+ # # static_configs:
+ # # - targets:
+ # # - '192.168.0.2:9182'
+ # # - '192.168.0.3:9182'
+ # # relabel_configs:
+ # # - source_labels: [__address__]
+ # # regex: "(192.168.0.2.+)"
+ # # target_label: instance
+ # # replacement: 'Desktop-O50pt4s:9182'
+ # # - source_labels: [__address__]
+ # # regex: "(192.168.0.2.+)"
+ # # target_label: host
+ # # replacement: Desktop-O50pt4s
+ # # - source_labels: [__address__]
+ # # regex: "(192.168.0.3.+)"
+ # # target_label: instance
+ # # replacement: 'Desktop-edov3u5:9182'
+ # # - source_labels: [__address__]
+ # # regex: "(192.168.0.3.+)"
+ # # target_label: host
+ # # replacement: Desktop-edov3u5
+
+ #
+ # # - job_name: 'nvidia-gpu-metrics'
+ # # scheme: http
+ # # static_configs:
+ # # - targets:
+ # # - '192.168.0.2:9835'
+ # # relabel_configs:
+ # # - target_label: instance
+ # # replacement: 'Desktop-O50pt4s:9835'
+
+
+ # # # личный твиттус
+ # # - job_name: 'pleroma'
+ # # metrics_path: /api/pleroma/app_metrics
+ # # scheme: https
+ # # static_configs:
+ # # - targets: ['social.vdk2ch.ru']
+
+ # hypervisor
+ - job_name: 'proxmox'
+ metrics_path: /pve
+ static_configs:
+ - targets:
+ - 'king-albert.guaranteedstruggle.host:9221'
diff --git a/files/prometheus/rules.yaml b/files/prometheus/rules.yaml
new file mode 100644
index 0000000..2050268
--- /dev/null
+++ b/files/prometheus/rules.yaml
@@ -0,0 +1,14 @@
+groups:
+ - name: node-exporter-rules
+ rules:
+
+ # CPU count
+ - record: instance:node_cpus:count
+ expr: count(node_cpu_seconds_total{mode="idle"}) without (cpu,mode)
+
+ # taken from:
+ # https://stackoverflow.com/questions/52480567/count-alerts-fired-by-prometheus
+ - name: alerts
+ rules:
+ - record: ALERTS_FOR_STATE:firing
+ expr: ALERTS_FOR_STATE and ignoring(alertstate) ALERTS{alertstate="firing"}
\ No newline at end of file
diff --git a/files/prometheus/web-config.yaml b/files/prometheus/web-config.yaml
new file mode 100644
index 0000000..33996b4
--- /dev/null
+++ b/files/prometheus/web-config.yaml
@@ -0,0 +1,3 @@
+# tls_server_config:
+# cert_file: /etc/prometheus/ssl/ .crt
+# key_file: /etc/prometheus/ssl/ .key
diff --git a/playbooks/_common-setup.yml b/playbooks/_common-setup.yml
new file mode 100644
index 0000000..22d537f
--- /dev/null
+++ b/playbooks/_common-setup.yml
@@ -0,0 +1,6 @@
+#### TODO обе роли - пакаджесы и юзеры
+---
+- import_playbook: packages.yml
+- import_playbook: resolvconf.yml
+- import_playbook: users.yml
+- import_playbook: exporters.yml
\ No newline at end of file
diff --git a/playbooks/exporters.yml b/playbooks/exporters.yml
index 1c850be..7e88cc3 100644
--- a/playbooks/exporters.yml
+++ b/playbooks/exporters.yml
@@ -1,21 +1,21 @@
----
-- name: node exporter!
- hosts: all
- gather_facts: yes
- become: yes
- roles:
- #- role:
- #- prometheus.prometheus.ipmi_exporter
- - prometheus.prometheus.node_exporter
- #node_exporter_local_cache_path: "/tmp/node_exporter_cache"
-- name: for hardware monitoring
- hosts: physical_machines
- gather_facts: yes
- become: yes
- roles:
- - role: prometheus.prometheus.ipmi_exporter
- ipmi_exporter_system_user: root
- ipmi_exporter_version: "1.9.0"
-
-
+---
+- name: node exporter!
+ hosts: all:!lxc  # every inventory host except members of the lxc group
+ gather_facts: true
+ become: true
+ roles:
+ #- role:
+ #- prometheus.prometheus.ipmi_exporter
+ - prometheus.prometheus.node_exporter
+ #node_exporter_local_cache_path: "/tmp/node_exporter_cache"
+- name: for hardware monitoring
+ hosts: king-albert.guaranteedstruggle.host  # IPMI exporter is installed on this single host only
+ gather_facts: true
+ become: true
+ roles:
+ - role: prometheus.prometheus.ipmi_exporter
+ ipmi_exporter_system_user: root
+ ipmi_exporter_version: "1.9.0"
+
+
\ No newline at end of file
diff --git a/playbooks/packages.yml b/playbooks/packages.yml
index 235ce91..933c2fb 100644
--- a/playbooks/packages.yml
+++ b/playbooks/packages.yml
@@ -1,59 +1,63 @@
----
-- name: packages
- hosts: all
- become: true
- become_method: sudo
- tasks:
- - name: Install the packages versions
- ansible.builtin.package:
- name:
- - htop
- #- iperf3
- - git
- - curl
- - net-tools
- - vim
- - sudo
- #state: latest
- state: present
-
-- name: check-stuff packages
- hosts: net-stuff
- become: true
- become_method: sudo
- tasks:
- - name: Install the packages versions
- ansible.builtin.package:
- name:
- - iperf3
- #state: latest
- state: present
-
-
-- name: ceph packages
- hosts: semyons
- become: true
- become_method: sudo
- tasks:
- - name: Install the packages versions
- ansible.builtin.package:
- name:
- - ceph
- #state: latest
- state: present
-
-
-
-- name: iptables
- hosts:
- - kubernetes
- - docker
- become: true
- become_method: sudo
- tasks:
- - name: Install the packages versions
- ansible.builtin.package:
- name:
- - iptables
- #state: latest
+---
+- name: packages  # play 1: baseline tools on every host
+ hosts: all
+ become: true
+ become_method: sudo
+ tasks:
+ - name: Install the packages versions
+ ansible.builtin.package:  # generic module: uses the package manager of the target host
+ name:
+ - htop
+ #- iperf3
+ - git
+ - curl
+ - net-tools
+ - vim
+ - sudo
+ - tree
+ - jq
+ - rsync
+
+ #state: latest
+ state: present  # install when missing; does not upgrade a package that is already installed
+
+- name: check-stuff packages  # play 2: iperf3 only for the "net-stuff" hosts
+ hosts: net-stuff
+ become: true
+ become_method: sudo
+ tasks:
+ - name: Install the packages versions
+ ansible.builtin.package:
+ name:
+ - iperf3
+ #state: latest
+ state: present
+
+
+- name: ceph packages  # play 3: ceph only for the "semyons" hosts
+ hosts: semyons
+ become: true
+ become_method: sudo
+ tasks:
+ - name: Install the packages versions
+ ansible.builtin.package:
+ name:
+ - ceph
+ #state: latest
+ state: present
+
+
+
+- name: iptables  # play 4: iptables for the "kubernetes" and "docker" hosts
+ hosts:
+ - kubernetes
+ - docker
+ become: true
+ become_method: sudo
+ tasks:
+ - name: Install the packages versions
+ ansible.builtin.package:
+ name:
+ - iptables
+ #state: latest
 state: present
\ No newline at end of file
diff --git a/playbooks/pingo.yml b/playbooks/pingo.yml
index 2bc0082..c609e6f 100644
--- a/playbooks/pingo.yml
+++ b/playbooks/pingo.yml
@@ -1,20 +1,20 @@
----
-- name: pingu
- hosts: all
- # remote_user: root
-
- gather_facts: no
- become: yes
- tasks:
- - name: pingu!
- ansible.builtin.ping:
-
- # - name: Send notify to Telegram
- # community.general.telegram:
- # token: '6472915685:AAHPvgrQoqG7DxtfbnHWPe3Lfild-CGJ1j8'
- # api_args:
- # chat_id: -4023350326
- # parse_mode: "markdown"
- # text: "Your precious application has been deployed: https://example.com"
- # disable_web_page_preview: true
+---
+- name: pingu  # connectivity check against every inventory host
+ hosts: all
+ # remote_user: root
+
+ gather_facts: no  # skip fact gathering; the play only runs the ping module
+ become: no # previously "yes"; the ping task now runs without privilege escalation
+ tasks:
+ - name: pingu!
+ ansible.builtin.ping:
+
+ # Disabled example of a Telegram notification task:
+ # - name: Send notify to Telegram
+ # community.general.telegram:
+ # token: "{{ telegram_bot_token }}"  # NOTE(review): a literal bot token used to be committed here - rotate it and keep it in vault
+ # api_args:
+ # chat_id: -4023350326
+ # parse_mode: "markdown"
+ # text: "Your precious application has been deployed: https://example.com"
+ # disable_web_page_preview: true
 # disable_notification: true
\ No newline at end of file
diff --git a/playbooks/resolvconf.yml b/playbooks/resolvconf.yml
new file mode 100644
index 0000000..0ee1141
--- /dev/null
+++ b/playbooks/resolvconf.yml
@@ -0,0 +1,65 @@
+---
+- name: make resolv.conf work fine  # installs and configures systemd-resolved on every host
+ hosts: all
+ become: yes
+ tasks:
+ - name: Install the packages versions  # ensures the systemd-resolved package is present
+ ansible.builtin.package:
+ name:
+ - systemd-resolved
+ state: present
+ - name: Make small file  # writes the complete /etc/systemd/resolved.conf from the literal below
+ register: systemd_resolved_conf  # read by the final "restart service" task
+ copy:
+ dest: "/etc/systemd/resolved.conf"
+ content: |
+ # This file is part of systemd.
+ #
+ # systemd is free software; you can redistribute it and/or modify it under the
+ # terms of the GNU Lesser General Public License as published by the Free
+ # Software Foundation; either version 2.1 of the License, or (at your option)
+ # any later version.
+ #
+ # Entries in this file show the compile time defaults. Local configuration
+ # should be created by either modifying this file, or by creating "drop-ins" in
+ # the resolved.conf.d/ subdirectory. The latter is generally recommended.
+ # Defaults can be restored by simply deleting this file and all drop-ins.
+ #
+ # Use 'systemd-analyze cat-config systemd/resolved.conf' to display the full config.
+ # See resolved.conf(5) for details.
+
+ [Resolve]
+ # Some examples of DNS servers which may be used for DNS= and FallbackDNS=:
+ # Cloudflare: 1.1.1.1#cloudflare-dns.com 1.0.0.1#cloudflare-dns.com 2606:4700:4700::1111#cloudflare-dns.com 2606:4700:4700::1001#cloudflare-dns.com
+ # Google: 8.8.8.8#dns.google 8.8.4.4#dns.google 2001:4860:4860::8888#dns.google 2001:4860:4860::8844#dns.google
+ # Quad9: 9.9.9.9#dns.quad9.net 149.112.112.112#dns.quad9.net 2620:fe::fe#dns.quad9.net 2620:fe::9#dns.quad9.net
+ DNS=192.168.0.88
+ FallbackDNS=192.168.0.1
+ Domains=guaranteedstruggle.host,just-for-me.internal
+ #DNSSEC=no
+ #DNSOverTLS=no
+ #MulticastDNS=yes
+ #LLMNR=yes
+ #Cache=yes
+ #CacheFromLocalhost=no
+ DNSStubListener=yes
+ #DNSStubListenerExtra=
+ #ReadEtcHosts=yes
+ #ResolveUnicastSingleLabel=no
+
+
+ - name: Make fix for resolv-conf rewriting  # dhclient hook that redefines make_resolv_conf as a no-op
+ copy:
+ dest: "/etc/dhcp/dhclient-enter-hooks.d/nodnsupdate"  # NOTE(review): copy does not create a missing parent directory - confirm it exists on every targeted host
+ content: |
+ #!/bin/sh
+ make_resolv_conf(){
+ :
+ }
+ mode: +x  # symbolic mode: adds the execute bit
+
+ - name: restart service  # not a handler: an ordinary task guarded by the "when" below
+ service:
+ name: systemd-resolved
+ state: restarted
+ when: systemd_resolved_conf.changed  # restart only when resolved.conf was actually rewritten
\ No newline at end of file
diff --git a/playbooks/run-puppet.yml b/playbooks/run-puppet.yml
index 8d8d720..8f6403f 100644
--- a/playbooks/run-puppet.yml
+++ b/playbooks/run-puppet.yml
@@ -1,19 +1,19 @@
----
-- name: run it
- hosts: puppets
- gather_facts: no
- become: yes
- tasks:
- - name: run 'em
- community.general.puppet:
- summarize: true
-
-- name: run it 2
- hosts: samehost-zero.guaranteedstruggle.host
- gather_facts: no
- become: yes
- tasks:
- - name: run this
- community.general.puppet:
- summarize: yes
- certname: samehost-zero.guaranteedstuggle.host
+---
+- name: run it  # play 1: runs the puppet module on the "puppets" hosts
+ hosts: puppets
+ gather_facts: no
+ become: yes
+ tasks:
+ - name: run 'em
+ community.general.puppet:
+ summarize: true
+
+- name: run it 2  # play 2: same run for one host, with an explicit certname
+ hosts: samehost-zero.guaranteedstruggle.host
+ gather_facts: no
+ become: yes
+ tasks:
+ - name: run this
+ community.general.puppet:
+ summarize: yes
+ certname: samehost-zero.guaranteedstuggle.host  # NOTE(review): spelled "guaranteedstuggle" (no "r"), unlike hosts: above - confirm it matches the agent's real certificate name
diff --git a/playbooks/software/prometheus.yml b/playbooks/software/prometheus.yml
new file mode 100644
index 0000000..4deae54
--- /dev/null
+++ b/playbooks/software/prometheus.yml
@@ -0,0 +1,133 @@
+---
+# Installs a pinned Prometheus release from the upstream tarball and runs it under systemd.
+- name: prom
+  hosts:
+    - prometheus.guaranteedstruggle.host
+  vars:
+    prom_version: '2.55.1'  # used in the download URL and in the unpacked directory name
+  gather_facts: true
+  become: true
+  tasks:
+    # Dedicated group and no-login account that own the Prometheus directories.
+    - name: Ensure group "prometheus" exists
+      ansible.builtin.group:
+        name: prometheus
+        state: present
+    - name: Add user "prometheus"
+      ansible.builtin.user:
+        name: prometheus
+        groups: prometheus
+        shell: /sbin/nologin
+        create_home: false
+        append: true
+        comment: "prometheus nologin User"
+        state: present
+
+    - name: Create directory /etc/prometheus
+      ansible.builtin.file:
+        path: /etc/prometheus
+        state: directory
+        group: prometheus
+        owner: prometheus
+    - name: Create directory /usr/share/prometheus
+      ansible.builtin.file:
+        path: /usr/share/prometheus
+        state: directory
+        group: prometheus
+        owner: prometheus
+    - name: Create directory /prometheus-data
+      ansible.builtin.file:
+        path: /prometheus-data
+        state: directory
+        group: prometheus
+        owner: prometheus
+
+    # "creates" makes this a no-op once the versioned directory has been unpacked.
+    - name: Download and unpack the Prometheus release tarball
+      ansible.builtin.unarchive:
+        src: https://github.com/prometheus/prometheus/releases/download/v{{prom_version}}/prometheus-{{prom_version}}.linux-amd64.tar.gz
+        dest: /usr/share/prometheus
+        creates: /usr/share/prometheus/prometheus-{{prom_version}}.linux-amd64
+        remote_src: true
+
+    # Stable /usr/sbin paths that point into the versioned release directory.
+    - name: Symlink /usr/sbin/prometheus to the unpacked binary
+      ansible.builtin.file:
+        src: /usr/share/prometheus/prometheus-{{prom_version}}.linux-amd64/prometheus
+        dest: /usr/sbin/prometheus
+        owner: prometheus
+        group: prometheus
+        state: link
+    - name: Symlink /usr/sbin/promtool to the unpacked binary
+      ansible.builtin.file:
+        src: /usr/share/prometheus/prometheus-{{prom_version}}.linux-amd64/promtool
+        dest: /usr/sbin/promtool
+        owner: prometheus
+        group: prometheus
+        state: link
+
+    # Configuration files: a change to any of them notifies "reload prometheus".
+    - name: Copy prometheus.yaml
+      register: prometheus_config_file
+      ansible.builtin.copy:
+        src: ../../files/prometheus/prometheus.yaml
+        dest: /etc/prometheus/prometheus.yaml
+      notify:
+        - reload prometheus
+    - name: Copy web-config
+      register: web_config_file
+      ansible.builtin.copy:
+        src: ../../files/prometheus/web-config.yaml
+        dest: /etc/prometheus/web-config.yaml
+      notify:
+        - reload prometheus
+    - name: Copy rules.yaml
+      register: rules_file
+      ansible.builtin.copy:
+        src: ../../files/prometheus/rules.yaml
+        dest: /etc/prometheus/rules.yaml
+      notify:
+        - reload prometheus
+    - name: Copy alerts.yaml
+      register: alerts_file
+      ansible.builtin.copy:
+        src: ../../files/prometheus/alerts.yaml
+        dest: /etc/prometheus/alerts.yaml
+      notify:
+        - reload prometheus
+
+    - name: Copy prometheus.service
+      register: prometheus_service_file
+      ansible.builtin.copy:
+        src: ../../files/prometheus/prometheus.service
+        dest: /etc/systemd/system/prometheus.service
+
+    # Reread unit files BEFORE the service is started/enabled, so a changed prometheus.service is what systemd acts on.
+    # NOTE(review): an already-running Prometheus keeps its old unit settings until restarted - confirm whether a restart is wanted.
+    - name: Just force systemd to reread configs
+      ansible.builtin.systemd_service:
+        daemon_reload: true
+      when: prometheus_service_file.changed
+
+    - name: ensure service
+      ansible.builtin.systemd_service:
+        name: prometheus
+        state: started
+        enabled: true
+
+    # - name: reload service
+    #   ansible.builtin.systemd_service:
+    #     name: prometheus
+    #     state: reloaded
+    #   when:
+    #     - rules_file.changed
+    #     - alerts_file.changed
+    #     - prometheus_service_file.changed
+    #     - web_config_file.changed
+
+  handlers:
+    - name: reload prometheus
+      ansible.builtin.systemd_service:
+        name: prometheus
+        state: reloaded
+#### TODO: how to roll back when the promtool check fails? (copy's "validate:" option, e.g. "promtool check rules %s", may help - to be confirmed)
\ No newline at end of file
diff --git a/playbooks/users.yml b/playbooks/users.yml
index 5ab9d64..1ccc3b9 100644
--- a/playbooks/users.yml
+++ b/playbooks/users.yml
@@ -1,28 +1,28 @@
----
-- name: users
- hosts: all
- become: yes
- tasks:
- - name: make-me
- ansible.builtin.user:
- name: hogweed1
- shell: /bin/bash
- create_home: yes
- # python -c 'import crypt; print crypt.crypt("This is my Password", "$1$SomeSalt$")'
- password: $6$KHOI$0Dq28VBwgtNFvfbQQ.4s6koctN6e5ZWRRBhWp0lkKKiel8y2qhc89E0CY479b4EX5.CnfDhS8rlaOATk/rXLu0
-
- - name: Set authorized key taken from file
- ansible.posix.authorized_key:
- user: hogweed1
- state: present
- key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINHTnXy693g6ivEJ6D5VrTBnjEjIe/a00cU7/9Hb79Zf hogweed1@vdk2ch.ru"
-
-
-
- - name: Make users passwordless for sudo in group wheel
- lineinfile:
- path: /etc/sudoers
- state: present
- regexp: '^%hogweed1'
- line: '%hogweed1 ALL=(ALL) NOPASSWD: ALL'
+---
+- name: users  # creates the hogweed1 account, its SSH key and its sudoers rule on every host
+ hosts: all
+ become: yes
+ tasks:
+ - name: make-me
+ ansible.builtin.user:
+ name: hogweed1
+ shell: /bin/bash
+ create_home: yes
+ # Example of producing a crypt hash: python -c 'import crypt; print crypt.crypt("This is my Password", "$1$SomeSalt$")'
+ password: $6$KHOI$0Dq28VBwgtNFvfbQQ.4s6koctN6e5ZWRRBhWp0lkKKiel8y2qhc89E0CY479b4EX5.CnfDhS8rlaOATk/rXLu0  # already-hashed value; NOTE(review): committed in plain text - consider moving it to vault
+
+ - name: Set authorized key taken from file  # NOTE: the key is given inline below, not read from a file
+ ansible.posix.authorized_key:
+ user: hogweed1
+ state: present
+ key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINHTnXy693g6ivEJ6D5VrTBnjEjIe/a00cU7/9Hb79Zf hogweed1@vdk2ch.ru"
+
+
+
+ - name: Make users passwordless for sudo in group wheel  # NOTE(review): the rule below is for "%hogweed1", not wheel
+ lineinfile:
+ path: /etc/sudoers
+ state: present
+ regexp: '^%hogweed1'  # replace an existing %hogweed1 line instead of appending a duplicate
+ line: '%hogweed1 ALL=(ALL) NOPASSWD: ALL'
 validate: 'visudo -cf %s'
\ No newline at end of file
diff --git a/plugins/callback/telegram.py b/plugins/callback/telegram.py
index 75fd3e8..3e7bb08 100644
--- a/plugins/callback/telegram.py
+++ b/plugins/callback/telegram.py
@@ -1,210 +1,210 @@
-from __future__ import (absolute_import, division, print_function)
-__metaclass__ = type
-
-DOCUMENTATION = '''
- callback: telegram
- callback_type: notification
- requirements:
- - whitelist in configuration
- - telebot (pip install pyTelegramBotApi)
- - prettytable (pip install prettytable)
- - latest requests (pip install requests --upgrade)
- short_description: Sends play events to a telegram channel
- version_added: "2.1"
- description:
- - This is an ansible callback plugin that sends status updates to a telegram channel during playbook execution.
- - Before 2.4 only environment variables were available for configuring this plugin
- options:
- tg_token:
- required: True
- description: telegram bot token
- env:
- - name: TG_TOKEN
- ini:
- - section: callback_telegram
- key: tg_token
- tg_chat_id:
- required: True
- description: telegram chat id to post in.
- env:
- - name: TG_CHAT_ID
- ini:
- - section: callback_telegram
- key: tg_chat_id
- socks5_uri:
- description: socks5 proxy uri to bypass rkn's restarictions
- env:
- - name: SOCKS5_URI
- ini:
- - section: callback_telegram
- key: socks5_uri
-'''
-
-import os
-from datetime import datetime
-
-from ansible import context
-from ansible.module_utils._text import to_text
-from ansible.module_utils.urls import open_url
-from ansible.plugins.callback import CallbackBase
-
-try:
- import telebot
- from telebot import apihelper
- HAS_TELEBOT = True
-except ImportError:
- HAS_TELEBOT = False
-
-try:
- import prettytable
- HAS_PRETTYTABLE = True
-except ImportError:
- HAS_PRETTYTABLE = False
-
-class CallbackModule(CallbackBase):
- """This is an ansible callback plugin that sends status
- updates to a telegram channel during playbook execution.
- """
- CALLBACK_VERSION = 2.0
- CALLBACK_TYPE = 'notification'
- CALLBACK_NAME = 'telegram'
- CALLBACK_NEEDS_WHITELIST = True
-
- def __init__(self, display=None):
-
- super(CallbackModule, self).__init__(display=display)
-
- if not HAS_TELEBOT:
- self.disabled = True
- self._display.warning('The `telebot` python module is not '
- 'installed. Disabling the Slack callback '
- 'plugin.')
-
- if not HAS_PRETTYTABLE:
- self.disabled = True
- self._display.warning('The `prettytable` python module is not '
- 'installed. Disabling the Slack callback '
- 'plugin.')
-
- self.playbook_name = None
- self.play = None
- self.now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
- def set_options(self, task_keys=None, var_options=None, direct=None):
-
- super(CallbackModule, self).set_options(task_keys=task_keys, var_options=var_options, direct=direct)
-
- self.tg_token = self.get_option('tg_token')
- self.tg_chat_id = self.get_option('tg_chat_id')
- self.socks5_uri = self.get_option('socks5_uri')
-
- if self.tg_token is None:
- self.disabled = True
- self._display.warning('tg_token was not provided. The '
- 'tg_token can be provided using '
- 'the `TG_TOKEN` environment '
- 'variable.')
-
- if self.tg_chat_id is None:
- self.disabled = True
- self._display.warning('tg_chat_id was not provided. The '
- 'tg_chat_id can be provided using '
- 'the `TG_CHAT_ID` environment '
- 'variable.')
-
- def send_msg(self, msg):
- if self.socks5_uri is not None:
- apihelper.proxy = {'https': self.socks5_uri}
- # print(self.tg_token)
- bot = telebot.TeleBot(self.tg_token)
- # print(bot)
- # print(self.tg_chat_id)
- # print(msg)
- # bot.send_message(self.tg_chat_id, 'Hi! I\'m a Bot!')
- bot.send_message(self.tg_chat_id, msg, parse_mode='HTML')
-
- def v2_playbook_on_start(self, playbook):
-
- self.playbook_name = os.path.abspath(playbook._file_name)
-
- def v2_playbook_on_play_start(self, play):
- self.play = play
-
- title = [
- 'Ansible: STARTED ⚙️'
- ]
-
- msg_items = [' '.join(title)]
- msg_items.append('\n time: ' + '' + str(self.now) + '
')
- msg_items.append('playbook: ' + '' + self.playbook_name + '
')
- msg_items.append(' hosts:')
- for host in play.hosts:
- msg_items.append(' - ' + host + '
')
- msg_items.append(' tags:')
- for tag in play.only_tags:
- msg_items.append(' - ' + tag + '
')
- msg = '\n'.join(msg_items)
- self.send_msg(msg=msg)
-
- def v2_runner_on_failed(self, result, ignore_errors=False):
-
- msg = []
- title = [
- 'Ansible: FAILED ❌'
- ]
- msg_items = [' '.join(title)]
- msg_items.append('\n time: ' + '' + str(self.now) + '
')
- msg_items.append('playbook: ' + '' + self.playbook_name + '
')
- msg_items.append(' host: ' + '' + result._host.get_name() + '
')
- msg_items.append(' stderr: ' + '' + result._result['stderr'] + '
')
-
- msg = '\n'.join(msg_items)
-
- self.send_msg(msg=msg)
-
- def v2_playbook_on_stats(self, stats):
- """Display info about playbook statistics"""
-
- hosts = sorted(stats.processed.keys())
-
- t = prettytable.PrettyTable(['Host and state'] )
- #, 'Ok', 'Changed', 'Unreachable',
- # 'Failures', 'Rescued', 'Ignored'])
-
- failures = False
- unreachable = False
-
- for h in hosts:
- s = stats.summarize(h)
-
- if s['failures'] > 0:
- failures = True
- if s['unreachable'] > 0:
- unreachable = True
-
- print(s)
- t.add_row([h.replace('.guaranteedstruggle.host','')] )
- print([h.replace('.guaranteedstruggle.host','')])
- print(', '.join([ str(s[k]) for k in ['ok', 'changed', 'unreachable',
- 'failures', 'rescued', 'ignored']]))
- t.add_row( ["[" + ', '.join([ str(s[k]) for k in ['ok', 'changed', 'unreachable',
- 'failures', 'rescued', 'ignored']]) + "]"])
-
- msg = []
- title = 'Ansible: ENDED'
- if failures or unreachable:
- msg_items = [
- title + ' ❌'
- ]
- else:
- msg_items = [
- title + ' ✅'
- ]
- msg_items.append('\n time: ' + '' + str(self.now) + '
')
- msg_items.append('playbook: ' + '' + self.playbook_name + '
')
- msg_items.append('\n%s\n
' % t)
- msg_items.append('' + 'ok,chg,unr,fail,res,ign' + '
')
-
- msg = '\n'.join(msg_items)
-
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+DOCUMENTATION = '''
+ callback: telegram
+ callback_type: notification
+ requirements:
+ - whitelist in configuration
+ - telebot (pip install pyTelegramBotApi)
+ - prettytable (pip install prettytable)
+ - latest requests (pip install requests --upgrade)
+ short_description: Sends play events to a telegram channel
+ version_added: "2.1"
+ description:
+ - This is an ansible callback plugin that sends status updates to a telegram channel during playbook execution.
+ - Before 2.4 only environment variables were available for configuring this plugin
+ options:
+ tg_token:
+ required: True
+ description: telegram bot token
+ env:
+ - name: TG_TOKEN
+ ini:
+ - section: callback_telegram
+ key: tg_token
+ tg_chat_id:
+ required: True
+ description: telegram chat id to post in.
+ env:
+ - name: TG_CHAT_ID
+ ini:
+ - section: callback_telegram
+ key: tg_chat_id
+ socks5_uri:
+ description: socks5 proxy uri to bypass rkn's restarictions
+ env:
+ - name: SOCKS5_URI
+ ini:
+ - section: callback_telegram
+ key: socks5_uri
+'''
+
+import os
+from datetime import datetime
+
+from ansible import context
+from ansible.module_utils._text import to_text
+from ansible.module_utils.urls import open_url
+from ansible.plugins.callback import CallbackBase
+
+try:
+ import telebot
+ from telebot import apihelper
+ HAS_TELEBOT = True
+except ImportError:
+ HAS_TELEBOT = False
+
+try:
+ import prettytable
+ HAS_PRETTYTABLE = True
+except ImportError:
+ HAS_PRETTYTABLE = False
+
+class CallbackModule(CallbackBase):
+ """This is an ansible callback plugin that sends status
+ updates to a telegram channel during playbook execution.
+ """
+ CALLBACK_VERSION = 2.0
+ CALLBACK_TYPE = 'notification'
+ CALLBACK_NAME = 'telegram'
+ CALLBACK_NEEDS_WHITELIST = True
+
+    def __init__(self, display=None):
+        """Set up the callback and disable it when telebot or prettytable is not importable."""
+        super(CallbackModule, self).__init__(display=display)
+
+        if not HAS_TELEBOT:
+            self.disabled = True
+            self._display.warning('The `telebot` python module is not '
+                                  'installed. Disabling the Telegram callback '
+                                  'plugin.')
+
+        if not HAS_PRETTYTABLE:
+            self.disabled = True
+            self._display.warning('The `prettytable` python module is not '
+                                  'installed. Disabling the Telegram callback '
+                                  'plugin.')
+
+        self.playbook_name = None
+        self.play = None
+        self.now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")  # formatted once; reused by every message
+
+ def set_options(self, task_keys=None, var_options=None, direct=None):
+ # Load the plugin options and disable the callback when the token or chat id is missing.
+ super(CallbackModule, self).set_options(task_keys=task_keys, var_options=var_options, direct=direct)
+
+ self.tg_token = self.get_option('tg_token')
+ self.tg_chat_id = self.get_option('tg_chat_id')
+ self.socks5_uri = self.get_option('socks5_uri')  # optional; only consulted in send_msg
+
+ if self.tg_token is None:
+ self.disabled = True
+ self._display.warning('tg_token was not provided. The '
+ 'tg_token can be provided using '
+ 'the `TG_TOKEN` environment '
+ 'variable.')
+
+ if self.tg_chat_id is None:
+ self.disabled = True
+ self._display.warning('tg_chat_id was not provided. The '
+ 'tg_chat_id can be provided using '
+ 'the `TG_CHAT_ID` environment '
+ 'variable.')
+
+ def send_msg(self, msg):  # sends msg to the configured chat with parse_mode='HTML'
+ if self.socks5_uri is not None:
+ apihelper.proxy = {'https': self.socks5_uri}  # set on the telebot.apihelper module, not on one bot object
+ # print(self.tg_token)
+ bot = telebot.TeleBot(self.tg_token)  # a new bot object is built for every message
+ # print(bot)
+ # print(self.tg_chat_id)
+ # print(msg)
+ # bot.send_message(self.tg_chat_id, 'Hi! I\'m a Bot!')
+ bot.send_message(self.tg_chat_id, msg, parse_mode='HTML')
+
+ def v2_playbook_on_start(self, playbook):
+ # Remember the playbook's absolute path; the message builders include it in their text.
+ self.playbook_name = os.path.abspath(playbook._file_name)
+
+ def v2_playbook_on_play_start(self, play):  # sends a "STARTED" message listing the play's hosts and tags
+ self.play = play
+ # NOTE(review): several literals below look truncated (markup appears to be missing) - verify against the real file.
+ title = [
+ 'Ansible: STARTED ⚙️'
+ ]
+
+ msg_items = [' '.join(title)]  # first message line; the remaining lines are appended below
+ msg_items.append('\n time: ' + '' + str(self.now) + '
')
+ msg_items.append('playbook: ' + '' + self.playbook_name + '
')
+ msg_items.append(' hosts:')
+ for host in play.hosts:  # one message line per entry of play.hosts
+ msg_items.append(' - ' + host + '
')
+ msg_items.append(' tags:')
+ for tag in play.only_tags:  # one message line per entry of play.only_tags
+ msg_items.append(' - ' + tag + '
')
+ msg = '\n'.join(msg_items)
+ self.send_msg(msg=msg)
+
+ def v2_runner_on_failed(self, result, ignore_errors=False):  # sends a "FAILED" message; ignore_errors is not consulted
+ # NOTE(review): result._result['stderr'] is indexed directly - presumably a KeyError when a failure has no stderr; confirm.
+ msg = []  # unused: overwritten by the join below
+ title = [
+ 'Ansible: FAILED ❌'
+ ]
+ msg_items = [' '.join(title)]
+ msg_items.append('\n time: ' + '' + str(self.now) + '
')
+ msg_items.append('playbook: ' + '' + self.playbook_name + '
')
+ msg_items.append(' host: ' + '' + result._host.get_name() + '
')
+ msg_items.append(' stderr: ' + '' + result._result['stderr'] + '
')
+
+ msg = '\n'.join(msg_items)
+
+ self.send_msg(msg=msg)
+
+ def v2_playbook_on_stats(self, stats):
+ """Send the end-of-run message: a one-column table with a name row and a counters row per host."""
+
+ hosts = sorted(stats.processed.keys())
+
+ t = prettytable.PrettyTable(['Host and state'] )  # single column; the wider header below is disabled
+ #, 'Ok', 'Changed', 'Unreachable',
+ # 'Failures', 'Rescued', 'Ignored'])
+
+ failures = False  # set when any host reports failures > 0
+ unreachable = False  # set when any host reports unreachable > 0
+
+ for h in hosts:
+ s = stats.summarize(h)
+
+ if s['failures'] > 0:
+ failures = True
+ if s['unreachable'] > 0:
+ unreachable = True
+
+ print(s)  # debug output on stdout, not part of the Telegram message
+ t.add_row([h.replace('.guaranteedstruggle.host','')] )
+ print([h.replace('.guaranteedstruggle.host','')])
+ print(', '.join([ str(s[k]) for k in ['ok', 'changed', 'unreachable',
+ 'failures', 'rescued', 'ignored']]))
+ t.add_row( ["[" + ', '.join([ str(s[k]) for k in ['ok', 'changed', 'unreachable',
+ 'failures', 'rescued', 'ignored']]) + "]"])
+
+ msg = []  # unused: overwritten by the join below
+ title = 'Ansible: ENDED'
+ if failures or unreachable:
+ msg_items = [
+ title + ' ❌'
+ ]
+ else:
+ msg_items = [
+ title + ' ✅'
+ ]
+ msg_items.append('\n time: ' + '' + str(self.now) + '
')
+ msg_items.append('playbook: ' + '' + self.playbook_name + '
')
+ msg_items.append('\n%s\n
' % t)
+ msg_items.append('' + 'ok,chg,unr,fail,res,ign' + '
')
+
+ msg = '\n'.join(msg_items)
+
 self.send_msg(msg=msg)
\ No newline at end of file
diff --git a/requirements.yml b/requirements.yml
index 3f89fbc..93caaa2 100644
--- a/requirements.yml
+++ b/requirements.yml
@@ -1,9 +1,9 @@
----
-collections:
- - name: community.general
- version: 9.5.0
- - name: ansible.utils
- version: 4.1.0
-
- - name: prometheus.prometheus
- version: 0.18.0
\ No newline at end of file
+---
+collections:
+ - name: community.general
+ version: 9.5.0
+ - name: ansible.utils
+ version: 4.1.0
+
+ - name: prometheus.prometheus
+ version: 0.22.0
\ No newline at end of file