collectd configuration

This commit is contained in:
2025-01-03 09:47:10 +01:00
parent 02495dde81
commit f7941e8ef5
13 changed files with 321 additions and 1088 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
#LoadPlugin exec LoadPlugin exec
#
#<Plugin "exec"> <Plugin "exec">
# Exec nobody "/host/usr/local/bin/btrfs-data" Exec collectd "/host/usr/local/bin/btrfs-data"
#</Plugin> </Plugin>

View File

@@ -0,0 +1,5 @@
LoadPlugin exec
<Plugin "exec">
Exec collectd "/host/usr/local/bin/cpufreq-data"
</Plugin>

View File

@@ -1,6 +1,5 @@
<Plugin df> <Plugin df>
MountPoint "/media/docker" MountPoint "/media/docker"
MountPoint "/media/scratch"
FSType "ext4" FSType "ext4"
IgnoreSelected false IgnoreSelected false
</Plugin> </Plugin>

View File

@@ -1,6 +1,5 @@
LoadPlugin exec LoadPlugin exec
<Plugin "exec"> <Plugin "exec">
Exec nobody "/host/usr/local/bin/du-data" Exec collectd "/host/usr/local/bin/du-data"
</Plugin> </Plugin>

View File

@@ -3,4 +3,3 @@ LoadPlugin exec
<Plugin "exec"> <Plugin "exec">
Exec nobody "/host/usr/local/bin/speedtest-data" Exec nobody "/host/usr/local/bin/speedtest-data"
</Plugin> </Plugin>

View File

@@ -1,18 +1,18 @@
#!/usr/bin/python #!/usr/bin/python3
# #
# Imports # Imports
# #
import sys import sys
import time import time
import commands import subprocess
import argparse import argparse
# #
# Misc # Misc
# #
#sys.tracebacklimit = 0 # sys.tracebacklimit = 0
# #
@@ -28,21 +28,42 @@ size_snapshot_exclusive = 0
# Methods # Methods
# #
def get_subvol_list(path): def get_subvol_list(path):
command = "btrfs subvolume list -t %s" % (path) command = "sudo btrfs subvolume list -t %s" % (path)
status, output = commands.getstatusoutput(command) status, output = subprocess.getstatusoutput(command)
if status is not 0: if status != 0:
raise Exception(command) raise Exception(command)
# Every line contains the following values: subvol_id, gen, toplevel, path # Every line contains the following values: subvol_id, gen, toplevel, path
return output.splitlines()[2:] return output.splitlines()[2:]
def get_filesystem_size(path):
    """Return the device size reported by btrfs for the filesystem at *path*.

    Runs ``sudo btrfs filesystem show --raw <path>`` and returns the ``size``
    field of the first ``devid`` line. When that command fails (the original
    author notes this happens when running inside a Docker container), the
    command is repeated without a path and the largest ``size`` of any
    listed device is returned instead.

    Args:
        path: mount point or path handed to ``btrfs filesystem show``.

    Returns:
        The size as an ``int`` (``--raw`` makes btrfs print plain numbers).

    Raises:
        Exception: carrying the command string when the fallback command
            fails or when the output contains no usable ``devid`` line.
    """
    command = "sudo btrfs filesystem show --raw %s" % (path)
    status, output = subprocess.getstatusoutput(command)
    per_path = status == 0
    if not per_path:
        # This command fails when running inside Docker container,
        # use the maximum size of any filesystem instead
        command = "sudo btrfs filesystem show --raw"
        status, output = subprocess.getstatusoutput(command)
        if status != 0:
            raise Exception(command)

    # getstatusoutput() merges stderr into the output, so a warning line can
    # shift the line numbers; select the "devid" lines instead of relying on
    # a fixed line index.
    # Line layout: devid <id> size <total> used <used> path <device>
    sizes = []
    for line in output.splitlines():
        fields = line.split()
        if len(fields) > 3 and fields[0] == "devid":
            try:
                sizes.append(int(fields[3]))
            except ValueError:
                # Not a numeric size field, skip this line
                continue

    if not sizes:
        raise Exception(command)
    return sizes[0] if per_path else max(sizes)
def get_id_root(name, path): def get_id_root(name, path):
lines = get_subvol_list(path) lines = get_subvol_list(path)
# Filter lines where toplevel == 5 # Filter lines where toplevel == 5
subvol_ids = filter(lambda x: int(x.split()[2]) == 5, lines) subvol_ids = [x for x in lines if int(x.split()[2]) == 5]
# Try to retrieve the subvol_id for the root subvolume (if any) # Try to retrieve the subvol_id for the root subvolume (if any)
if len(subvol_ids) == 1: if len(subvol_ids) == 1:
@@ -51,28 +72,30 @@ def get_id_root(name, path):
else: else:
# The path contains a btrfs filesystem with multiple subvolumes for data # The path contains a btrfs filesystem with multiple subvolumes for data
try: try:
return int(filter(lambda x: x.split()[3] == name, subvol_ids)[0].split()[0]) return int(list(filter(lambda x: x.split()[3] == name, subvol_ids))[0].split()[0])
except IndexError: except IndexError:
pass pass
# Volume not found, root is probably the btrfs default (5) # Volume not found, root is probably the btrfs default (5)
return 5 return 5
def get_id_subvolumes(path, subvol_id): def get_id_subvolumes(path, subvol_id):
lines = get_subvol_list(path) lines = get_subvol_list(path)
lines = filter(lambda x: int(x.split()[2]) == subvol_id, lines) lines = [x for x in lines if int(x.split()[2]) == subvol_id]
return list(map(lambda x: int(x.split()[0]), lines)) return list([int(x.split()[0]) for x in lines])
def get_disk_usage(name, path): def get_disk_usage(name, path):
id_root = get_id_root(name, path) id_root = get_id_root(name, path)
id_subvolumes = get_id_subvolumes(path, id_root) id_subvolumes = get_id_subvolumes(path, id_root)
size_filesystem = get_filesystem_size(path)
command = "btrfs qgroup show --raw %s" % (path) # Get disk usage from quota
status, output = commands.getstatusoutput(command) command = "sudo btrfs qgroup show --raw %s" % (path)
status, output = subprocess.getstatusoutput(command)
if status is not 0: if status != 0:
raise Exception(command) raise Exception(command)
lines = output.splitlines()[2:] lines = output.splitlines()[2:]
@@ -94,16 +117,28 @@ def get_disk_usage(name, path):
# -> data that is not (yet) incorporated in a snapshot # -> data that is not (yet) incorporated in a snapshot
size_data_exclusive = 0 size_data_exclusive = 0
# Data exclusively available in snapshots # Data exclusively available in snapshots
# -> data that was removed from volume # -> data that was removed from volume
size_snapshot_exclusive = 0 size_snapshot_exclusive = 0
for line in lines: for line in lines:
split = line.split() split = line.split()
subvol_id = int(split[0].split("/")[1]) subvol_id = 0
size_total = float(split[1]) size_total = 0
size_exclusive = float(split[2]) size_exclusive = 0
try:
subvol_id = int(split[0].split("/")[1])
size_total = float(split[1])
size_exclusive = float(split[2])
except IndexError:
# ignore "WARNING: Quota disabled"
pass
# size_exclusive is incorrect when snapshot is
# removed and qgroups are not updated yet,
# ignore the value when it seems unrealistic
if size_exclusive > size_filesystem:
size_exclusive = 0
if subvol_id == id_root: if subvol_id == id_root:
size_data_total = size_total size_data_total = size_total
@@ -112,27 +147,57 @@ def get_disk_usage(name, path):
size_snapshot_total += size_total size_snapshot_total += size_total
size_snapshot_exclusive += size_exclusive size_snapshot_exclusive += size_exclusive
def rescan_quota(path): def rescan_quota(path):
command = "btrfs quota rescan %s" % (path) command = "sudo btrfs quota rescan %s" % (path)
status, output = commands.getstatusoutput(command) status, output = subprocess.getstatusoutput(command)
if status is not 0: if status != 0:
Exception(command) Exception(command)
def print_human_readable(name): def print_human_readable(name):
global size_data_total global size_data_total
global size_data_exclusive global size_data_exclusive
global size_snapshot_exclusive global size_snapshot_exclusive
size_data_total = size_data_total / (1024*1e6) size_data_total = size_data_total / (1024 * 1e6)
size_data_exclusive = size_data_exclusive / (1024*1e6) size_data_exclusive = size_data_exclusive / (1024 * 1e6)
size_snapshot_exclusive = size_snapshot_exclusive / (1024*1e6) size_snapshot_exclusive = size_snapshot_exclusive / (1024 * 1e6)
print "%10s: %6.1f Gb, %6.1f Gb, %6.1f Gb" % (name, size_data_total, size_data_exclusive, size_snapshot_exclusive) print(
"%10s: %6.1f Gb, %6.1f Gb, %6.1f Gb"
% (name, size_data_total, size_data_exclusive, size_snapshot_exclusive)
)
def print_rrd(name): def print_rrd(name):
timestamp = int(time.time()) timestamp = int(time.time())
print("PUTVAL {}/exec-btrfs_{}/gauge-data_total {}:{:.1f}".format(hostname, name, timestamp, size_data_total)) print(
print("PUTVAL {}/exec-btrfs_{}/gauge-data_exclusive {}:{:.1f}".format(hostname, name, timestamp, size_data_exclusive)) (
print("PUTVAL {}/exec-btrfs_{}/gauge-snapshot_total {}:{:.1f}".format(hostname, name, timestamp, size_snapshot_total)) "PUTVAL {}/exec-btrfs_{}/gauge-data_total {}:{:.1f}".format(
print("PUTVAL {}/exec-btrfs_{}/gauge-snapshot_exclusive {}:{:.1f}".format(hostname, name, timestamp, size_snapshot_exclusive)) hostname, name, timestamp, size_data_total
)
)
)
print(
(
"PUTVAL {}/exec-btrfs_{}/gauge-data_exclusive {}:{:.1f}".format(
hostname, name, timestamp, size_data_exclusive
)
)
)
print(
(
"PUTVAL {}/exec-btrfs_{}/gauge-snapshot_total {}:{:.1f}".format(
hostname, name, timestamp, size_snapshot_total
)
)
)
print(
(
"PUTVAL {}/exec-btrfs_{}/gauge-snapshot_exclusive {}:{:.1f}".format(
hostname, name, timestamp, size_snapshot_exclusive
)
)
)
# #
@@ -143,19 +208,18 @@ interval = 10
volumes = list() volumes = list()
# 275 GB SSD # 275 GB SSD
volumes.append(["@", "/host/root/"]) volumes.append(["home", "/host/root/home"])
volumes.append(["@home", "/host/root/home"])
volumes.append(["opt", "/host/root/opt"])
# 2x 4TB HDD # 2x 4TB HDD
volumes.append(["data", "/host/root/media/data"]) volumes.append(["data", "/host/root/media/data"])
volumes.append(["backup", "/host/root/media/backup"])
volumes.append(["seafile", "/host/root/media/seafile"])
# #
# Command line arguments # Command line arguments
# #
parser = argparse.ArgumentParser(description='Get BTRFS disk usage') parser = argparse.ArgumentParser(description="Get BTRFS disk usage")
parser.add_argument('-s', action='store_true', help='print in human readable format') parser.add_argument("-s", action="store_true", help="print in human readable format")
args = parser.parse_args() args = parser.parse_args()
human_readable = args.s human_readable = args.s
@@ -163,7 +227,7 @@ human_readable = args.s
# #
# Main # Main
# #
if (human_readable): if human_readable:
for (name, path) in volumes: for (name, path) in volumes:
get_disk_usage(name, path) get_disk_usage(name, path)
print_human_readable(name) print_human_readable(name)
@@ -172,9 +236,9 @@ else:
while True: while True:
for (name, path) in volumes: for (name, path) in volumes:
get_disk_usage(name, path) get_disk_usage(name, path)
print_rrd(name) print_rrd(name)
sys.stdout.flush() sys.stdout.flush()
time.sleep(interval) time.sleep(interval)
#rescan_quota(path) # rescan_quota(path)

View File

@@ -0,0 +1,57 @@
#!/usr/bin/python3
import argparse
import time
import sys
import os
hostname = "sepia"
measurement_interval = 5
def get_cpu_frequencies(cpu_root="/sys/devices/system/cpu/"):
    """Return the current frequency of every CPU found below *cpu_root*.

    Each ``cpu<N>`` directory is expected to provide
    ``cpufreq/scaling_cur_freq``, which the kernel reports in kHz.

    Args:
        cpu_root: directory containing the ``cpu<N>`` entries; defaults to
            the sysfs location.

    Returns:
        A list of ``(cpu_index, frequency_mhz)`` tuples ordered by CPU index.
        CPUs whose frequency cannot be read or parsed are skipped, and the
        list is empty when *cpu_root* itself cannot be listed.
    """
    frequencies = []
    try:
        entries = os.listdir(cpu_root)
    except OSError as e:
        # Diagnostics go to stderr: stdout carries the PUTVAL lines
        print("Error:", e, file=sys.stderr)
        return frequencies

    cpu_dirs = [d for d in entries if d.startswith("cpu") and d[3:].isdigit()]
    # os.listdir() returns entries in arbitrary order, sort numerically
    cpu_dirs.sort(key=lambda d: int(d[3:]))

    for cpu_dir in cpu_dirs:
        freq_path = os.path.join(cpu_root, cpu_dir, "cpufreq", "scaling_cur_freq")
        try:
            with open(freq_path, "r") as f:
                frequency = int(f.read().strip()) / 1000  # Convert kHz to MHz
        except (OSError, ValueError) as e:
            # One unreadable CPU must not hide the values of the others
            print("Error:", e, file=sys.stderr)
            continue
        frequencies.append((int(cpu_dir[3:]), frequency))

    return frequencies
def main():
    """Print CPU frequencies once, or keep emitting them as PUTVAL lines.

    With ``-s`` / ``--human-readable`` the frequencies are printed a single
    time in a readable form. Without it the function loops forever: every
    ``measurement_interval`` seconds it prints one PUTVAL line per CPU and
    flushes stdout.
    """
    parser = argparse.ArgumentParser(description="Query CPU frequencies.")
    parser.add_argument(
        "-s",
        "--human-readable",
        action="store_true",
        help="Print frequencies in human-readable format",
    )
    options = parser.parse_args()

    # One-shot mode: report once and leave
    if options.human_readable:
        for cpu, frequency in get_cpu_frequencies():
            print(f"CPU{cpu} Frequency: {frequency:.2f} MHz")
        return

    # Continuous mode: one batch of PUTVAL lines per interval
    while True:
        readings = get_cpu_frequencies()
        timestamp = int(time.time())
        for cpu, frequency in readings:
            print(
                f"PUTVAL {hostname}/cpu-frequency/gauge-cpu{cpu} {timestamp}:{frequency:.0f}"
            )
        # Flush so the batch is delivered before sleeping
        sys.stdout.flush()
        time.sleep(measurement_interval)
if __name__ == "__main__":
main()

View File

@@ -1,25 +1,77 @@
#!/bin/bash #!/usr/bin/python3
COLLECTION=sepia
INTERVAL=90
DIRS=$(cat <<LIST #
/host/root/media/data/Inverter # Imports
/host/root/media/data/Monique #
/host/root/media/data/Music import sys
/host/root/media/data/Peter import time
/host/root/media/data/Photographs import subprocess
/host/root/media/data/Raw import argparse
/host/root/media/data/Sanne
/host/root/media/data/Wii
LIST
)
while :; do
SECONDS=0 #
for DIR in $DIRS; do # Methods
SIZE=$(du -cs $DIR | tail -1 | awk '{print $1}') #
NAME=$(echo $DIR | sed 's/.//' | tr / - ) def get_disk_usage(path, human_readable):
echo "PUTVAL $COLLECTION/exec-du-$NAME/gauge-size interval=$INTERVAL N:$SIZE" """disk usage in human readable format (e.g. '2,1GB')"""
done arguments = "-sh" if human_readable else "-s"
sleep $((INTERVAL-$SECONDS)) command = "du %s %s" % (arguments, path)
done status, output = subprocess.getstatusoutput(command)
if status != 0:
raise Exception(command)
disk_usage = output.split()[0]
if not human_readable:
# du reports in units of 1024 bytes, convert to plain number of bytes
disk_usage = int(disk_usage) * 1024
return disk_usage
#
# Directories to scan
#
hostname = "sepia"
interval = 10

# Each entry is [metric name, directory path]
directories = [
    ["inverter", "/host/root/media/data/Inverter"],
    ["monique", "/host/root/media/data/Monique"],
    ["music", "/host/root/media/data/Music"],
    ["peter", "/host/root/media/data/Peter"],
    ["photographs", "/host/root/media/data/Photographs"],
    ["sanne", "/host/root/media/data/Sanne"],
    ["wii", "/host/root/media/data/Wii"],
]

#
# Command line arguments
#
parser = argparse.ArgumentParser(description="Get DU disk usage")
parser.add_argument("-s", action="store_true", help="print in human readable format")
args = parser.parse_args()
human_readable = args.s

#
# Main
#
if human_readable:
    # One-shot mode: print every directory once
    for name, path in directories:
        disk_usage = get_disk_usage(path, human_readable)
        print("%s: %s" % (name, disk_usage))
else:
    # RRD mode: emit one PUTVAL line per directory, then wait and repeat
    while True:
        for name, path in directories:
            disk_usage = get_disk_usage(path, human_readable)
            timestamp = int(time.time())
            size = float(disk_usage)
            line = "PUTVAL {}/exec-du_{}/gauge-size {}:{:.1f}".format(
                hostname, name, timestamp, size
            )
            print(line)
        sys.stdout.flush()
        time.sleep(interval)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/python3
# #
# Imports # Imports
@@ -6,61 +6,58 @@
import sys import sys
import time import time
import argparse import argparse
import pylikwid import pmt
# #
# Configuration # Configuration
# #
hostname = "sepia" hostname = "sepia"
cpuid = 0
pinfo = pylikwid.getpowerinfo()
domainid = pinfo.get("domains").get("PKG").get("ID")
measurement_duration = 5 measurement_duration = 5
measurement_interval = 15 measurement_interval = 15
dinfo = pinfo.get("domains") pm = pmt.create("rapl")
domain_names = dinfo.keys()
domain_ids = [domain['ID'] for domain in dinfo.values()]
# #
# Command line arguments # Command line arguments
# #
parser = argparse.ArgumentParser(description='Get CPU power consumption') parser = argparse.ArgumentParser(description="Get CPU power consumption")
parser.add_argument('-s', action='store_true', help='print in human readable format') parser.add_argument("-s", action="store_true", help="print in human readable format")
args = parser.parse_args() args = parser.parse_args()
human_readable = args.s human_readable = args.s
#
# Methods
#
def get_power():
start = list()
end = list()
power = list()
for domain_id in domain_ids:
e_start = pylikwid.startpower(cpuid, domain_id)
start.append(e_start)
time.sleep(measurement_duration)
for domain_id in domain_ids:
e_stop = pylikwid.stoppower(cpuid, domain_id)
end.append(e_stop)
for events in zip(start, end, domain_ids):
joules = pylikwid.getpower(events[0], events[1], events[2])
power.append(joules / measurement_duration)
return dict(zip(domain_names, power)) #
# Methods
#
def get_power():
    """Return a dict mapping each measurement name to its watts value.

    Sleeps for ``measurement_duration`` seconds, reads one state from the
    module-level power meter ``pm`` and collects ``name(i)`` / ``watts(i)``
    for every measurement index the state reports.
    """
    # NOTE(review): presumably the reported watts cover the sleep interval;
    # confirm against the pmt Python bindings.
    time.sleep(measurement_duration)
    state = pm.read()
    return {state.name(i): state.watts(i) for i in range(state.nr_measurements())}
def print_rrd(measurements): def print_rrd(measurements):
timestamp = int(time.time()) timestamp = int(time.time())
for measurement in measurements.items(): for measurement in list(measurements.items()):
name = measurement[0].lower() name = measurement[0].lower()
power = measurement[1] power = measurement[1]
print("PUTVAL {}/exec-power/gauge-{} {}:{:.1f}".format(hostname, name, timestamp, power)) print(
(
"PUTVAL {}/exec-power/gauge-{} {}:{:.1f}".format(
hostname, name, timestamp, power
)
)
)
# #
# Main # Main
# #
if (human_readable): if human_readable:
print get_power() print(get_power())
else: else:
while True: while True:
power = get_power() power = get_power()

View File

@@ -1,12 +1,15 @@
#!/bin/bash #!/bin/bash
SPEEDTEST=/sbin/speedtest-cli SPEEDTEST=/usr/bin/speedtest-cli
COLLECTION=sepia COLLECTION=sepia
INTERVAL=900 INTERVAL=900
while :; do while :; do
SECONDS=0 SECONDS=0
RESULT=($($SPEEDTEST | grep Mbit | cut -d' ' -f 2)) RESULT=($($SPEEDTEST | grep Mbit | cut -d' ' -f 2))
echo "PUTVAL $COLLECTION/exec-speedtest/gauge-download interval=$INTERVAL N:${RESULT[0]}" TIMESTAMP=$(date +%s)
echo "PUTVAL $COLLECTION/exec-speedtest/gauge-upload interval=$INTERVAL N:${RESULT[1]}" #echo "PUTVAL $COLLECTION/exec-speedtest/gauge-download interval=$INTERVAL N:${RESULT[0]}"
#echo "PUTVAL $COLLECTION/exec-speedtest/gauge-upload interval=$INTERVAL N:${RESULT[1]}"
echo "PUTVAL $COLLECTION/exec-speedtest/gauge-download ${TIMESTAMP}:${RESULT[0]}"
echo "PUTVAL $COLLECTION/exec-speedtest/gauge-upload ${TIMESTAMP}:${RESULT[1]}"
sleep $((INTERVAL-$SECONDS)) sleep $((INTERVAL-$SECONDS))
done done

View File

@@ -1,15 +1,19 @@
services: services:
collectd: collectd:
container_name: collectd build:
image: collectd:latest context: /opt/collectd/docker
privileged: true dockerfile: Dockerfile
restart: unless-stopped container_name: collectd
volumes: image: collectd:bookworm
- /opt/collectd/etc:/etc/collectd privileged: true
- /opt/collectd/var:/var/lib/collectd restart: unless-stopped
- /opt/collectd/usr:/host/usr volumes:
- /root/scripts/speedtest-cli:/sbin/speedtest-cli - /opt/collectd/etc:/etc/collectd
- /:/host/root - /opt/collectd/var:/var/lib/collectd
- /media:/host/media - /opt/collectd/usr:/host/usr
- /var/lib/docker:/media/docker - /:/host/root
- /dev/mapper:/dev/mapper - /media:/host/media
- /media/jupiter/borg:/host/media/borg
- /media/jupiter/rsnapshot:/host/media/rsnapshot
- /var/lib/docker:/media/docker
- /dev/mapper:/dev/mapper

View File

@@ -12,7 +12,6 @@ include:
# Storage # Storage
- docker-compose.seafile.yaml - docker-compose.seafile.yaml
- docker-compose.collectd.yaml - docker-compose.collectd.yaml
# Sensors # Sensors