collectd configuration

This commit is contained in:
2025-01-03 09:47:10 +01:00
parent 02495dde81
commit f7941e8ef5
13 changed files with 321 additions and 1088 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
#LoadPlugin exec
#
#<Plugin "exec">
# Exec nobody "/host/usr/local/bin/btrfs-data"
#</Plugin>
LoadPlugin exec
<Plugin "exec">
Exec collectd "/host/usr/local/bin/btrfs-data"
</Plugin>

View File

@@ -0,0 +1,5 @@
LoadPlugin exec
<Plugin "exec">
Exec collectd "/host/usr/local/bin/cpufreq-data"
</Plugin>

View File

@@ -1,6 +1,5 @@
<Plugin df>
MountPoint "/media/docker"
MountPoint "/media/scratch"
FSType "ext4"
IgnoreSelected false
</Plugin>

View File

@@ -1,6 +1,5 @@
LoadPlugin exec
<Plugin "exec">
Exec nobody "/host/usr/local/bin/du-data"
Exec collectd "/host/usr/local/bin/du-data"
</Plugin>

View File

@@ -3,4 +3,3 @@ LoadPlugin exec
<Plugin "exec">
Exec nobody "/host/usr/local/bin/speedtest-data"
</Plugin>

View File

@@ -1,18 +1,18 @@
#!/usr/bin/python
#!/usr/bin/python3
#
# Imports
#
import sys
import time
import commands
import subprocess
import argparse
#
# Misc
#
#sys.tracebacklimit = 0
# sys.tracebacklimit = 0
#
@@ -28,21 +28,42 @@ size_snapshot_exclusive = 0
# Methods
#
def get_subvol_list(path):
    """Return the subvolume rows reported for the btrfs filesystem at *path*.

    Runs ``sudo btrfs subvolume list -t <path>`` and returns its output
    lines with the first two lines dropped (presumably the table header
    printed by ``-t`` -- confirm against the installed btrfs-progs).

    Raises:
        Exception: carrying the command string when the exit status is
            non-zero.
    """
    command = "sudo btrfs subvolume list -t %s" % (path)
    exit_code, listing = subprocess.getstatusoutput(command)
    if exit_code == 0:
        # Every remaining line contains: subvol_id, gen, toplevel, path
        rows = listing.splitlines()
        return rows[2:]
    raise Exception(command)
def get_filesystem_size(path):
    """Return the device size in bytes of the btrfs filesystem at *path*.

    Runs ``sudo btrfs filesystem show --raw <path>`` and returns the size
    of the first device listed.  When that command fails (it does when
    running inside a Docker container) all filesystems are listed instead
    and the size of the largest device is returned as an upper bound.

    Raises:
        Exception: carrying the command string when the fallback command
            fails as well, or when the output contains no device line.
    """
    command = "sudo btrfs filesystem show --raw %s" % (path)
    status, output = subprocess.getstatusoutput(command)
    use_maximum = False
    if status != 0:
        # This command fails when running inside Docker container,
        # return maximum size of any filesystem instead
        command = "sudo btrfs filesystem show --raw"
        status, output = subprocess.getstatusoutput(command)
        if status != 0:
            raise Exception(command)
        use_maximum = True

    # Device lines look like: devid <n> size <bytes> used <bytes> path <dev>
    # so element 3 and 5 respectively contain total and used sizes.
    # Selecting on the "devid" keyword instead of a fixed line number keeps
    # the parser working when a warning line precedes the regular output.
    sizes = []
    for line in output.splitlines():
        fields = line.split()
        if len(fields) > 3 and fields[0] == "devid":
            sizes.append(int(fields[3]))

    if not sizes:
        # Nothing to parse, report the command rather than an IndexError
        raise Exception(command)

    return max(sizes) if use_maximum else sizes[0]
def get_id_root(name, path):
lines = get_subvol_list(path)
# Filter lines where toplevel == 5
subvol_ids = filter(lambda x: int(x.split()[2]) == 5, lines)
subvol_ids = [x for x in lines if int(x.split()[2]) == 5]
# Try to retrieve the subvol_id for the root subvolume (if any)
if len(subvol_ids) == 1:
@@ -51,28 +72,30 @@ def get_id_root(name, path):
else:
# The path contains a btrfs filesystem with multiple subvolumes for data
try:
return int(filter(lambda x: x.split()[3] == name, subvol_ids)[0].split()[0])
return int(list(filter(lambda x: x.split()[3] == name, subvol_ids))[0].split()[0])
except IndexError:
pass
# Volume not found, root is probably the btrfs default (5)
return 5
def get_id_subvolumes(path, subvol_id):
    """Return the ids of all subvolumes whose toplevel equals *subvol_id*.

    The rows come from get_subvol_list(path); column 0 holds the subvolume
    id and column 2 the toplevel id.
    """
    children = []
    for row in get_subvol_list(path):
        fields = row.split()
        if int(fields[2]) == subvol_id:
            children.append(int(fields[0]))
    return children
def get_disk_usage(name, path):
id_root = get_id_root(name, path)
id_subvolumes = get_id_subvolumes(path, id_root)
size_filesystem = get_filesystem_size(path)
command = "btrfs qgroup show --raw %s" % (path)
status, output = commands.getstatusoutput(command)
# Get disk usage from quota
command = "sudo btrfs qgroup show --raw %s" % (path)
status, output = subprocess.getstatusoutput(command)
if status is not 0:
if status != 0:
raise Exception(command)
lines = output.splitlines()[2:]
@@ -94,16 +117,28 @@ def get_disk_usage(name, path):
# -> data that is not (yet) incorporated in a snapshot
size_data_exclusive = 0
# Data exclusively available in snapshots
# -> data that was removed from volume
size_snapshot_exclusive = 0
for line in lines:
split = line.split()
subvol_id = int(split[0].split("/")[1])
size_total = float(split[1])
size_exclusive = float(split[2])
subvol_id = 0
size_total = 0
size_exclusive = 0
try:
subvol_id = int(split[0].split("/")[1])
size_total = float(split[1])
size_exclusive = float(split[2])
except IndexError:
# ignore "WARNING: Quota disabled"
pass
# size_exclusive is incorrect when snapshot is
# removed and qgroups are not updated yet,
# ignore the value when it seems unrealistic
if size_exclusive > size_filesystem:
size_exclusive = 0
if subvol_id == id_root:
size_data_total = size_total
@@ -112,27 +147,57 @@ def get_disk_usage(name, path):
size_snapshot_total += size_total
size_snapshot_exclusive += size_exclusive
def rescan_quota(path):
    """Start a btrfs quota rescan for the filesystem at *path*.

    Runs ``sudo btrfs quota rescan <path>``.

    Raises:
        Exception: carrying the command string when the exit status is
            non-zero, matching the other btrfs helpers in this script.
    """
    command = "sudo btrfs quota rescan %s" % (path)
    status, output = subprocess.getstatusoutput(command)
    if status != 0:
        # The exception used to be constructed but never raised, which
        # silently discarded every failure
        raise Exception(command)
def print_human_readable(name):
    """Print one summary line with the sizes gathered for volume *name*.

    The module-level totals are divided by 1024 * 1e6 and the scaled values
    are written back to the globals before printing, so the globals no
    longer hold byte counts afterwards.
    """
    global size_data_total
    global size_data_exclusive
    global size_snapshot_exclusive

    divisor = 1024 * 1e6
    size_data_total /= divisor
    size_data_exclusive /= divisor
    size_snapshot_exclusive /= divisor

    columns = (name, size_data_total, size_data_exclusive, size_snapshot_exclusive)
    print("%10s: %6.1f Gb, %6.1f Gb, %6.1f Gb" % columns)
def print_rrd(name):
    """Emit one PUTVAL line per btrfs gauge for volume *name*.

    All four lines share a single timestamp taken at the start of the call
    and read their values from the module-level size counters.
    """
    timestamp = int(time.time())
    gauges = (
        ("PUTVAL {}/exec-btrfs_{}/gauge-data_total {}:{:.1f}", size_data_total),
        ("PUTVAL {}/exec-btrfs_{}/gauge-data_exclusive {}:{:.1f}", size_data_exclusive),
        ("PUTVAL {}/exec-btrfs_{}/gauge-snapshot_total {}:{:.1f}", size_snapshot_total),
        (
            "PUTVAL {}/exec-btrfs_{}/gauge-snapshot_exclusive {}:{:.1f}",
            size_snapshot_exclusive,
        ),
    )
    for template, value in gauges:
        print(template.format(hostname, name, timestamp, value))
#
@@ -143,19 +208,18 @@ interval = 10
volumes = list()
# 275 GB SSD
volumes.append(["@", "/host/root/"])
volumes.append(["@home", "/host/root/home"])
volumes.append(["opt", "/host/root/opt"])
volumes.append(["home", "/host/root/home"])
# 2x 4TB HDD
volumes.append(["data", "/host/root/media/data"])
volumes.append(["backup", "/host/root/media/backup"])
volumes.append(["seafile", "/host/root/media/seafile"])
#
# Command line arguments
#
parser = argparse.ArgumentParser(description='Get BTRFS disk usage')
parser.add_argument('-s', action='store_true', help='print in human readable format')
parser = argparse.ArgumentParser(description="Get BTRFS disk usage")
parser.add_argument("-s", action="store_true", help="print in human readable format")
args = parser.parse_args()
human_readable = args.s
@@ -163,7 +227,7 @@ human_readable = args.s
#
# Main
#
if (human_readable):
if human_readable:
for (name, path) in volumes:
get_disk_usage(name, path)
print_human_readable(name)
@@ -172,9 +236,9 @@ else:
while True:
for (name, path) in volumes:
get_disk_usage(name, path)
print_rrd(name)
sys.stdout.flush()
time.sleep(interval)
#rescan_quota(path)
# rescan_quota(path)

View File

@@ -0,0 +1,57 @@
#!/usr/bin/python3
import argparse
import time
import sys
import os
hostname = "sepia"
measurement_interval = 5
def get_cpu_frequencies(sysfs_root="/sys/devices/system/cpu"):
    """Return the current frequency of every CPU as (index, MHz) tuples.

    Args:
        sysfs_root: directory holding the ``cpuN`` entries; the default is
            the regular Linux sysfs location.

    Returns:
        A list of ``(cpu_index, frequency_in_mhz)`` tuples sorted by CPU
        index.  CPUs whose frequency cannot be read are left out; the list
        is empty when *sysfs_root* itself cannot be listed.

    Problems are reported on stderr only: stdout carries the PUTVAL lines
    that collectd's exec plugin parses, so diagnostics must not end up
    there.
    """
    try:
        entries = os.listdir(sysfs_root)
    except OSError as e:
        print("Error:", e, file=sys.stderr)
        return []

    # Keep only cpu<N>; this skips siblings such as "cpufreq" and "cpuidle"
    cpu_dirs = sorted(
        (d for d in entries if d.startswith("cpu") and d[3:].isdigit()),
        key=lambda d: int(d[3:]),
    )

    frequencies = []
    for cpu_dir in cpu_dirs:
        freq_file = os.path.join(sysfs_root, cpu_dir, "cpufreq", "scaling_cur_freq")
        try:
            with open(freq_file, "r") as f:
                khz = int(f.read().strip())
        except (OSError, ValueError) as e:
            # e.g. an offline CPU without a cpufreq directory; skip it
            # instead of dropping every CPU that follows
            print("Error:", e, file=sys.stderr)
            continue
        # scaling_cur_freq is reported in kHz, convert to MHz
        frequencies.append((int(cpu_dir[3:]), khz / 1000))

    return frequencies
def main():
    """Print CPU frequencies once (-s) or stream them as PUTVAL lines.

    With ``-s``/``--human-readable`` one line per CPU is printed and the
    function returns.  Otherwise it loops forever: every
    ``measurement_interval`` seconds it prints one PUTVAL line per CPU,
    all sharing one timestamp, and flushes stdout.
    """
    parser = argparse.ArgumentParser(description="Query CPU frequencies.")
    parser.add_argument(
        "-s",
        "--human-readable",
        action="store_true",
        help="Print frequencies in human-readable format",
    )
    args = parser.parse_args()

    if args.human_readable:
        for cpu, frequency in get_cpu_frequencies():
            print(f"CPU{cpu} Frequency: {frequency:.2f} MHz")
        return

    while True:
        frequencies = get_cpu_frequencies()
        timestamp = int(time.time())
        for cpu, frequency in frequencies:
            print(
                f"PUTVAL {hostname}/cpu-frequency/gauge-cpu{cpu} {timestamp}:{frequency:.0f}"
            )
        # Flush after each batch so the values are not held back in the
        # stdout buffer until the next round
        sys.stdout.flush()
        time.sleep(measurement_interval)
if __name__ == "__main__":
main()

View File

@@ -1,25 +1,77 @@
#!/bin/bash
COLLECTION=sepia
INTERVAL=90
#!/usr/bin/python3
DIRS=$(cat <<LIST
/host/root/media/data/Inverter
/host/root/media/data/Monique
/host/root/media/data/Music
/host/root/media/data/Peter
/host/root/media/data/Photographs
/host/root/media/data/Raw
/host/root/media/data/Sanne
/host/root/media/data/Wii
LIST
)
#
# Imports
#
import sys
import time
import subprocess
import argparse
while :; do
SECONDS=0
for DIR in $DIRS; do
SIZE=$(du -cs $DIR | tail -1 | awk '{print $1}')
NAME=$(echo $DIR | sed 's/.//' | tr / - )
echo "PUTVAL $COLLECTION/exec-du-$NAME/gauge-size interval=$INTERVAL N:$SIZE"
done
sleep $((INTERVAL-$SECONDS))
done
#
# Methods
#
def get_disk_usage(path, human_readable):
    """Return the disk usage of *path* as reported by ``du``.

    Args:
        path: directory (or file) to measure.
        human_readable: when true the size is returned as the string that
            ``du -sh`` prints (e.g. '2,1G'); otherwise it is returned as an
            int holding the number of bytes.

    Raises:
        Exception: carrying the command line when ``du`` exits non-zero.
    """
    # -k pins the unit to 1024 bytes, so the conversion below does not
    # depend on BLOCKSIZE / POSIXLY_CORRECT in the environment
    arguments = "-sh" if human_readable else "-sk"

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to du unchanged
    command = ["du", arguments, "--", path]
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if result.returncode != 0:
        raise Exception(" ".join(command))

    # Only stdout is parsed, warnings on stderr cannot be mistaken for a size
    disk_usage = result.stdout.split()[0]

    if not human_readable:
        # du reports in units of 1024 bytes, convert to plain number of bytes
        disk_usage = int(disk_usage) * 1024

    return disk_usage
#
# Directories to scan
#
hostname = "sepia"
interval = 10
directories = list()
directories.append(["inverter", "/host/root/media/data/Inverter"])
directories.append(["monique", "/host/root/media/data/Monique"])
directories.append(["music", "/host/root/media/data/Music"])
directories.append(["peter", "/host/root/media/data/Peter"])
directories.append(["photographs", "/host/root/media/data/Photographs"])
directories.append(["sanne", "/host/root/media/data/Sanne"])
directories.append(["wii", "/host/root/media/data/Wii"])
#
# Command line arguments
#
parser = argparse.ArgumentParser(description="Get DU disk usage")
parser.add_argument("-s", action="store_true", help="print in human readable format")
args = parser.parse_args()
human_readable = args.s
#
# Main
#
if human_readable:
for (name, path) in directories:
disk_usage = get_disk_usage(path, human_readable)
print(("%s: %s" % (name, disk_usage)))
else:
# RRD mode
while True:
for (name, path) in directories:
disk_usage = get_disk_usage(path, human_readable)
timestamp = int(time.time())
size = float(disk_usage)
print(
(
"PUTVAL {}/exec-du_{}/gauge-size {}:{:.1f}".format(
hostname, name, timestamp, size
)
)
)
sys.stdout.flush()
time.sleep(interval)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3
#
# Imports
@@ -6,61 +6,58 @@
import sys
import time
import argparse
import pylikwid
import pmt
#
# Configuration
#
hostname = "sepia"
cpuid = 0
pinfo = pylikwid.getpowerinfo()
domainid = pinfo.get("domains").get("PKG").get("ID")
measurement_duration = 5
measurement_interval = 15
dinfo = pinfo.get("domains")
domain_names = dinfo.keys()
domain_ids = [domain['ID'] for domain in dinfo.values()]
pm = pmt.create("rapl")
#
# Command line arguments
#
parser = argparse.ArgumentParser(description='Get CPU power consumption')
parser.add_argument('-s', action='store_true', help='print in human readable format')
parser = argparse.ArgumentParser(description="Get CPU power consumption")
parser.add_argument("-s", action="store_true", help="print in human readable format")
args = parser.parse_args()
human_readable = args.s
#
# Methods
#
def get_power():
start = list()
end = list()
power = list()
for domain_id in domain_ids:
e_start = pylikwid.startpower(cpuid, domain_id)
start.append(e_start)
time.sleep(measurement_duration)
for domain_id in domain_ids:
e_stop = pylikwid.stoppower(cpuid, domain_id)
end.append(e_stop)
for events in zip(start, end, domain_ids):
joules = pylikwid.getpower(events[0], events[1], events[2])
power.append(joules / measurement_duration)
return dict(zip(domain_names, power))
#
# Methods
#
def get_power():
    """Wait ``measurement_duration`` seconds, then read the power meter.

    Returns a dict mapping each measurement name reported by ``pm.read()``
    to the value of its ``watts(i)`` accessor.
    NOTE(review): the reading is taken from a single state object; confirm
    against the pmt bindings that this yields the intended power figure.
    """
    time.sleep(measurement_duration)

    state = pm.read()
    return {
        state.name(index): state.watts(index)
        for index in range(state.nr_measurements())
    }
def print_rrd(measurements):
    """Emit one PUTVAL line per entry of the *measurements* mapping.

    The gauge name is the lower-cased key; all lines share one timestamp
    taken at the start of the call.
    """
    timestamp = int(time.time())

    for key, value in measurements.items():
        line = "PUTVAL {}/exec-power/gauge-{} {}:{:.1f}".format(
            hostname, key.lower(), timestamp, value
        )
        print(line)
#
# Main
#
if (human_readable):
print get_power()
if human_readable:
print(get_power())
else:
while True:
power = get_power()

View File

@@ -1,12 +1,15 @@
#!/bin/bash
SPEEDTEST=/sbin/speedtest-cli
SPEEDTEST=/usr/bin/speedtest-cli
COLLECTION=sepia
INTERVAL=900
while :; do
SECONDS=0
RESULT=($($SPEEDTEST | grep Mbit | cut -d' ' -f 2))
echo "PUTVAL $COLLECTION/exec-speedtest/gauge-download interval=$INTERVAL N:${RESULT[0]}"
echo "PUTVAL $COLLECTION/exec-speedtest/gauge-upload interval=$INTERVAL N:${RESULT[1]}"
TIMESTAMP=$(date +%s)
#echo "PUTVAL $COLLECTION/exec-speedtest/gauge-download interval=$INTERVAL N:${RESULT[0]}"
#echo "PUTVAL $COLLECTION/exec-speedtest/gauge-upload interval=$INTERVAL N:${RESULT[1]}"
echo "PUTVAL $COLLECTION/exec-speedtest/gauge-download ${TIMESTAMP}:${RESULT[0]}"
echo "PUTVAL $COLLECTION/exec-speedtest/gauge-upload ${TIMESTAMP}:${RESULT[1]}"
sleep $((INTERVAL-$SECONDS))
done

View File

@@ -1,15 +1,19 @@
services:
collectd:
container_name: collectd
image: collectd:latest
privileged: true
restart: unless-stopped
volumes:
- /opt/collectd/etc:/etc/collectd
- /opt/collectd/var:/var/lib/collectd
- /opt/collectd/usr:/host/usr
- /root/scripts/speedtest-cli:/sbin/speedtest-cli
- /:/host/root
- /media:/host/media
- /var/lib/docker:/media/docker
- /dev/mapper:/dev/mapper
collectd:
build:
context: /opt/collectd/docker
dockerfile: Dockerfile
container_name: collectd
image: collectd:bookworm
privileged: true
restart: unless-stopped
volumes:
- /opt/collectd/etc:/etc/collectd
- /opt/collectd/var:/var/lib/collectd
- /opt/collectd/usr:/host/usr
- /:/host/root
- /media:/host/media
- /media/jupiter/borg:/host/media/borg
- /media/jupiter/rsnapshot:/host/media/rsnapshot
- /var/lib/docker:/media/docker
- /dev/mapper:/dev/mapper

View File

@@ -12,7 +12,6 @@ include:
# Storage
- docker-compose.seafile.yaml
- docker-compose.collectd.yaml
# Sensors