diff --git a/collectd/etc/collectd.conf b/collectd/etc/collectd.conf new file mode 100644 index 0000000..f3ae664 --- /dev/null +++ b/collectd/etc/collectd.conf @@ -0,0 +1,132 @@ +# Config file for collectd(1). +# +# Some plugins need additional configuration and are disabled by default. +# Please read collectd.conf(5) for details. +# +# You should also read /usr/share/doc/collectd-core/README.Debian.plugins +# before enabling any more plugins. + +############################################################################## +# Global # +#----------------------------------------------------------------------------# +# Global settings for the daemon. # +############################################################################## + +Hostname "server" +FQDNLookup true +#BaseDir "/var/lib/collectd" +#PluginDir "/usr/lib/collectd" +#TypesDB "/usr/share/collectd/types.db" "/etc/collectd/my_types.db" +#----------------------------------------------------------------------------# +# When enabled, plugins are loaded automatically with the default options # +# when an appropriate block is encountered. # +# Disabled by default. # +#----------------------------------------------------------------------------# +#AutoLoadPlugin false + +#----------------------------------------------------------------------------# +# Interval at which to query values. This may be overwritten on a per-plugin # +# base by using the 'Interval' option of the LoadPlugin block: # +# # +# Interval 60 # +# # +#----------------------------------------------------------------------------# +Interval 10 + +#Timeout 2 +#ReadThreads 5 +#WriteThreads 5 + +# Limit the size of the write queue. Default is no limit. Setting up a limit +# is recommended for servers handling a high volume of traffic. 
+#WriteQueueLimitHigh 1000000 +#WriteQueueLimitLow 800000 + +############################################################################## +# Logging # +#----------------------------------------------------------------------------# +# Plugins which provide logging functions should be loaded first, so log # +# messages generated when loading or configuring other plugins can be # +# accessed. # +############################################################################## + +LoadPlugin logfile + + + LogLevel "info" + File "/var/lib/collectd/log/collectd.log" + Timestamp true + PrintSeverity true + + +############################################################################## +# LoadPlugin section # +#----------------------------------------------------------------------------# +# Specify what features to activate. # +############################################################################## + +LoadPlugin cpu +LoadPlugin cpufreq +LoadPlugin df +LoadPlugin disk +LoadPlugin entropy +#LoadPlugin interface +#LoadPlugin irq +LoadPlugin load +LoadPlugin md +LoadPlugin memory +LoadPlugin network +LoadPlugin processes +LoadPlugin rrdtool +LoadPlugin sensors +LoadPlugin swap +LoadPlugin uptime +LoadPlugin users +LoadPlugin smart + +############################################################################## +# Plugin configuration # +#----------------------------------------------------------------------------# +# In this section configuration stubs for each plugin are provided. A desc- # +# ription of those options is available in the collectd.conf(5) manual page. 
# +############################################################################## + + + DataDir "/var/lib/collectd/rrd" +# CacheTimeout 120 +# CacheFlush 900 +# WritesPerSecond 30 +# CreateFilesAsync false +# RandomTimeout 0 +# +# The following settings are rather advanced +# and should usually not be touched: +# StepSize 10 +# HeartBeat 20 +# RRARows 1200 +# RRATimespan 158112000 +# XFF 0.1 + + + + Disk "sda" + Disk "sdb" + Disk "nvme0" + IgnoreSelected false + + +# +# Device "/dev/md0" +# IgnoreSelected false +# + + + Disk "sda" + Disk "sdb" + Disk "nvme0" + IgnoreSelected false + + + + Filter "*.conf" + diff --git a/collectd/etc/collectd.conf.d/btrfs-data.conf b/collectd/etc/collectd.conf.d/btrfs-data.conf new file mode 100644 index 0000000..3e8fe80 --- /dev/null +++ b/collectd/etc/collectd.conf.d/btrfs-data.conf @@ -0,0 +1,5 @@ +LoadPlugin exec + + + Exec collectd "/host/usr/local/bin/btrfs-data" + diff --git a/collectd/etc/collectd.conf.d/cpufreq-data.conf b/collectd/etc/collectd.conf.d/cpufreq-data.conf new file mode 100644 index 0000000..f3b1e15 --- /dev/null +++ b/collectd/etc/collectd.conf.d/cpufreq-data.conf @@ -0,0 +1,5 @@ +LoadPlugin exec + + + Exec collectd "/host/usr/local/bin/cpufreq-data" + diff --git a/collectd/etc/collectd.conf.d/df.conf b/collectd/etc/collectd.conf.d/df.conf new file mode 100644 index 0000000..594f9ee --- /dev/null +++ b/collectd/etc/collectd.conf.d/df.conf @@ -0,0 +1,5 @@ + + MountPoint "/media/docker" + FSType "ext4" + IgnoreSelected false + diff --git a/collectd/etc/collectd.conf.d/du-data.conf b/collectd/etc/collectd.conf.d/du-data.conf new file mode 100644 index 0000000..3382f7b --- /dev/null +++ b/collectd/etc/collectd.conf.d/du-data.conf @@ -0,0 +1,5 @@ +LoadPlugin exec + + + Exec collectd "/host/usr/local/bin/du-data" + diff --git a/collectd/etc/collectd.conf.d/power-data.conf b/collectd/etc/collectd.conf.d/power-data.conf new file mode 100644 index 0000000..08f5c9e --- /dev/null +++ 
b/collectd/etc/collectd.conf.d/power-data.conf @@ -0,0 +1,5 @@ +LoadPlugin exec + + + Exec collectd "/host/usr/local/bin/power-data" + diff --git a/collectd/etc/collectd.conf.d/speedtest-data.conf b/collectd/etc/collectd.conf.d/speedtest-data.conf new file mode 100644 index 0000000..1902744 --- /dev/null +++ b/collectd/etc/collectd.conf.d/speedtest-data.conf @@ -0,0 +1,5 @@ +LoadPlugin exec + + + Exec nobody "/host/usr/local/bin/speedtest-data" + diff --git a/collectd/usr/local/bin/btrfs-data b/collectd/usr/local/bin/btrfs-data new file mode 100755 index 0000000..758f733 --- /dev/null +++ b/collectd/usr/local/bin/btrfs-data @@ -0,0 +1,245 @@ +#!/usr/bin/python3 + +# +# Imports +# +import sys +import time +import subprocess +import argparse + + +# +# Misc +# +# sys.tracebacklimit = 0 + + +# +# Global variables +# +size_data_total = 0 +size_data_exclusive = 0 +size_snapshot_total = 0 +size_snapshot_exclusive = 0 + + +# +# Methods +# +def get_subvol_list(path): + command = "sudo btrfs subvolume list -t %s" % (path) + status, output = subprocess.getstatusoutput(command) + + if status != 0: + raise Exception(command) + + # Every line contains the following values: subvol_id, gen, toplevel, path + return output.splitlines()[2:] + + +def get_filesystem_size(path): + command = "sudo btrfs filesystem show --raw %s" % (path) + status, output = subprocess.getstatusoutput(command) + + if status != 0: + # This command fails when running inside Docker container + # return maximum size of any filesystem instead + command = "sudo btrfs filesystem show --raw" + status, output = subprocess.getstatusoutput(command) + lines = output.splitlines() + lines = [x for x in lines if "devid" in x] + sizes = [int(line.split()[3]) for line in lines] + return max(sizes) + + # The sizes are on the third line + line = output.splitlines()[2] + + # Element 3 and 5 respectively contain total and used sizes + return int(line.split()[3]) + + +def get_id_root(name, path): + lines = get_subvol_list(path) 
+ + # Filter lines where toplevel == 5 + subvol_ids = [x for x in lines if int(x.split()[2]) == 5] + + # Try to retrieve the subvol_id for the root subvolume (if any) + if len(subvol_ids) == 1: + # The path contains a btrfs filesystem without subvolume for data + return int(subvol_ids[0].split()[0]) + else: + # The path contains a btrfs filesystem with multiple subvolumes for data + try: + return int(list(filter(lambda x: x.split()[3] == name, subvol_ids))[0].split()[0]) + except IndexError: + pass + + # Volume not found, root is probably the btrfs default (5) + return 5 + + +def get_id_subvolumes(path, subvol_id): + lines = get_subvol_list(path) + lines = [x for x in lines if int(x.split()[2]) == subvol_id] + return list([int(x.split()[0]) for x in lines]) + + +def get_disk_usage(name, path): + id_root = get_id_root(name, path) + id_subvolumes = get_id_subvolumes(path, id_root) + size_filesystem = get_filesystem_size(path) + + # Get disk usage from quota + command = "sudo btrfs qgroup show --raw %s" % (path) + status, output = subprocess.getstatusoutput(command) + + if status != 0: + raise Exception(command) + + lines = output.splitlines()[2:] + + # Global variables + global size_data_total + global size_data_exclusive + global size_snapshot_total + global size_snapshot_exclusive + + # Total data volume in subvolume + size_data_total = 0 + + # Total data volume in snapshots + # -> this variable is useless + size_snapshot_total = 0 + + # Data exclusively in subvolume + # -> data that is not (yet) incorporated in a snapshot + size_data_exclusive = 0 + + # Data exclusively available in snapshots + # -> data that was removed from volume + size_snapshot_exclusive = 0 + + for line in lines: + split = line.split() + subvol_id = 0 + size_total = 0 + size_exclusive = 0 + try: + subvol_id = int(split[0].split("/")[1]) + size_total = float(split[1]) + size_exclusive = float(split[2]) + except IndexError: + # ignore "WARNING: Quota disabled" + pass + + # size_exclusive is 
incorrect when snapshot is + # removed and qgroups are not updated yet, + # ignore the value when it seems unrealistic + if size_exclusive > size_filesystem: + size_exclusive = 0 + + if subvol_id == id_root: + size_data_total = size_total + size_data_exclusive = size_exclusive + elif subvol_id in id_subvolumes: + size_snapshot_total += size_total + size_snapshot_exclusive += size_exclusive + + +def rescan_quota(path): + command = "sudo btrfs quota rescan %s" % (path) + status, output = subprocess.getstatusoutput(command) + if status != 0: + Exception(command) + + +def print_human_readable(name): + global size_data_total + global size_data_exclusive + global size_snapshot_exclusive + size_data_total = size_data_total / (1024 * 1e6) + size_data_exclusive = size_data_exclusive / (1024 * 1e6) + size_snapshot_exclusive = size_snapshot_exclusive / (1024 * 1e6) + print( + "%10s: %6.1f Gb, %6.1f Gb, %6.1f Gb" + % (name, size_data_total, size_data_exclusive, size_snapshot_exclusive) + ) + + +def print_rrd(name): + timestamp = int(time.time()) + print( + ( + "PUTVAL {}/exec-btrfs_{}/gauge-data_total {}:{:.1f}".format( + hostname, name, timestamp, size_data_total + ) + ) + ) + print( + ( + "PUTVAL {}/exec-btrfs_{}/gauge-data_exclusive {}:{:.1f}".format( + hostname, name, timestamp, size_data_exclusive + ) + ) + ) + print( + ( + "PUTVAL {}/exec-btrfs_{}/gauge-snapshot_total {}:{:.1f}".format( + hostname, name, timestamp, size_snapshot_total + ) + ) + ) + print( + ( + "PUTVAL {}/exec-btrfs_{}/gauge-snapshot_exclusive {}:{:.1f}".format( + hostname, name, timestamp, size_snapshot_exclusive + ) + ) + ) + + +# +# Volumes to scan +# +hostname = "server" +interval = 10 +volumes = list() + +volumes.append(["helios", "/host/media/helios"]) +volumes.append(["borg", "/host/media//borg"]) +volumes.append(["rsnapshot", "/host/media/rsnapshot"]) +volumes.append(["mercury", "/host/media/mercury"]) +volumes.append(["neptune", "/host/media/neptune"]) +volumes.append(["nubes", 
"/host/media/nubes"]) +volumes.append(["scratch", "/host/media/scratch"]) + + +# +# Command line arguments +# +parser = argparse.ArgumentParser(description="Get BTRFS disk usage") +parser.add_argument("-s", action="store_true", help="print in human readable format") +args = parser.parse_args() +human_readable = args.s + + +# +# Main +# +if human_readable: + for (name, path) in volumes: + get_disk_usage(name, path) + print_human_readable(name) +else: + # RRD mode + while True: + for (name, path) in volumes: + get_disk_usage(name, path) + + print_rrd(name) + + sys.stdout.flush() + time.sleep(interval) + # rescan_quota(path) diff --git a/collectd/usr/local/bin/cpufreq-data b/collectd/usr/local/bin/cpufreq-data new file mode 100755 index 0000000..412add0 --- /dev/null +++ b/collectd/usr/local/bin/cpufreq-data @@ -0,0 +1,57 @@ +#!/usr/bin/python3 +import argparse +import time +import sys +import os + +hostname = "server" +measurement_interval = 5 + + +def get_cpu_frequencies(): + frequencies = [] + try: + cpu_dirs = [ + d + for d in os.listdir("/sys/devices/system/cpu/") + if d.startswith("cpu") and d[3:].isdigit() + ] + for cpu_dir in cpu_dirs: + with open( + f"/sys/devices/system/cpu/{cpu_dir}/cpufreq/scaling_cur_freq", "r" + ) as f: + frequency = int(f.read().strip()) / 1000 # Convert Hz to MHz + frequencies.append((int(cpu_dir[3:]), frequency)) + except Exception as e: + print("Error:", e) + return frequencies + + +def main(): + parser = argparse.ArgumentParser(description="Query CPU frequencies.") + parser.add_argument( + "-s", + "--human-readable", + action="store_true", + help="Print frequencies in human-readable format", + ) + args = parser.parse_args() + + if args.human_readable: + frequencies = get_cpu_frequencies() + for cpu, frequency in frequencies: + print(f"CPU{cpu} Frequency: {frequency:.2f} MHz") + else: + while True: + frequencies = get_cpu_frequencies() + timestamp = int(time.time()) + for cpu, frequency in frequencies: + print( + f"PUTVAL 
{hostname}/cpu-frequency/gauge-cpu{cpu} {timestamp}:{frequency:.0f}" + ) + sys.stdout.flush() + time.sleep(measurement_interval) + + +if __name__ == "__main__": + main() diff --git a/collectd/usr/local/bin/du-data b/collectd/usr/local/bin/du-data new file mode 100755 index 0000000..e5a820e --- /dev/null +++ b/collectd/usr/local/bin/du-data @@ -0,0 +1,82 @@ +#!/usr/bin/python3 + +# +# Imports +# +import sys +import time +import subprocess +import argparse + + +# +# Methods +# +def get_disk_usage(path, human_readable): + """disk usage in human readable format (e.g. '2,1GB')""" + arguments = "-sh" if human_readable else "-s" + command = "du %s %s" % (arguments, path) + status, output = subprocess.getstatusoutput(command) + + if status != 0: + raise Exception(command) + + disk_usage = output.split()[0] + if not human_readable: + # du reports in units of 1024 bytes, convert to plain number of bytes + disk_usage = int(disk_usage) * 1024 + return disk_usage + + +# +# Directories to scan +# +hostname = "server" +interval = 10 +directories = list() + +directories.append(["bram", "/host/media/helios/Bram"]) +directories.append(["rik", "/host/media/helios/Rik"]) +directories.append(["books", "/host/media/neptune/Books"]) +directories.append(["games", "/host/media/mercury/Games"]) +directories.append(["misc", "/host/media/neptune/Miscellaneous"]) +directories.append(["shows", "/host/media/neptune/Video/Shows"]) +directories.append(["movies", "/host/media/neptune/Video/Movies"]) +directories.append(["music", "/host/media/neptune/Music"]) +directories.append(["photographs", "/host/media/helios/Photographs"]) +directories.append(["pictures", "/host/media/helios/Pictures"]) +directories.append(["software", "/host/media/mercury/Software"]) + + +# +# Command line arguments +# +parser = argparse.ArgumentParser(description="Get BTRFS disk usage") +parser.add_argument("-s", action="store_true", help="print in human readable format") +args = parser.parse_args() +human_readable = args.s 
+ + +# +# Main +# +if human_readable: + for (name, path) in directories: + disk_usage = get_disk_usage(path, human_readable) + print(("%s: %s" % (name, disk_usage))) +else: + # RRD mode + while True: + for (name, path) in directories: + disk_usage = get_disk_usage(path, human_readable) + timestamp = int(time.time()) + size = float(disk_usage) + print( + ( + "PUTVAL {}/exec-du_{}/gauge-size {}:{:.1f}".format( + hostname, name, timestamp, size + ) + ) + ) + sys.stdout.flush() + time.sleep(interval) diff --git a/collectd/usr/local/bin/power-data b/collectd/usr/local/bin/power-data new file mode 100755 index 0000000..763829d --- /dev/null +++ b/collectd/usr/local/bin/power-data @@ -0,0 +1,66 @@ +#!/usr/bin/python3 + +# +# Imports +# +import sys +import time +import argparse +import pmt + +# +# Configuration +# +hostname = "server" +measurement_duration = 5 +measurement_interval = 15 +pm = pmt.create("rapl") + +# +# Command line arguments +# +parser = argparse.ArgumentParser(description="Get CPU power consumption") +parser.add_argument("-s", action="store_true", help="print in human readable format") +args = parser.parse_args() +human_readable = args.s + + +# +# Methods +# +def get_power(): + time.sleep(measurement_duration) + measurements = dict() + state = pm.read() + for i in range(state.nr_measurements()): + name = state.name(i) + watts = state.watts(i) + measurements[name] = watts + return measurements + + +def print_rrd(measurements): + timestamp = int(time.time()) + for measurement in list(measurements.items()): + name = measurement[0].lower() + power = measurement[1] + print( + ( + "PUTVAL {}/exec-power/gauge-{} {}:{:.1f}".format( + hostname, name, timestamp, power + ) + ) + ) + + +# +# Main +# +if human_readable: + print(get_power()) +else: + while True: + power = get_power() + print_rrd(power) + sys.stdout.flush() + time.sleep(measurement_interval) diff --git a/collectd/usr/local/bin/power-data-likwid b/collectd/usr/local/bin/power-data-likwid new file mode 
100755 index 0000000..d438f2e --- /dev/null +++ b/collectd/usr/local/bin/power-data-likwid @@ -0,0 +1,77 @@ +#!/usr/bin/python3 + +# +# Imports +# +import sys +import time +import argparse +import pylikwid + +# +# Configuration +# +hostname = "server" +cpuid = 0 +pinfo = pylikwid.getpowerinfo() +domainid = pinfo.get("domains").get("PKG").get("ID") +measurement_duration = 5 +measurement_interval = 15 +dinfo = pinfo.get("domains") +domain_names = list(dinfo.keys()) +domain_ids = [domain["ID"] for domain in list(dinfo.values())] + +# +# Command line arguments +# +parser = argparse.ArgumentParser(description="Get CPU power consumption") +parser.add_argument("-s", action="store_true", help="print in human readable format") +args = parser.parse_args() +human_readable = args.s + +# +# Methods +# +def get_power(): + start = list() + end = list() + power = list() + for domain_id in domain_ids: + e_start = pylikwid.startpower(cpuid, domain_id) + start.append(e_start) + time.sleep(measurement_duration) + for domain_id in domain_ids: + e_stop = pylikwid.stoppower(cpuid, domain_id) + end.append(e_stop) + for events in zip(start, end, domain_ids): + joules = pylikwid.getpower(events[0], events[1], events[2]) + power.append(joules / measurement_duration) + + return dict(list(zip(domain_names, power))) + + +def print_rrd(measurements): + timestamp = int(time.time()) + for measurement in list(measurements.items()): + name = measurement[0].lower() + power = measurement[1] + print( + ( + "PUTVAL {}/exec-power/gauge-{} {}:{:.1f}".format( + hostname, name, timestamp, power + ) + ) + ) + + +# +# Main +# +if human_readable: + print(get_power()) +else: + while True: + power = get_power() + print_rrd(power) + sys.stdout.flush() + time.sleep(measurement_interval) diff --git a/collectd/usr/local/bin/speedtest-data b/collectd/usr/local/bin/speedtest-data new file mode 100755 index 0000000..22d3cb8 --- /dev/null +++ b/collectd/usr/local/bin/speedtest-data @@ -0,0 +1,15 @@ +#!/bin/bash 
# Collectd exec script: periodically run speedtest-cli and emit
# download/upload gauges in collectd's PUTVAL plain-text protocol.
SPEEDTEST=/usr/bin/speedtest-cli
COLLECTION=server
INTERVAL=900

while :; do
    # SECONDS is bash's built-in timer; reset it so we can subtract the
    # time the speedtest itself took from the sleep below.
    SECONDS=0
    # speedtest-cli prints two "... Mbit/s" lines: download, then upload.
    RESULT=($($SPEEDTEST | grep Mbit | cut -d' ' -f 2))
    TIMESTAMP=$(date +%s)
    # Only report when both figures were parsed; otherwise collectd would
    # receive PUTVAL lines with empty values.
    if [ ${#RESULT[@]} -ge 2 ]; then
        echo "PUTVAL $COLLECTION/exec-speedtest/gauge-download ${TIMESTAMP}:${RESULT[0]}"
        echo "PUTVAL $COLLECTION/exec-speedtest/gauge-upload ${TIMESTAMP}:${RESULT[1]}"
    fi
    # BUG FIX: when a run takes longer than INTERVAL the original passed a
    # negative argument to sleep, which fails; clamp at zero instead.
    REMAINING=$((INTERVAL - SECONDS))
    if [ "$REMAINING" -gt 0 ]; then
        sleep "$REMAINING"
    fi
done