|
| 1 | +-- |
| 2 | +-- SPDX-FileCopyrightText: (c) 2025 Enderson Maia <[email protected]> |
| 3 | +-- SPDX-License-Identifier: MIT OR GPL-2.0-only |
| 4 | +-- |
| 5 | + |
| 6 | +local lunatik = require("lunatik") |
| 7 | +local thread = require("thread") |
| 8 | +local socket = require("socket") |
| 9 | +local unix = require("socket.unix") |
| 10 | +local linux = require("linux") |
| 11 | +local cpu = require("cpu") |
| 12 | + |
-- Shorthands for values the daemon loop looks up on every iteration.
local shouldstop = thread.shouldstop
local sock = socket.sock

-- Filesystem path of the UNIX stream socket the exporter listens on.
local SOCKET_PATH = "/tmp/cpuexporter.sock"

-- Bind and start listening as soon as the script is loaded.
local server = unix.bind(SOCKET_PATH, "STREAM")
server:listen()
| 18 | + |
--- Format the ratio value/total as a percentage with 16 decimal places.
-- Only integer arithmetic is used, so the result is returned as a string.
-- The digits are produced in two steps: the first division yields the integer
-- part plus 7 decimals, and the remainder of that same division is scaled
-- again to yield 9 more decimals.
-- Both intermediate products must fit in a Lua integer; with 64-bit integers
-- that means |value| and |total| up to roughly 9.2e9.
-- @tparam integer value numerator of the ratio
-- @tparam integer total denominator of the ratio
-- @treturn string the percentage, e.g. "33.3333333333333333"; "0.0" when
--   total is 0
local function to_percent_float(value, total)
	if total == 0 then
		return "0.0"
	end

	-- Work on magnitudes: floor division and modulo would otherwise round a
	-- negative ratio towards minus infinity and print the wrong digits.
	local negative = value ~= 0 and ((value < 0) ~= (total < 0))
	if value < 0 then
		value = -value
	end
	if total < 0 then
		total = -total
	end

	local scaled = value * 1000000000
	local quotient = scaled // total -- ratio * 10^9 == percent * 10^7
	local remainder = scaled % total -- what the division above dropped

	local int_part = quotient // 10000000 -- whole percent
	local frac_high = quotient % 10000000 -- decimals 1..7
	-- remainder < total, so this is always below 10^9 and fits "%09d".
	local frac_low = (remainder * 1000000000) // total -- decimals 8..16

	return string.format("%s%d.%07d%09d",
		negative and "-" or "", int_part, frac_high, frac_low)
end
| 35 | + |
--- Add up every value stored in a table, regardless of its keys.
-- @tparam table tbl table whose values can all be added together
-- @treturn number the accumulated total; 0 for an empty table
local function _sum_table(tbl)
	local total = 0
	for _, item in pairs(tbl) do
		total = total + item
	end
	return total
end
| 44 | + |
--- Compute per-CPU usage from two samples taken 100ms apart.
-- Every online CPU is sampled once, the thread then sleeps a single time, and
-- every online CPU is sampled again. Sleeping once for the whole set (rather
-- than once per CPU) keeps the call at about 100ms regardless of CPU count.
-- @treturn table map of cpu_id to a table of { field_name = percent_string },
--   where the percent strings come from to_percent_float
local function cpu_usage()
	-- First sample of every online CPU.
	local before = {}
	cpu.foreach_online(function(cpu_id)
		before[cpu_id] = cpu.stats(cpu_id)
	end)

	linux.schedule(100)

	local usage = {}

	--TODO: add cpu-total with accumulated values for all cpus
	cpu.foreach_online(function(cpu_id)
		local t1 = before[cpu_id]
		if not t1 then
			-- CPU came online during the interval: there is no baseline yet.
			return
		end
		local t2 = cpu.stats(cpu_id)

		-- Missing guest counters are treated as 0 instead of raising.
		local guest_delta = (t2.guest or 0) - (t1.guest or 0)
		local guest_nice_delta = (t2.guest_nice or 0) - (t1.guest_nice or 0)

		-- "user" and "nice" have the guest deltas subtracted below, i.e. guest
		-- time is treated as already included in them. The plain sum of all
		-- fields therefore counts guest time twice, so it is removed once here
		-- to keep the reported percentages adding up to 100.
		-- NOTE(review): confirm against the semantics of cpu.stats.
		local total_delta = _sum_table(t2) - _sum_table(t1)
			- guest_delta - guest_nice_delta

		local percents = {}
		for key, t2_value in pairs(t2) do
			local delta = t2_value - (t1[key] or 0)
			if key == "user" then
				delta = delta - guest_delta
			elseif key == "nice" then
				delta = delta - guest_nice_delta
			end
			percents[key] = to_percent_float(delta, total_delta)
		end
		usage[cpu_id] = percents
	end)

	return usage
end
| 75 | + |
--- Render the current CPU usage in the Prometheus text exposition format.
-- Samples are grouped by metric name, each group preceded by its "# TYPE"
-- line. Metric names and CPU ids are sorted so the output order is stable.
-- @treturn string the metrics text; empty when cpu_usage() reports no CPU
local function cpu_metrics()
	-- NOTE(review): Lunatik documents linux.time() as nanoseconds since the
	-- epoch, while Prometheus sample timestamps are milliseconds, hence the
	-- division by 10^6 (dividing by 1000 would yield microseconds).
	local ts_ms = linux.time() // 1000000
	local usage_data = cpu_usage() -- Call once and store the result

	-- Collect CPU ids and the union of metric names over every CPU, so no
	-- particular CPU id has to exist.
	local cpu_ids, metric_names, seen = {}, {}, {}
	for cpu_id, fields in pairs(usage_data) do
		cpu_ids[#cpu_ids + 1] = cpu_id
		for name in pairs(fields) do
			if not seen[name] then
				seen[name] = true
				metric_names[#metric_names + 1] = name
			end
		end
	end
	table.sort(cpu_ids)
	table.sort(metric_names)

	-- Accumulate lines in a table and join once at the end.
	local lines = {}
	for _, metric in ipairs(metric_names) do
		lines[#lines + 1] = string.format('# TYPE cpu_usage_%s gauge\n', metric)
		for _, cpu_id in ipairs(cpu_ids) do
			local value = usage_data[cpu_id][metric] or "0"
			lines[#lines + 1] = string.format('cpu_usage_%s{cpu="cpu%d"} %s %d\n',
				metric, cpu_id, value, ts_ms)
		end
	end

	return table.concat(lines)
end
| 99 | + |
--- Serve one accepted connection.
-- Reads up to 1024 bytes. If the data starts with an HTTP request line, only
-- "GET /metrics" is answered with the metrics; any other method or path gets
-- an error status and an error is raised. Data that is not an HTTP request
-- (e.g. a plain socat connection) is answered with the bare metrics text.
-- The metrics are generated before any success status is written, so a
-- failure while collecting them never leaves a "200 OK" with an empty body.
-- @param session connected socket object providing receive() and send()
-- @raise the receive error, or a message describing the rejected request
local function handle_client(session)
	-- Read the request
	local request, err = session:receive(1024)
	if not request then
		error(err)
	end

	-- Check if this is an HTTP request
	local method, path, http_version =
		string.match(request, "^(%w+)%s+([^%s]+)%s+(HTTP/%d%.%d)")

	if not http_version then
		-- Plain connection: reply with the metrics only.
		session:send(cpu_metrics())
		return
	end

	if method ~= "GET" then
		session:send("HTTP/1.1 405 Method Not Allowed\r\n\r\n")
		error("Method not allowed: " .. tostring(method))
	end

	if path ~= "/metrics" then
		session:send("HTTP/1.1 404 Not Found\r\n\r\n")
		error("Path not found: " .. tostring(path))
	end

	local body = cpu_metrics()

	-- Content-Length and "Connection: close" let the client detect the end
	-- of the response; the caller closes the session after this returns.
	session:send(table.concat({
		"HTTP/1.1 200 OK\r\n",
		"Content-Type: text/plain; version=0.0.4\r\n",
		string.format("Content-Length: %d\r\n", #body),
		"Connection: close\r\n",
		"\r\n",
		body,
	}))
end
| 131 | + |
--- Main loop of the exporter: accept connections until asked to stop.
-- Each accepted session is handled under pcall so that one failing client
-- cannot end the loop, and the session is always closed afterwards.
-- When accept fails the loop sleeps before retrying; failures other than
-- "EAGAIN" (no pending connection on the non-blocking accept) are logged
-- rather than silently retried in a tight loop.
local function daemon()
	print("cpud [daemon]: started")
	while not shouldstop() do
		local accepted, session = pcall(server.accept, server, sock.NONBLOCK)
		if accepted then
			local handled, err = pcall(handle_client, session)
			if not handled then
				print("cpud [daemon]: error handling client: " .. tostring(err))
			end
			session:close()
		else
			-- On failure pcall returns the error value in place of a session.
			if session ~= "EAGAIN" then
				print("cpud [daemon]: accept failed: " .. tostring(session))
			end
			linux.schedule(100)
		end
	end
	print("cpud [daemon]: stopped")
end
| 148 | + |
| 149 | +return daemon |
| 150 | + |
0 commit comments