Files
dchain/node/metrics.go
vsecoder 7e7393e4f8 chore: initial commit for v0.0.1
DChain single-node blockchain + React Native messenger client.

Core:
- PBFT consensus with multi-sig validator admission + equivocation slashing
- BadgerDB + schema migration scaffold (CurrentSchemaVersion=0)
- libp2p gossipsub (tx/v1, blocks/v1, relay/v1, version/v1)
- Native Go contracts (username_registry) alongside WASM (wazero)
- WebSocket gateway with topic-based fanout + Ed25519-nonce auth
- Relay mailbox with NaCl envelope encryption (X25519 + Ed25519)
- Prometheus /metrics, per-IP rate limit, body-size cap

Deployment:
- Single-node compose (deploy/single/) with Caddy TLS + optional Prometheus
- 3-node dev compose (docker-compose.yml) with mocked internet topology
- 3-validator prod compose (deploy/prod/) for federation
- Auto-update from Gitea via /api/update-check + systemd timer
- Build-time version injection (ldflags → node --version)
- UI / Swagger toggle flags (DCHAIN_DISABLE_UI, DCHAIN_DISABLE_SWAGGER)

Client (client-app/):
- Expo / React Native / NativeWind
- E2E NaCl encryption, typing indicator, contact requests
- Auto-discovery of canonical contracts, chain_id aware, WS reconnect on node switch

Documentation:
- README.md, CHANGELOG.md, CONTEXT.md
- deploy/single/README.md with 6 operator scenarios
- deploy/UPDATE_STRATEGY.md with 4-layer forward-compat design
- docs/contracts/*.md per contract
2026-04-17 14:16:44 +03:00

233 lines
8.0 KiB
Go

// Package node — minimal Prometheus-format metrics.
//
// We expose counters, gauges, and histograms in the Prometheus text
// exposition format on GET /metrics. A full-blown prometheus/client_golang
// dependency would pull 10+ transitive modules; for our needs (a handful of
// metrics + stable output format) a ~200 LOC in-tree implementation is
// enough, with zero extra build surface.
//
// Concurrency: all metric types are safe for concurrent Inc/Add/Observe.
// Registration happens at init time and is not reentrant.
package node
import (
"fmt"
"net/http"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
)
// ─── registry ────────────────────────────────────────────────────────────────

// metric is the internal interface every exposed metric implements.
type metric interface {
	// write appends the metric's lines to w in Prometheus text format.
	write(w *strings.Builder)
}

// metricRegistry is an append-only list of metrics guarded by an RWMutex:
// registration takes the write lock, scrapes take the read lock.
type metricRegistry struct {
	mu      sync.RWMutex
	entries []metric
}

// defaultRegistry collects every metric created via the New* constructors.
var defaultRegistry = &metricRegistry{}

// register appends m to the registry. Called from the New* constructors at
// package-init time; not intended to be reentrant.
func (r *metricRegistry) register(m metric) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.entries = append(r.entries, m)
}

// metricsHandler serves GET /metrics: it renders every registered metric in
// the Prometheus text exposition format. Non-GET methods get a 405.
func metricsHandler(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}

	// Render into a local builder first so the read lock is held only for
	// the in-memory formatting, never for the network write.
	var out strings.Builder
	defaultRegistry.mu.RLock()
	for _, m := range defaultRegistry.entries {
		m.write(&out)
	}
	defaultRegistry.mu.RUnlock()

	w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
	_, _ = w.Write([]byte(out.String()))
}
// ─── counter ─────────────────────────────────────────────────────────────────

// MetricCounter is a monotonically increasing value. Typical use: number of
// blocks committed, number of rejected txs.
type MetricCounter struct {
	name, help string
	v          atomic.Uint64
}

// NewCounter registers and returns a new counter.
func NewCounter(name, help string) *MetricCounter {
	counter := &MetricCounter{name: name, help: help}
	defaultRegistry.register(counter)
	return counter
}

// Inc adds 1 to the counter.
func (c *MetricCounter) Inc() { c.v.Add(1) }

// Add adds n to the counter. Counters are monotonic; n is uint64 so a
// negative delta is impossible by construction.
func (c *MetricCounter) Add(n uint64) { c.v.Add(n) }

// write emits the HELP/TYPE header pair followed by the current value.
func (c *MetricCounter) write(sb *strings.Builder) {
	sb.WriteString("# HELP " + c.name + " " + c.help + "\n")
	sb.WriteString("# TYPE " + c.name + " counter\n")
	sb.WriteString(c.name + " " + strconv.FormatUint(c.v.Load(), 10) + "\n")
}
// ─── gauge ───────────────────────────────────────────────────────────────────

// MetricGauge is a value that can go up or down. Typical use: current
// mempool size, active websocket connections, tip height.
type MetricGauge struct {
	name, help string
	v          atomic.Int64
	fn         func() int64 // optional live provider; if set, v is unused
}

// NewGauge registers a gauge backed by an atomic Int64.
func NewGauge(name, help string) *MetricGauge {
	gauge := &MetricGauge{name: name, help: help}
	defaultRegistry.register(gauge)
	return gauge
}

// NewGaugeFunc registers a gauge whose value is fetched on scrape. Useful
// when the source of truth is some other subsystem (chain.TipIndex, peer
// count, etc.) and we don't want a separate mirror variable.
func NewGaugeFunc(name, help string, fn func() int64) *MetricGauge {
	gauge := &MetricGauge{name: name, help: help, fn: fn}
	defaultRegistry.register(gauge)
	return gauge
}

// Set overrides the stored value. Only meaningful for non-fn gauges.
func (g *MetricGauge) Set(v int64) { g.v.Store(v) }

// Inc adds 1 to the stored value; bookkeeping convenience.
func (g *MetricGauge) Inc() { g.v.Add(1) }

// Dec subtracts 1 from the stored value; bookkeeping convenience.
func (g *MetricGauge) Dec() { g.v.Add(-1) }

// write emits the HELP/TYPE header pair followed by the current value,
// preferring the live provider over the stored atomic when one is set.
func (g *MetricGauge) write(sb *strings.Builder) {
	current := g.v.Load()
	if g.fn != nil {
		current = g.fn()
	}
	sb.WriteString("# HELP " + g.name + " " + g.help + "\n")
	sb.WriteString("# TYPE " + g.name + " gauge\n")
	sb.WriteString(g.name + " " + strconv.FormatInt(current, 10) + "\n")
}
// ─── histogram ───────────────────────────────────────────────────────────────

// MetricHistogram is a fixed-bucket latency histogram. Typical use: time to
// commit a block, time to apply a tx. We use user-supplied buckets (upper
// bounds in seconds) and track sum + count alongside for Prometheus-standard
// output.
//
// Per the Prometheus exposition format, bucket counts are cumulative: a
// sample is recorded in every bucket whose upper bound is >= the sample.
type MetricHistogram struct {
	name, help string
	buckets    []float64       // sorted, de-duplicated upper bounds in seconds
	counts     []atomic.Uint64 // cumulative counts, parallel to buckets
	inf        atomic.Uint64   // implicit +Inf bucket (always equals count)
	sum        atomic.Uint64   // sum in microseconds to avoid floats
	count      atomic.Uint64   // total number of observations
}

// NewHistogram registers a histogram with explicit bucket upper bounds (s).
// Buckets are sorted and de-duplicated here, so callers need not pre-sort
// and a repeated bound cannot emit two identical — invalid — `le` labels.
// The implicit +Inf bucket is added automatically per Prometheus spec.
func NewHistogram(name, help string, buckets []float64) *MetricHistogram {
	sorted := make([]float64, len(buckets))
	copy(sorted, buckets)
	sort.Float64s(sorted)
	// Drop duplicate bounds: every le="..." label in the output must be
	// unique, and a duplicate bucket would also be double-counted.
	uniq := make([]float64, 0, len(sorted))
	for i, b := range sorted {
		if i == 0 || b != sorted[i-1] {
			uniq = append(uniq, b)
		}
	}
	h := &MetricHistogram{
		name: name, help: help,
		buckets: uniq,
		counts:  make([]atomic.Uint64, len(uniq)),
	}
	defaultRegistry.register(h)
	return h
}

// Observe records a single sample (duration in seconds).
//
// Non-positive (or NaN) samples still count toward the buckets and _count
// but contribute nothing to _sum: converting a negative float64 to uint64
// is implementation-defined in Go and previously produced a garbage sum.
func (h *MetricHistogram) Observe(seconds float64) {
	// Buckets are sorted, so the sample lands in every bucket from the
	// first one whose upper bound >= seconds onward. Binary-search the
	// starting index instead of testing each bucket linearly.
	for i := sort.SearchFloat64s(h.buckets, seconds); i < len(h.buckets); i++ {
		h.counts[i].Add(1)
	}
	h.inf.Add(1)
	if seconds > 0 {
		h.sum.Add(uint64(seconds * 1_000_000)) // µs resolution
	}
	h.count.Add(1)
}

// write emits the standard Prometheus histogram series: one _bucket line per
// bound, the implicit +Inf bucket, then _sum (converted back to seconds from
// the stored microseconds) and _count.
func (h *MetricHistogram) write(sb *strings.Builder) {
	fmt.Fprintf(sb, "# HELP %s %s\n# TYPE %s histogram\n", h.name, h.help, h.name)
	for i, b := range h.buckets {
		fmt.Fprintf(sb, `%s_bucket{le="%s"} %d`+"\n",
			h.name, strconv.FormatFloat(b, 'g', -1, 64), h.counts[i].Load())
	}
	fmt.Fprintf(sb, `%s_bucket{le="+Inf"} %d`+"\n", h.name, h.inf.Load())
	fmt.Fprintf(sb, "%s_sum %f\n", h.name, float64(h.sum.Load())/1_000_000)
	fmt.Fprintf(sb, "%s_count %d\n", h.name, h.count.Load())
}
// ─── registered metrics (called from main.go) ────────────────────────────────
//
// Keeping these as package-level vars lets callers just do
// `MetricBlocksTotal.Inc()` instead of threading a registry through every
// component. Names follow Prometheus naming conventions:
// <namespace>_<subsystem>_<metric>_<unit>
//
// NOTE(review): these initializers run at package init and each New* call
// appends to defaultRegistry, so every metric below appears on /metrics
// from process start with a zero value.
var (
	// Monotonic commit counter; incremented once per committed block.
	MetricBlocksTotal = NewCounter(
		"dchain_blocks_total",
		"Total number of blocks committed by this node",
	)
	// Monotonic tx counter over committed blocks (not mempool arrivals).
	MetricTxsTotal = NewCounter(
		"dchain_txs_total",
		"Total number of transactions included in committed blocks",
	)
	// Accepted/rejected submission counters form a pair: their sum is the
	// total number of submissions seen at the API edge.
	MetricTxSubmitAccepted = NewCounter(
		"dchain_tx_submit_accepted_total",
		"Transactions accepted into the mempool via /api/tx or WS submit_tx",
	)
	MetricTxSubmitRejected = NewCounter(
		"dchain_tx_submit_rejected_total",
		"Transactions rejected at API validation (bad sig, timestamp, etc.)",
	)
	// Up/down gauge maintained by the websocket gateway (Inc on open,
	// Dec on close — presumably; confirm against the gateway code).
	MetricWSConnections = NewGauge(
		"dchain_ws_connections",
		"Currently open websocket connections on this node",
	)
	// Up/down gauge of connected libp2p peers.
	MetricPeers = NewGauge(
		"dchain_peer_count",
		"Currently connected libp2p peers",
	)
	// Block-commit latency — how long AddBlock takes end-to-end. Catches
	// slow contract calls before they reach the freeze threshold.
	// Bucket bounds span 1 ms to 30 s; samples above 30 s land only in +Inf.
	MetricBlockCommitSeconds = NewHistogram(
		"dchain_block_commit_seconds",
		"Wall-clock seconds spent inside chain.AddBlock",
		[]float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10, 30},
	)
	// Worst validator liveness in the current set — how many seqNums have
	// passed without a commit vote from the most-delinquent validator.
	// Alert on this staying > 20 for more than a few minutes; that
	// validator is either down, partitioned, or wedged.
	MetricMaxMissedBlocks = NewGauge(
		"dchain_max_missed_blocks",
		"Highest missed-block count among current validators (0 if all healthy)",
	)
)