chore: initial commit for v0.0.1
DChain single-node blockchain + React Native messenger client. Core: - PBFT consensus with multi-sig validator admission + equivocation slashing - BadgerDB + schema migration scaffold (CurrentSchemaVersion=0) - libp2p gossipsub (tx/v1, blocks/v1, relay/v1, version/v1) - Native Go contracts (username_registry) alongside WASM (wazero) - WebSocket gateway with topic-based fanout + Ed25519-nonce auth - Relay mailbox with NaCl envelope encryption (X25519 + Ed25519) - Prometheus /metrics, per-IP rate limit, body-size cap Deployment: - Single-node compose (deploy/single/) with Caddy TLS + optional Prometheus - 3-node dev compose (docker-compose.yml) with mocked internet topology - 3-validator prod compose (deploy/prod/) for federation - Auto-update from Gitea via /api/update-check + systemd timer - Build-time version injection (ldflags → node --version) - UI / Swagger toggle flags (DCHAIN_DISABLE_UI, DCHAIN_DISABLE_SWAGGER) Client (client-app/): - Expo / React Native / NativeWind - E2E NaCl encryption, typing indicator, contact requests - Auto-discovery of canonical contracts, chain_id aware, WS reconnect on node switch Documentation: - README.md, CHANGELOG.md, CONTEXT.md - deploy/single/README.md with 6 operator scenarios - deploy/UPDATE_STRATEGY.md with 4-layer forward-compat design - docs/contracts/*.md per contract
This commit is contained in:
232
node/metrics.go
Normal file
232
node/metrics.go
Normal file
@@ -0,0 +1,232 @@
|
||||
// Package node — minimal Prometheus-format metrics.
|
||||
//
|
||||
// We expose counters, gauges, and histograms in the Prometheus text
|
||||
// exposition format on GET /metrics. A full-blown prometheus/client_golang
|
||||
// dependency would pull 10+ transitive modules; for our needs (a handful of
|
||||
// metrics + stable output format) a ~200 LOC in-tree implementation is
|
||||
// enough, with zero extra build surface.
|
||||
//
|
||||
// Concurrency: all metric types are safe for concurrent Inc/Add/Observe.
|
||||
// Registration happens at init time and is not reentrant.
|
||||
package node
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// ─── registry ────────────────────────────────────────────────────────────────
|
||||
|
||||
// metric is the internal interface every exposed metric implements.
|
||||
type metric interface {
|
||||
// write emits the metric's lines to w in Prometheus text format.
|
||||
write(w *strings.Builder)
|
||||
}
|
||||
|
||||
type metricRegistry struct {
|
||||
mu sync.RWMutex
|
||||
entries []metric
|
||||
}
|
||||
|
||||
var defaultRegistry = &metricRegistry{}
|
||||
|
||||
func (r *metricRegistry) register(m metric) {
|
||||
r.mu.Lock()
|
||||
r.entries = append(r.entries, m)
|
||||
r.mu.Unlock()
|
||||
}
|
||||
|
||||
// metricsHandler writes Prometheus exposition format for all registered metrics.
|
||||
func metricsHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
var sb strings.Builder
|
||||
defaultRegistry.mu.RLock()
|
||||
for _, m := range defaultRegistry.entries {
|
||||
m.write(&sb)
|
||||
}
|
||||
defaultRegistry.mu.RUnlock()
|
||||
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
||||
_, _ = w.Write([]byte(sb.String()))
|
||||
}
|
||||
|
||||
// ─── counter ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// MetricCounter is a monotonically increasing value. Typical use: number of
|
||||
// blocks committed, number of rejected txs.
|
||||
type MetricCounter struct {
|
||||
name, help string
|
||||
v atomic.Uint64
|
||||
}
|
||||
|
||||
// NewCounter registers and returns a new counter.
|
||||
func NewCounter(name, help string) *MetricCounter {
|
||||
c := &MetricCounter{name: name, help: help}
|
||||
defaultRegistry.register(c)
|
||||
return c
|
||||
}
|
||||
|
||||
// Inc adds 1 to the counter.
|
||||
func (c *MetricCounter) Inc() { c.v.Add(1) }
|
||||
|
||||
// Add adds n to the counter (must be ≥ 0 — counters are monotonic).
|
||||
func (c *MetricCounter) Add(n uint64) { c.v.Add(n) }
|
||||
|
||||
func (c *MetricCounter) write(sb *strings.Builder) {
|
||||
fmt.Fprintf(sb, "# HELP %s %s\n# TYPE %s counter\n%s %d\n",
|
||||
c.name, c.help, c.name, c.name, c.v.Load())
|
||||
}
|
||||
|
||||
// ─── gauge ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// MetricGauge is a value that can go up or down. Typical use: current
|
||||
// mempool size, active websocket connections, tip height.
|
||||
type MetricGauge struct {
|
||||
name, help string
|
||||
v atomic.Int64
|
||||
fn func() int64 // optional live provider; if set, v is unused
|
||||
}
|
||||
|
||||
// NewGauge registers a gauge backed by an atomic Int64.
|
||||
func NewGauge(name, help string) *MetricGauge {
|
||||
g := &MetricGauge{name: name, help: help}
|
||||
defaultRegistry.register(g)
|
||||
return g
|
||||
}
|
||||
|
||||
// NewGaugeFunc registers a gauge whose value is fetched on scrape. Useful
|
||||
// when the source of truth is some other subsystem (chain.TipIndex, peer
|
||||
// count, etc.) and we don't want a separate mirror variable.
|
||||
func NewGaugeFunc(name, help string, fn func() int64) *MetricGauge {
|
||||
g := &MetricGauge{name: name, help: help, fn: fn}
|
||||
defaultRegistry.register(g)
|
||||
return g
|
||||
}
|
||||
|
||||
// Set overrides the stored value. Only meaningful for non-fn gauges.
|
||||
func (g *MetricGauge) Set(v int64) { g.v.Store(v) }
|
||||
|
||||
// Inc / Dec convenience for bookkeeping gauges.
|
||||
func (g *MetricGauge) Inc() { g.v.Add(1) }
|
||||
func (g *MetricGauge) Dec() { g.v.Add(-1) }
|
||||
|
||||
func (g *MetricGauge) write(sb *strings.Builder) {
|
||||
v := g.v.Load()
|
||||
if g.fn != nil {
|
||||
v = g.fn()
|
||||
}
|
||||
fmt.Fprintf(sb, "# HELP %s %s\n# TYPE %s gauge\n%s %d\n",
|
||||
g.name, g.help, g.name, g.name, v)
|
||||
}
|
||||
|
||||
// ─── histogram ───────────────────────────────────────────────────────────────
|
||||
|
||||
// MetricHistogram is a fixed-bucket latency histogram. Typical use: time to
|
||||
// commit a block, time to apply a tx. We use user-supplied buckets (upper
|
||||
// bounds in seconds) and track sum + count alongside for Prometheus-standard
|
||||
// output.
|
||||
type MetricHistogram struct {
|
||||
name, help string
|
||||
buckets []float64
|
||||
counts []atomic.Uint64
|
||||
inf atomic.Uint64
|
||||
sum atomic.Uint64 // sum in microseconds to avoid floats
|
||||
count atomic.Uint64
|
||||
}
|
||||
|
||||
// NewHistogram registers a histogram with explicit bucket upper bounds (s).
|
||||
// Buckets must be strictly increasing. The implicit +Inf bucket is added
|
||||
// automatically per Prometheus spec.
|
||||
func NewHistogram(name, help string, buckets []float64) *MetricHistogram {
|
||||
sorted := make([]float64, len(buckets))
|
||||
copy(sorted, buckets)
|
||||
sort.Float64s(sorted)
|
||||
h := &MetricHistogram{
|
||||
name: name, help: help,
|
||||
buckets: sorted,
|
||||
counts: make([]atomic.Uint64, len(sorted)),
|
||||
}
|
||||
defaultRegistry.register(h)
|
||||
return h
|
||||
}
|
||||
|
||||
// Observe records a single sample (duration in seconds).
|
||||
func (h *MetricHistogram) Observe(seconds float64) {
|
||||
// Record in every bucket whose upper bound ≥ sample.
|
||||
for i, b := range h.buckets {
|
||||
if seconds <= b {
|
||||
h.counts[i].Add(1)
|
||||
}
|
||||
}
|
||||
h.inf.Add(1)
|
||||
h.sum.Add(uint64(seconds * 1_000_000)) // µs resolution
|
||||
h.count.Add(1)
|
||||
}
|
||||
|
||||
func (h *MetricHistogram) write(sb *strings.Builder) {
|
||||
fmt.Fprintf(sb, "# HELP %s %s\n# TYPE %s histogram\n", h.name, h.help, h.name)
|
||||
for i, b := range h.buckets {
|
||||
fmt.Fprintf(sb, `%s_bucket{le="%s"} %d`+"\n",
|
||||
h.name, strconv.FormatFloat(b, 'g', -1, 64), h.counts[i].Load())
|
||||
}
|
||||
fmt.Fprintf(sb, `%s_bucket{le="+Inf"} %d`+"\n", h.name, h.inf.Load())
|
||||
fmt.Fprintf(sb, "%s_sum %f\n", h.name, float64(h.sum.Load())/1_000_000)
|
||||
fmt.Fprintf(sb, "%s_count %d\n", h.name, h.count.Load())
|
||||
}
|
||||
|
||||
// ─── registered metrics (called from main.go) ────────────────────────────────
|
||||
//
|
||||
// Keeping these as package-level vars lets callers just do
|
||||
// `MetricBlocksTotal.Inc()` instead of threading a registry through every
|
||||
// component. Names follow Prometheus naming conventions:
|
||||
// <namespace>_<subsystem>_<metric>_<unit>
|
||||
|
||||
var (
|
||||
MetricBlocksTotal = NewCounter(
|
||||
"dchain_blocks_total",
|
||||
"Total number of blocks committed by this node",
|
||||
)
|
||||
MetricTxsTotal = NewCounter(
|
||||
"dchain_txs_total",
|
||||
"Total number of transactions included in committed blocks",
|
||||
)
|
||||
MetricTxSubmitAccepted = NewCounter(
|
||||
"dchain_tx_submit_accepted_total",
|
||||
"Transactions accepted into the mempool via /api/tx or WS submit_tx",
|
||||
)
|
||||
MetricTxSubmitRejected = NewCounter(
|
||||
"dchain_tx_submit_rejected_total",
|
||||
"Transactions rejected at API validation (bad sig, timestamp, etc.)",
|
||||
)
|
||||
MetricWSConnections = NewGauge(
|
||||
"dchain_ws_connections",
|
||||
"Currently open websocket connections on this node",
|
||||
)
|
||||
MetricPeers = NewGauge(
|
||||
"dchain_peer_count",
|
||||
"Currently connected libp2p peers",
|
||||
)
|
||||
// Block-commit latency — how long AddBlock takes end-to-end. Catches
|
||||
// slow contract calls before they reach the freeze threshold.
|
||||
MetricBlockCommitSeconds = NewHistogram(
|
||||
"dchain_block_commit_seconds",
|
||||
"Wall-clock seconds spent inside chain.AddBlock",
|
||||
[]float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10, 30},
|
||||
)
|
||||
|
||||
// Worst validator liveness in the current set — how many seqNums have
|
||||
// passed without a commit vote from the most-delinquent validator.
|
||||
// Alert on this staying > 20 for more than a few minutes; that
|
||||
// validator is either down, partitioned, or wedged.
|
||||
MetricMaxMissedBlocks = NewGauge(
|
||||
"dchain_max_missed_blocks",
|
||||
"Highest missed-block count among current validators (0 if all healthy)",
|
||||
)
|
||||
)
|
||||
Reference in New Issue
Block a user