From 975335767f1ad9bd5d987e2de5a576aa90aa2f4f Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Mon, 9 Mar 2026 23:42:37 +0900 Subject: [PATCH 01/13] Add Prometheus/Grafana observability for DynamoDB and Raft and normalize legacy GSI snapshots for compatibility --- README.md | 35 + adapter/dynamodb.go | 325 +++++++- adapter/dynamodb_metrics_test.go | 76 ++ adapter/dynamodb_migration_test.go | 56 ++ cmd/server/demo.go | 124 ++- docs/docker_multinode_manual_run.md | 15 + main.go | 113 ++- monitoring/docker-compose.yml | 23 + monitoring/dynamodb.go | 281 +++++++ monitoring/dynamodb_test.go | 74 ++ .../elastickv-cluster-overview.json | 733 ++++++++++++++++++ .../provisioning/dashboards/dashboards.yml | 11 + .../provisioning/datasources/prometheus.yml | 10 + monitoring/prometheus/prometheus.yml | 11 + monitoring/raft.go | 313 ++++++++ monitoring/raft_test.go | 41 + monitoring/registry.go | 68 ++ 17 files changed, 2240 insertions(+), 69 deletions(-) create mode 100644 adapter/dynamodb_metrics_test.go create mode 100644 monitoring/docker-compose.yml create mode 100644 monitoring/dynamodb.go create mode 100644 monitoring/dynamodb_test.go create mode 100644 monitoring/grafana/dashboards/elastickv-cluster-overview.json create mode 100644 monitoring/grafana/provisioning/dashboards/dashboards.yml create mode 100644 monitoring/grafana/provisioning/datasources/prometheus.yml create mode 100644 monitoring/prometheus/prometheus.yml create mode 100644 monitoring/raft.go create mode 100644 monitoring/raft_test.go create mode 100644 monitoring/registry.go diff --git a/README.md b/README.md index c4b68291..d81b414d 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,31 @@ Deployment/runbook documents: - `docs/docker_multinode_manual_run.md` (manual `docker run`, 4-5 node cluster on multiple VMs, no docker compose) +## Metrics and Grafana + +Elastickv now exposes Prometheus metrics on `--metricsAddress` (default: `localhost:9090` in `main.go`, `:9090` in `cmd/server/demo.go`). + +The exported metrics cover: + +- DynamoDB-compatible API request rate, success/error split, latency, request/response size, and per-table read/write item counts +- Raft local state, leader identity, current members, commit/applied index, and leader contact lag + +Provisioned monitoring assets live under: + +- `monitoring/prometheus/prometheus.yml` +- `monitoring/grafana/dashboards/elastickv-cluster-overview.json` +- `monitoring/grafana/provisioning/` +- `monitoring/docker-compose.yml` + +To scrape a multi-node deployment, bind `--metricsAddress` to each node's private IP, for example `--metricsAddress "10.0.0.11:9090"`. + +For the local 3-node demo, start Grafana and Prometheus with: + +```bash +cd monitoring +docker compose up -d +``` + ## Example Usage @@ -41,6 +66,16 @@ To start the server, use the following command: go run cmd/server/demo.go ``` +To expose metrics on a dedicated port: +```bash +go run . \ + --address "127.0.0.1:50051" \ + --redisAddress "127.0.0.1:6379" \ + --dynamoAddress "127.0.0.1:8000" \ + --metricsAddress "127.0.0.1:9090" \ + --raftId "n1" +``` + ### Starting the Client To start the client, use this command: diff --git a/adapter/dynamodb.go b/adapter/dynamodb.go index 532d17ab..5867869e 100644 --- a/adapter/dynamodb.go +++ b/adapter/dynamodb.go @@ -19,6 +19,7 @@ import ( "time" "github.com/bootjp/elastickv/kv" + "github.com/bootjp/elastickv/monitoring" "github.com/bootjp/elastickv/store" "github.com/cockroachdb/errors" ) @@ -86,17 +87,106 @@ const ( dynamoSelectCount = "COUNT" ) +type DynamoDBServerOption func(*DynamoDBServer) + type DynamoDBServer struct { listen net.Listener store store.MVCCStore coordinator kv.Coordinator httpServer *http.Server targetHandlers map[string]func(http.ResponseWriter, *http.Request) + requestObserver monitoring.DynamoDBRequestObserver itemUpdateLocks [itemUpdateLockStripeCount]sync.Mutex tableLocks [tableLockStripeCount]sync.Mutex } -func NewDynamoDBServer(listen net.Listener, st store.MVCCStore, coordinate kv.Coordinator) *DynamoDBServer { +// WithDynamoDBRequestObserver enables Prometheus-compatible request metrics. +func WithDynamoDBRequestObserver(observer monitoring.DynamoDBRequestObserver) DynamoDBServerOption { + return func(server *DynamoDBServer) { + server.requestObserver = observer + } +} + +type dynamoRequestMetricsContextKey struct{} + +type dynamoRequestMetricsState struct { + tables map[string]monitoring.DynamoDBTableMetrics +} + +type countingReadCloser struct { + io.ReadCloser + bytesRead int +} + +func (c *countingReadCloser) Read(p []byte) (int, error) { + if c == nil || c.ReadCloser == nil { + return 0, io.EOF + } + n, err := c.ReadCloser.Read(p) + c.bytesRead += n + if err == nil || errors.Is(err, io.EOF) { + return n, err + } + return n, errors.WithStack(err) +} + +func (c *countingReadCloser) Close() error { + if c == nil || c.ReadCloser == nil { + return nil + } + if err := c.ReadCloser.Close(); err != nil { + return errors.WithStack(err) + } + return nil +} + +func (c *countingReadCloser) BytesRead() int { + if c == nil { + return 0 + } + return c.bytesRead +} + +type dynamoResponseRecorder struct { + http.ResponseWriter + statusCode int + bytesWritten int +} + +func (r *dynamoResponseRecorder) WriteHeader(statusCode int) { + if r.statusCode == 0 { + r.statusCode = statusCode + } + r.ResponseWriter.WriteHeader(statusCode) +} + +func (r *dynamoResponseRecorder) Write(p []byte) (int, error) { + if r.statusCode == 0 { + r.WriteHeader(http.StatusOK) + } + n, err := r.ResponseWriter.Write(p) + r.bytesWritten += n + if err != nil { + return n, errors.WithStack(err) + } + return n, nil +} + +func (r *dynamoResponseRecorder) StatusCode() int { + if r == nil || r.statusCode == 0 { + return http.StatusOK + } + return r.statusCode +} + +func (r *dynamoResponseRecorder) BytesWritten() int { + if r == nil { + return 0 + } + return r.bytesWritten +} + +func NewDynamoDBServer(listen net.Listener, st store.MVCCStore, coordinate kv.Coordinator, opts ...DynamoDBServerOption) *DynamoDBServer { d := &DynamoDBServer{ listen: listen, store: st, @@ -119,6 +209,11 @@ func NewDynamoDBServer(listen net.Listener, st store.MVCCStore, coordinate kv.Co mux := http.NewServeMux() mux.HandleFunc("/", d.handle) d.httpServer = &http.Server{Handler: mux, ReadHeaderTimeout: time.Second} + for _, opt := range opts { + if opt != nil { + opt(d) + } + } return d } @@ -137,9 +232,38 @@ func (d *DynamoDBServer) Stop() { func (d *DynamoDBServer) handle(w http.ResponseWriter, r *http.Request) { target := r.Header.Get("X-Amz-Target") - if !d.dispatchByTarget(target, w, r) { - writeDynamoError(w, http.StatusBadRequest, dynamoErrValidation, "unsupported operation") + if d.requestObserver == nil { + if !d.dispatchByTarget(target, w, r) { + writeDynamoError(w, http.StatusBadRequest, dynamoErrValidation, "unsupported operation") + } + return + } + + operation := dynamoOperationName(target) + d.requestObserver.ObserveInFlightChange(operation, 1) + defer d.requestObserver.ObserveInFlightChange(operation, -1) + + state := &dynamoRequestMetricsState{tables: make(map[string]monitoring.DynamoDBTableMetrics)} + r = r.WithContext(context.WithValue(r.Context(), dynamoRequestMetricsContextKey{}, state)) + bodyCounter := &countingReadCloser{ReadCloser: r.Body} + r.Body = bodyCounter + recorder := &dynamoResponseRecorder{ResponseWriter: w} + started := time.Now() + + if !d.dispatchByTarget(target, recorder, r) { + writeDynamoError(recorder, http.StatusBadRequest, dynamoErrValidation, "unsupported operation") } + + d.requestObserver.ObserveDynamoDBRequest(monitoring.DynamoDBRequestReport{ + Operation: operation, + HTTPStatus: recorder.StatusCode(), + ErrorType: recorder.Header().Get("x-amzn-ErrorType"), + Duration: time.Since(started), + RequestBytes: bodyCounter.BytesRead(), + ResponseBytes: recorder.BytesWritten(), + Tables: state.tableNames(), + TableMetrics: state.tableMetrics(), + }) } func maxDynamoBodyReader(w http.ResponseWriter, r *http.Request) io.Reader { @@ -155,6 +279,105 @@ func (d *DynamoDBServer) dispatchByTarget(target string, w http.ResponseWriter, return true } +func dynamoOperationName(target string) string { + op := strings.TrimSpace(strings.TrimPrefix(target, targetPrefix)) + if op == "" || op == target { + return "unknown" + } + return op +} + +func dynamoRequestMetricsFromContext(ctx context.Context) *dynamoRequestMetricsState { + if ctx == nil { + return nil + } + state, _ := ctx.Value(dynamoRequestMetricsContextKey{}).(*dynamoRequestMetricsState) + return state +} + +func (s *dynamoRequestMetricsState) recordTable(table string) { + if s == nil { + return + } + table = strings.TrimSpace(table) + if table == "" { + return + } + if s.tables == nil { + s.tables = make(map[string]monitoring.DynamoDBTableMetrics) + } + if _, ok := s.tables[table]; !ok { + s.tables[table] = monitoring.DynamoDBTableMetrics{} + } +} + +func (s *dynamoRequestMetricsState) addTableMetrics(table string, returnedItems int, scannedItems int, writtenItems int) { + if s == nil { + return + } + table = strings.TrimSpace(table) + if table == "" { + return + } + if s.tables == nil { + s.tables = make(map[string]monitoring.DynamoDBTableMetrics) + } + metrics := s.tables[table] + metrics.ReturnedItems += returnedItems + metrics.ScannedItems += scannedItems + metrics.WrittenItems += writtenItems + s.tables[table] = metrics +} + +func (s *dynamoRequestMetricsState) tableNames() []string { + if s == nil || len(s.tables) == 0 { + return nil + } + names := make([]string, 0, len(s.tables)) + for table := range s.tables { + names = append(names, table) + } + sort.Strings(names) + return names +} + +func (s *dynamoRequestMetricsState) tableMetrics() map[string]monitoring.DynamoDBTableMetrics { + if s == nil || len(s.tables) == 0 { + return nil + } + out := make(map[string]monitoring.DynamoDBTableMetrics, len(s.tables)) + for table, metrics := range s.tables { + out[table] = metrics + } + return out +} + +func (d *DynamoDBServer) observeTables(ctx context.Context, tables ...string) { + state := dynamoRequestMetricsFromContext(ctx) + if state == nil { + return + } + for _, table := range tables { + state.recordTable(table) + } +} + +func (d *DynamoDBServer) observeReadMetrics(ctx context.Context, table string, returnedItems int, scannedItems int) { + state := dynamoRequestMetricsFromContext(ctx) + if state == nil { + return + } + state.addTableMetrics(table, returnedItems, scannedItems, 0) +} + +func (d *DynamoDBServer) observeWrittenItems(ctx context.Context, table string, writtenItems int) { + state := dynamoRequestMetricsFromContext(ctx) + if state == nil { + return + } + state.addTableMetrics(table, 0, 0, writtenItems) +} + type createTableAttributeDefinition struct { AttributeName string `json:"AttributeName"` AttributeType string `json:"AttributeType"` @@ -328,6 +551,39 @@ type dynamoGlobalSecondaryIndex struct { Projection dynamoGSIProjection `json:"projection"` } +func (g *dynamoGlobalSecondaryIndex) UnmarshalJSON(b []byte) error { + type rawGSI struct { + KeySchema *dynamoKeySchema `json:"key_schema"` + Projection *dynamoGSIProjection `json:"projection"` + HashKey string `json:"hash_key"` + RangeKey string `json:"range_key"` + } + + var raw rawGSI + if err := json.Unmarshal(b, &raw); err != nil { + return errors.WithStack(err) + } + + if raw.KeySchema != nil { + g.KeySchema = *raw.KeySchema + } else { + g.KeySchema = dynamoKeySchema{ + HashKey: raw.HashKey, + RangeKey: raw.RangeKey, + } + } + + if raw.Projection != nil && strings.TrimSpace(raw.Projection.ProjectionType) != "" { + g.Projection = *raw.Projection + } else { + // Older schema snapshots stored only the key schema. Those GSIs behaved + // like ALL projections, so preserve that behavior when normalizing. + g.Projection = dynamoGSIProjection{ProjectionType: "ALL"} + } + + return nil +} + type dynamoTableSchema struct { TableName string `json:"table_name"` AttributeDefinitions map[string]string `json:"attribute_definitions,omitempty"` @@ -344,6 +600,7 @@ func (d *DynamoDBServer) createTable(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), in.TableName) unlock := d.lockTableOperations([]string{in.TableName}) defer unlock() schema, err := buildCreateTableSchema(in) @@ -513,6 +770,7 @@ func (d *DynamoDBServer) deleteTable(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), in.TableName) unlock := d.lockTableOperations([]string{in.TableName}) defer unlock() if err := d.deleteTableWithRetry(r.Context(), in.TableName); err != nil { @@ -687,6 +945,7 @@ func (d *DynamoDBServer) describeTable(w http.ResponseWriter, r *http.Request) { writeDynamoError(w, http.StatusBadRequest, dynamoErrValidation, "missing table name") return } + d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -790,6 +1049,7 @@ func (d *DynamoDBServer) putItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -799,6 +1059,7 @@ func (d *DynamoDBServer) putItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeWrittenItems(r.Context(), in.TableName, 1) resp := map[string]any{} if attrs := putItemReturnAttributes(in.ReturnValues, plan.current); len(attrs) > 0 { resp["Attributes"] = attrs @@ -963,6 +1224,7 @@ func (d *DynamoDBServer) getItem(w http.ResponseWriter, r *http.Request) { writeDynamoError(w, http.StatusBadRequest, dynamoErrValidation, "missing table name") return } + d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -988,6 +1250,7 @@ func (d *DynamoDBServer) getItem(w http.ResponseWriter, r *http.Request) { writeDynamoJSON(w, map[string]any{}) return } + d.observeReadMetrics(r.Context(), in.TableName, 1, 1) projected, err := projectItem(current.item, in.ProjectionExpression, in.ExpressionAttributeNames) if err != nil { writeDynamoErrorFromErr(w, err) @@ -1002,6 +1265,7 @@ func (d *DynamoDBServer) deleteItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -1018,6 +1282,9 @@ func (d *DynamoDBServer) deleteItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + if len(plan.current) > 0 { + d.observeWrittenItems(r.Context(), in.TableName, 1) + } resp := map[string]any{} if shouldReturnOld && len(plan.current) > 0 { resp["Attributes"] = plan.current @@ -1129,6 +1396,7 @@ func (d *DynamoDBServer) updateItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -1145,6 +1413,7 @@ func (d *DynamoDBServer) updateItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeWrittenItems(r.Context(), in.TableName, 1) resp := map[string]any{} if attrs := updateItemReturnAttributes(in.ReturnValues, plan.current, plan.next); len(attrs) > 0 { resp["Attributes"] = attrs @@ -1454,11 +1723,13 @@ func (d *DynamoDBServer) query(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), in.TableName) out, err := d.queryItems(r.Context(), in) if err != nil { writeDynamoErrorFromErr(w, err) return } + d.observeReadMetrics(r.Context(), in.TableName, out.count, out.scannedCount) resp := map[string]any{ "Items": out.items, "Count": out.count, @@ -1476,11 +1747,13 @@ func (d *DynamoDBServer) scan(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), in.TableName) out, err := d.scanItems(r.Context(), in) if err != nil { writeDynamoErrorFromErr(w, err) return } + d.observeReadMetrics(r.Context(), in.TableName, out.count, out.scannedCount) resp := map[string]any{ "Items": out.items, "Count": out.count, @@ -3252,11 +3525,15 @@ func (d *DynamoDBServer) batchWriteItem(w http.ResponseWriter, r *http.Request) writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), batchWriteTableNames(in)...) unprocessed, err := d.batchWriteItems(r.Context(), in) if err != nil { writeDynamoErrorFromErr(w, err) return } + for table, written := range batchWriteCommittedCounts(in, unprocessed) { + d.observeWrittenItems(r.Context(), table, written) + } writeDynamoJSON(w, map[string]any{"UnprocessedItems": unprocessed}) } @@ -3288,6 +3565,26 @@ func decodeBatchWriteItemInput(bodyReader io.Reader) (batchWriteItemInput, error return in, nil } +func batchWriteTableNames(in batchWriteItemInput) []string { + names := make([]string, 0, len(in.RequestItems)) + for table := range in.RequestItems { + names = append(names, table) + } + sort.Strings(names) + return names +} + +func batchWriteCommittedCounts(in batchWriteItemInput, unprocessed map[string][]batchWriteRequest) map[string]int { + out := make(map[string]int, len(in.RequestItems)) + for table, requests := range in.RequestItems { + written := len(requests) - len(unprocessed[table]) + if written > 0 { + out[table] = written + } + } + return out +} + func (d *DynamoDBServer) batchWriteItems( ctx context.Context, in batchWriteItemInput, @@ -3474,14 +3771,19 @@ func (d *DynamoDBServer) transactWriteItems(w http.ResponseWriter, r *http.Reque writeDynamoErrorFromErr(w, err) return } - if _, err := collectTransactWriteTableNames(in); err != nil { + tableNames, err := collectTransactWriteTableNames(in) + if err != nil { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), tableNames...) if err := d.transactWriteItemsWithRetry(r.Context(), in); err != nil { writeDynamoErrorFromErr(w, err) return } + for table, written := range transactWriteWrittenCounts(in) { + d.observeWrittenItems(r.Context(), table, written) + } writeDynamoJSON(w, map[string]any{}) } @@ -3520,6 +3822,21 @@ func collectTransactWriteTableNames(in transactWriteItemsInput) ([]string, error return names, nil } +func transactWriteWrittenCounts(in transactWriteItemsInput) map[string]int { + out := make(map[string]int) + for _, item := range in.TransactItems { + tableName, err := transactWriteItemTableName(item) + if err != nil || strings.TrimSpace(tableName) == "" { + continue + } + switch { + case item.Put != nil, item.Update != nil, item.Delete != nil: + out[tableName]++ + } + } + return out +} + func transactWriteItemTableName(item transactWriteItem) (string, error) { switch countTransactWriteItemActions(item) { case 0: diff --git a/adapter/dynamodb_metrics_test.go b/adapter/dynamodb_metrics_test.go new file mode 100644 index 00000000..c47b28b2 --- /dev/null +++ b/adapter/dynamodb_metrics_test.go @@ -0,0 +1,76 @@ +package adapter + +import ( + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/bootjp/elastickv/monitoring" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" +) + +func TestDynamoDBServerHandleObservesSuccessMetrics(t *testing.T) { + registry := monitoring.NewRegistry("n1", "10.0.0.1:50051") + server := NewDynamoDBServer(nil, nil, &stubAdapterCoordinator{}, WithDynamoDBRequestObserver(registry.DynamoDBObserver())) + server.targetHandlers = map[string]func(http.ResponseWriter, *http.Request){ + putItemTarget: func(w http.ResponseWriter, r *http.Request) { + _, _ = io.ReadAll(maxDynamoBodyReader(w, r)) + server.observeTables(r.Context(), "orders") + server.observeWrittenItems(r.Context(), "orders", 1) + writeDynamoJSON(w, map[string]any{"ok": true}) + }, + } + + req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(`{"TableName":"orders"}`)) + req.Header.Set("X-Amz-Target", putItemTarget) + rec := httptest.NewRecorder() + server.handle(rec, req) + + require.Equal(t, http.StatusOK, rec.Code) + err := testutil.GatherAndCompare( + registry.Gatherer(), + strings.NewReader(` +# HELP elastickv_dynamodb_requests_total Total number of DynamoDB-compatible API requests by operation and outcome. +# TYPE elastickv_dynamodb_requests_total counter +elastickv_dynamodb_requests_total{node_address="10.0.0.1:50051",node_id="n1",operation="PutItem",outcome="success"} 1 +# HELP elastickv_dynamodb_written_items_total Total number of items written or deleted by DynamoDB-compatible write APIs. +# TYPE elastickv_dynamodb_written_items_total counter +elastickv_dynamodb_written_items_total{node_address="10.0.0.1:50051",node_id="n1",operation="PutItem",table="orders"} 1 +`), + "elastickv_dynamodb_requests_total", + "elastickv_dynamodb_written_items_total", + ) + require.NoError(t, err) +} + +func TestDynamoDBServerHandleObservesConditionalFailures(t *testing.T) { + registry := monitoring.NewRegistry("n1", "10.0.0.1:50051") + server := NewDynamoDBServer(nil, nil, &stubAdapterCoordinator{}, WithDynamoDBRequestObserver(registry.DynamoDBObserver())) + server.targetHandlers = map[string]func(http.ResponseWriter, *http.Request){ + updateItemTarget: func(w http.ResponseWriter, r *http.Request) { + _, _ = io.ReadAll(maxDynamoBodyReader(w, r)) + server.observeTables(r.Context(), "orders") + writeDynamoError(w, http.StatusBadRequest, dynamoErrConditionalFailed, "condition failed") + }, + } + + req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(`{"TableName":"orders"}`)) + req.Header.Set("X-Amz-Target", updateItemTarget) + rec := httptest.NewRecorder() + server.handle(rec, req) + + require.Equal(t, http.StatusBadRequest, rec.Code) + err := testutil.GatherAndCompare( + registry.Gatherer(), + strings.NewReader(` +# HELP elastickv_dynamodb_conditional_check_failed_total Total number of conditional check failures returned by DynamoDB-compatible APIs. +# TYPE elastickv_dynamodb_conditional_check_failed_total counter +elastickv_dynamodb_conditional_check_failed_total{node_address="10.0.0.1:50051",node_id="n1",operation="UpdateItem",table="orders"} 1 +`), + "elastickv_dynamodb_conditional_check_failed_total", + ) + require.NoError(t, err) +} diff --git a/adapter/dynamodb_migration_test.go b/adapter/dynamodb_migration_test.go index d0395912..11298048 100644 --- a/adapter/dynamodb_migration_test.go +++ b/adapter/dynamodb_migration_test.go @@ -2,6 +2,7 @@ package adapter import ( "context" + "encoding/json" "fmt" "testing" "time" @@ -151,6 +152,61 @@ func TestDynamoDB_EnsureLegacyTableMigration_MigratesLegacyGeneration(t *testing }, time.Second, 10*time.Millisecond) } +func TestDynamoDB_EnsureLegacyTableMigration_NormalizesLegacyGSIJSONFormat(t *testing.T) { + t.Parallel() + + legacySchema, server, st := newLegacyMigrationTestServer(t, true, "S") + writer := newDynamoFixtureWriter(t, st) + + legacyBody, err := json.Marshal(map[string]any{ + "table_name": legacySchema.TableName, + "attribute_definitions": legacySchema.AttributeDefinitions, + "primary_key": map[string]any{ + "hash_key": legacySchema.PrimaryKey.HashKey, + "range_key": legacySchema.PrimaryKey.RangeKey, + }, + "global_secondary_indexes": map[string]any{ + "status-index": map[string]any{ + "hash_key": "status", + "range_key": "sk", + }, + }, + "generation": legacySchema.Generation, + }) + require.NoError(t, err) + writer.put(dynamoTableMetaKey(legacySchema.TableName), legacyBody) + writer.put(dynamoTableGenerationKey(legacySchema.TableName), []byte(fmt.Sprintf("%d", legacySchema.Generation))) + + writer.writeItem(legacySchema, map[string]attributeValue{ + "pk": newStringAttributeValue("tenant"), + "sk": newStringAttributeValue("2026-03-09T12:00:00Z"), + "status": newStringAttributeValue("open"), + "value": newStringAttributeValue("v1"), + }) + + ctx := context.Background() + require.NoError(t, server.ensureLegacyTableMigration(ctx, legacySchema.TableName)) + + schema, exists, err := server.loadTableSchema(ctx, legacySchema.TableName) + require.NoError(t, err) + require.True(t, exists) + require.Equal(t, "status", schema.GlobalSecondaryIndexes["status-index"].KeySchema.HashKey) + require.Equal(t, "sk", schema.GlobalSecondaryIndexes["status-index"].KeySchema.RangeKey) + require.Equal(t, "ALL", schema.GlobalSecondaryIndexes["status-index"].Projection.ProjectionType) + + out, err := server.queryItems(ctx, queryInput{ + TableName: legacySchema.TableName, + IndexName: "status-index", + KeyConditionExpression: "status = :status", + ExpressionAttributeValues: map[string]attributeValue{ + ":status": newStringAttributeValue("open"), + }, + }) + require.NoError(t, err) + require.Len(t, out.items, 1) + require.Equal(t, newStringAttributeValue("v1"), out.items[0]["value"]) +} + func TestDynamoDB_EnsureLegacyTableMigration_PrefersExistingTargetItems(t *testing.T) { t.Parallel() diff --git a/cmd/server/demo.go b/cmd/server/demo.go index 439a076c..e78ac8b1 100644 --- a/cmd/server/demo.go +++ b/cmd/server/demo.go @@ -7,6 +7,7 @@ import ( "io" "log/slog" "net" + "net/http" "os" "path/filepath" "strings" @@ -19,6 +20,7 @@ import ( "github.com/bootjp/elastickv/adapter" "github.com/bootjp/elastickv/distribution" "github.com/bootjp/elastickv/kv" + "github.com/bootjp/elastickv/monitoring" pb "github.com/bootjp/elastickv/proto" "github.com/bootjp/elastickv/store" "github.com/cockroachdb/errors" @@ -31,13 +33,14 @@ import ( ) var ( - address = flag.String("address", ":50051", "gRPC/Raft address") - redisAddress = flag.String("redisAddress", ":6379", "Redis address") - dynamoAddress = flag.String("dynamoAddress", ":8000", "DynamoDB-compatible API address") - raftID = flag.String("raftId", "", "Raft ID") - raftDataDir = flag.String("raftDataDir", "/var/lib/elastickv", "Raft data directory") - raftBootstrap = flag.Bool("raftBootstrap", false, "Bootstrap cluster") - raftRedisMap = flag.String("raftRedisMap", "", "Map of Raft address to Redis address (raftAddr=redisAddr,...)") + address = flag.String("address", ":50051", "gRPC/Raft address") + redisAddress = flag.String("redisAddress", ":6379", "Redis address") + dynamoAddress = flag.String("dynamoAddress", ":8000", "DynamoDB-compatible API address") + metricsAddress = flag.String("metricsAddress", ":9090", "Prometheus metrics address") + raftID = flag.String("raftId", "", "Raft ID") + raftDataDir = flag.String("raftDataDir", "/var/lib/elastickv", "Raft data directory") + raftBootstrap = flag.Bool("raftBootstrap", false, "Bootstrap cluster") + raftRedisMap = flag.String("raftRedisMap", "", "Map of Raft address to Redis address (raftAddr=redisAddr,...)") ) const ( @@ -57,13 +60,14 @@ func init() { } type config struct { - address string - redisAddress string - dynamoAddress string - raftID string - raftDataDir string - raftBootstrap bool - raftRedisMap string + address string + redisAddress string + dynamoAddress string + metricsAddress string + raftID string + raftDataDir string + raftBootstrap bool + raftRedisMap string } func main() { @@ -74,13 +78,14 @@ func main() { if *raftID != "" { // Single node mode cfg := config{ - address: *address, - redisAddress: *redisAddress, - dynamoAddress: *dynamoAddress, - raftID: *raftID, - raftDataDir: *raftDataDir, - raftBootstrap: *raftBootstrap, - raftRedisMap: *raftRedisMap, + address: *address, + redisAddress: *redisAddress, + dynamoAddress: *dynamoAddress, + metricsAddress: *metricsAddress, + raftID: *raftID, + raftDataDir: *raftDataDir, + raftBootstrap: *raftBootstrap, + raftRedisMap: *raftRedisMap, } if err := run(runCtx, eg, cfg); err != nil { slog.Error(err.Error()) @@ -91,28 +96,31 @@ func main() { slog.Info("Starting demo cluster with 3 nodes...") nodes := []config{ { - address: "127.0.0.1:50051", - redisAddress: "127.0.0.1:63791", - dynamoAddress: "127.0.0.1:63801", - raftID: "n1", - raftDataDir: "", // In-memory - raftBootstrap: true, + address: "127.0.0.1:50051", + redisAddress: "127.0.0.1:63791", + dynamoAddress: "127.0.0.1:63801", + metricsAddress: "127.0.0.1:9091", + raftID: "n1", + raftDataDir: "", // In-memory + raftBootstrap: true, }, { - address: "127.0.0.1:50052", - redisAddress: "127.0.0.1:63792", - dynamoAddress: "127.0.0.1:63802", - raftID: "n2", - raftDataDir: "", - raftBootstrap: false, + address: "127.0.0.1:50052", + redisAddress: "127.0.0.1:63792", + dynamoAddress: "127.0.0.1:63802", + metricsAddress: "127.0.0.1:9092", + raftID: "n2", + raftDataDir: "", + raftBootstrap: false, }, { - address: "127.0.0.1:50053", - redisAddress: "127.0.0.1:63793", - dynamoAddress: "127.0.0.1:63803", - raftID: "n3", - raftDataDir: "", - raftBootstrap: false, + address: "127.0.0.1:50053", + redisAddress: "127.0.0.1:63793", + dynamoAddress: "127.0.0.1:63803", + metricsAddress: "127.0.0.1:9093", + raftID: "n3", + raftDataDir: "", + raftBootstrap: false, }, } @@ -388,6 +396,8 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { distCatalog, adapter.WithDistributionCoordinator(coordinator), ) + metricsRegistry := monitoring.NewRegistry(cfg.raftID, cfg.address) + metricsRegistry.RaftObserver().Start(ctx, []monitoring.RaftRuntime{{GroupID: 1, Raft: r}}, 5*time.Second) s, grpcSvc := setupGRPC(r, st, tm, coordinator, distServer) @@ -404,7 +414,20 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { if err != nil { return errors.WithStack(err) } - ds := adapter.NewDynamoDBServer(dynamoL, st, coordinator) + ds := adapter.NewDynamoDBServer( + dynamoL, + st, + coordinator, + adapter.WithDynamoDBRequestObserver(metricsRegistry.DynamoDBObserver()), + ) + metricsL, err := lc.Listen(ctx, "tcp", cfg.metricsAddress) + if err != nil { + return errors.WithStack(err) + } + ms := &http.Server{ + Handler: metricsRegistry.Handler(), + ReadHeaderTimeout: time.Second, + } eg.Go(catalogWatcherTask(ctx, distCatalog, distEngine)) eg.Go(grpcShutdownTask(ctx, s, grpcSock, cfg.address, grpcSvc)) @@ -413,6 +436,8 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { eg.Go(redisServeTask(rd, cfg.redisAddress)) eg.Go(dynamoShutdownTask(ctx, ds, cfg.dynamoAddress)) eg.Go(dynamoServeTask(ds, cfg.dynamoAddress)) + eg.Go(metricsShutdownTask(ctx, ms, cfg.metricsAddress)) + eg.Go(metricsServeTask(ms, metricsL, cfg.metricsAddress)) return nil } @@ -493,3 +518,22 @@ func dynamoServeTask(dynamoServer *adapter.DynamoDBServer, address string) func( return errors.WithStack(err) } } + +func metricsShutdownTask(ctx context.Context, server *http.Server, address string) func() error { + return func() error { + <-ctx.Done() + slog.Info("Shutting down metrics server", "address", address, "reason", ctx.Err()) + return server.Shutdown(context.Background()) + } +} + +func metricsServeTask(server *http.Server, listener net.Listener, address string) func() error { + return func() error { + slog.Info("Starting metrics server", "address", address) + err := server.Serve(listener) + if err == nil || errors.Is(err, http.ErrServerClosed) || errors.Is(err, net.ErrClosed) { + return nil + } + return errors.WithStack(err) + } +} diff --git a/docs/docker_multinode_manual_run.md b/docs/docker_multinode_manual_run.md index 9c7a524b..1861e7aa 100644 --- a/docs/docker_multinode_manual_run.md +++ b/docs/docker_multinode_manual_run.md @@ -17,6 +17,7 @@ Use private VM IPs for all bind addresses and lock down network access because g - 1 node = 1 VM - Total nodes: 4 or 5 - VMs must be able to reach each other over TCP (at minimum `50051/tcp`) +- Prometheus must be able to reach each node's metrics endpoint if you want centralized monitoring (`9090/tcp` in the examples below) - Docker Engine installed on every VM Example (5 nodes): @@ -92,6 +93,7 @@ This guide uses `--network host` and explicit VM private IPs. Binding guidance: - Set `--address`, `--redisAddress`, and `--dynamoAddress` to the VM private IP. +- Set `--metricsAddress` to the VM private IP if Prometheus scrapes from another host. - Do not use `0.0.0.0` as the advertised address. - Do not use `localhost` for cluster communication. @@ -109,6 +111,7 @@ docker run -d \ --address "10.0.0.11:50051" \ --redisAddress "10.0.0.11:6379" \ --dynamoAddress "10.0.0.11:8000" \ + --metricsAddress "10.0.0.11:9090" \ --raftId "n1" \ --raftDataDir "/var/lib/elastickv" \ --raftRedisMap "${RAFT_TO_REDIS_MAP}" \ @@ -130,6 +133,7 @@ docker run -d \ --address "10.0.0.12:50051" \ --redisAddress "10.0.0.12:6379" \ --dynamoAddress "10.0.0.12:8000" \ + --metricsAddress "10.0.0.12:9090" \ --raftId "n2" \ --raftDataDir "/var/lib/elastickv" \ --raftRedisMap "${RAFT_TO_REDIS_MAP}" \ @@ -142,6 +146,7 @@ Start `n3` to `n5` the same way by replacing: - `--address` - `--redisAddress` - `--dynamoAddress` +- `--metricsAddress` - `--raftId` ## Startup Order and Quorum Caution @@ -203,6 +208,16 @@ redis-cli -h 10.0.0.11 -p 6379 SET health ok redis-cli -h 10.0.0.12 -p 6379 GET health ``` +## 6.5) Validate Metrics + +Check the local Prometheus endpoint on any node: + +```bash +curl -fsS http://10.0.0.11:9090/metrics | grep '^elastickv_' +``` + +Grafana/Prometheus provisioning examples are available under `monitoring/`. + ## 7) Fault Tolerance Drill With 5 nodes, the cluster should continue serving with up to 2 node failures. diff --git a/main.go b/main.go index d7366e42..a0d6eb24 100644 --- a/main.go +++ b/main.go @@ -5,6 +5,7 @@ import ( "flag" "log" "net" + "net/http" "strings" "sync" "time" @@ -14,6 +15,7 @@ import ( "github.com/bootjp/elastickv/adapter" "github.com/bootjp/elastickv/distribution" "github.com/bootjp/elastickv/kv" + "github.com/bootjp/elastickv/monitoring" pb "github.com/bootjp/elastickv/proto" "github.com/bootjp/elastickv/store" "github.com/cockroachdb/errors" @@ -27,12 +29,14 @@ const ( heartbeatTimeout = 200 * time.Millisecond electionTimeout = 2000 * time.Millisecond leaderLease = 100 * time.Millisecond + raftMetricsObserveInterval = 5 * time.Second ) var ( myAddr = flag.String("address", "localhost:50051", "TCP host+port for this node") redisAddr = flag.String("redisAddress", "localhost:6379", "TCP host+port for redis") dynamoAddr = flag.String("dynamoAddress", "localhost:8000", "TCP host+port for DynamoDB-compatible API") + metricsAddr = flag.String("metricsAddress", "localhost:9090", "TCP host+port for Prometheus metrics") raftId = flag.String("raftId", "", "Node id used by Raft") raftDir = flag.String("raftDataDir", "data/", "Raft data dir") raftBootstrap = flag.Bool("raftBootstrap", false, "Whether to bootstrap the Raft cluster") @@ -96,19 +100,23 @@ func run() error { distCatalog, adapter.WithDistributionCoordinator(coordinate), ) + metricsRegistry := monitoring.NewRegistry(*raftId, *myAddr) + metricsRegistry.RaftObserver().Start(runCtx, raftMonitorRuntimes(runtimes), raftMetricsObserveInterval) runner := runtimeServerRunner{ - ctx: runCtx, - lc: &lc, - eg: eg, - cancel: cancel, - runtimes: runtimes, - shardStore: shardStore, - coordinate: coordinate, - distServer: distServer, - redisAddress: *redisAddr, - leaderRedis: cfg.leaderRedis, - dynamoAddress: *dynamoAddr, + ctx: runCtx, + lc: &lc, + eg: eg, + cancel: cancel, + runtimes: runtimes, + shardStore: shardStore, + coordinate: coordinate, + distServer: distServer, + redisAddress: *redisAddr, + leaderRedis: cfg.leaderRedis, + dynamoAddress: *dynamoAddr, + metricsAddress: *metricsAddr, + metricsRegistry: metricsRegistry, } if err := runner.start(); err != nil { return err @@ -256,6 +264,20 @@ func buildShardGroups(raftID string, raftDir string, groups []groupSpec, multi b return runtimes, shardGroups, nil } +func raftMonitorRuntimes(runtimes []*raftGroupRuntime) []monitoring.RaftRuntime { + out := make([]monitoring.RaftRuntime, 0, len(runtimes)) + for _, runtime := range runtimes { + if runtime == nil || runtime.raft == nil { + continue + } + out = append(out, monitoring.RaftRuntime{ + GroupID: runtime.spec.id, + Raft: runtime.raft, + }) + } + return out +} + func startRaftServers(ctx context.Context, lc *net.ListenConfig, eg *errgroup.Group, runtimes []*raftGroupRuntime, shardStore *kv.ShardStore, coordinate kv.Coordinator, distServer *adapter.DistributionServer) error { for _, rt := range runtimes { gs := grpc.NewServer() @@ -330,12 +352,17 @@ func startRedisServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup.Gr return nil } -func startDynamoDBServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup.Group, dynamoAddr string, shardStore *kv.ShardStore, coordinate kv.Coordinator) error { +func startDynamoDBServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup.Group, dynamoAddr string, shardStore *kv.ShardStore, coordinate kv.Coordinator, metricsRegistry *monitoring.Registry) error { dynamoL, err := lc.Listen(ctx, "tcp", dynamoAddr) if err != nil { return errors.Wrapf(err, "failed to listen on %s", dynamoAddr) } - dynamoServer := adapter.NewDynamoDBServer(dynamoL, shardStore, coordinate) + dynamoServer := adapter.NewDynamoDBServer( + dynamoL, + shardStore, + coordinate, + adapter.WithDynamoDBRequestObserver(metricsRegistry.DynamoDBObserver()), + ) eg.Go(func() error { defer dynamoServer.Stop() stop := make(chan struct{}) @@ -356,6 +383,37 @@ func startDynamoDBServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup return nil } +func startMetricsServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup.Group, metricsAddr string, handler http.Handler) error { + if strings.TrimSpace(metricsAddr) == "" || handler == nil { + return nil + } + metricsL, err := lc.Listen(ctx, "tcp", metricsAddr) + if err != nil { + return errors.Wrapf(err, "failed to listen on %s", metricsAddr) + } + metricsServer := &http.Server{ + Handler: handler, + ReadHeaderTimeout: time.Second, + } + eg.Go(func() error { + stop := make(chan struct{}) + go func() { + select { + case <-ctx.Done(): + _ = metricsServer.Shutdown(context.Background()) + case <-stop: + } + }() + err := metricsServer.Serve(metricsL) + close(stop) + if errors.Is(err, http.ErrServerClosed) || errors.Is(err, net.ErrClosed) { + return nil + } + return errors.WithStack(err) + }) + return nil +} + func distributionCatalogStoreForGroup(runtimes []*raftGroupRuntime, groupID uint64) *distribution.CatalogStore { for _, rt := range runtimes { if rt == nil || rt.store == nil { @@ -421,17 +479,19 @@ func waitErrgroupAfterStartupFailure(cancel context.CancelFunc, eg *errgroup.Gro } type runtimeServerRunner struct { - ctx context.Context - lc *net.ListenConfig - eg *errgroup.Group - cancel context.CancelFunc - runtimes []*raftGroupRuntime - shardStore *kv.ShardStore - coordinate kv.Coordinator - distServer *adapter.DistributionServer - redisAddress string - leaderRedis map[raft.ServerAddress]string - dynamoAddress string + ctx context.Context + lc *net.ListenConfig + eg *errgroup.Group + cancel context.CancelFunc + runtimes []*raftGroupRuntime + shardStore *kv.ShardStore + coordinate kv.Coordinator + distServer *adapter.DistributionServer + redisAddress string + leaderRedis map[raft.ServerAddress]string + dynamoAddress string + metricsAddress string + metricsRegistry *monitoring.Registry } func (r runtimeServerRunner) start() error { @@ -441,7 +501,10 @@ func (r runtimeServerRunner) start() error { if err := startRedisServer(r.ctx, r.lc, r.eg, r.redisAddress, r.shardStore, r.coordinate, r.leaderRedis); err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) } - if err := startDynamoDBServer(r.ctx, r.lc, r.eg, r.dynamoAddress, r.shardStore, r.coordinate); err != nil { + if err := startDynamoDBServer(r.ctx, r.lc, r.eg, r.dynamoAddress, r.shardStore, r.coordinate, r.metricsRegistry); err != nil { + return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) + } + if err := startMetricsServer(r.ctx, r.lc, r.eg, r.metricsAddress, r.metricsRegistry.Handler()); err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) } return nil diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 00000000..c3805b0f --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,23 @@ +services: + prometheus: + image: prom/prometheus:v3.5.0 + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + ports: + - "9095:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + extra_hosts: + - "host.docker.internal:host-gateway" + + grafana: + image: grafana/grafana:12.2.0 + depends_on: + - prometheus + ports: + - "3000:3000" + volumes: + - ./grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro + - ./grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro + - ./grafana/dashboards:/var/lib/grafana/dashboards:ro diff --git a/monitoring/dynamodb.go b/monitoring/dynamodb.go new file mode 100644 index 00000000..8d8eacaf --- /dev/null +++ b/monitoring/dynamodb.go @@ -0,0 +1,281 @@ +package monitoring + +import ( + "net/http" + "slices" + "strings" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + dynamoOutcomeSuccess = "success" + dynamoOutcomeUserError = "user_error" + dynamoOutcomeSystemError = "system_error" + dynamoOutcomeConditionalCheckFailed = "conditional_check_failed" + dynamoOperationUnknown = "unknown" +) + +// DynamoDBRequestObserver records per-request DynamoDB-compatible API metrics. +type DynamoDBRequestObserver interface { + ObserveInFlightChange(operation string, delta float64) + ObserveDynamoDBRequest(report DynamoDBRequestReport) +} + +// DynamoDBRequestReport is the normalized result of a single request. +type DynamoDBRequestReport struct { + Operation string + HTTPStatus int + ErrorType string + Duration time.Duration + RequestBytes int + ResponseBytes int + Tables []string + TableMetrics map[string]DynamoDBTableMetrics +} + +// DynamoDBTableMetrics captures table-scoped work done by a request. +type DynamoDBTableMetrics struct { + ReturnedItems int + ScannedItems int + WrittenItems int +} + +type DynamoDBMetrics struct { + inflightRequests *prometheus.GaugeVec + requestsTotal *prometheus.CounterVec + tableRequests *prometheus.CounterVec + requestDuration *prometheus.HistogramVec + requestSize *prometheus.HistogramVec + responseSize *prometheus.HistogramVec + returnedItems *prometheus.CounterVec + scannedItems *prometheus.CounterVec + writtenItems *prometheus.CounterVec + userErrors *prometheus.CounterVec + systemErrors *prometheus.CounterVec + conditionalFails *prometheus.CounterVec +} + +func newDynamoDBMetrics(registerer prometheus.Registerer) *DynamoDBMetrics { + m := &DynamoDBMetrics{ + inflightRequests: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_dynamodb_inflight_requests", + Help: "Current number of in-flight DynamoDB-compatible API requests.", + }, + []string{"operation"}, + ), + requestsTotal: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "elastickv_dynamodb_requests_total", + Help: "Total number of DynamoDB-compatible API requests by operation and outcome.", + }, + []string{"operation", "outcome"}, + ), + tableRequests: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "elastickv_dynamodb_table_requests_total", + Help: "Total number of table-scoped DynamoDB-compatible API requests by operation and outcome.", + }, + []string{"operation", "table", "outcome"}, + ), + requestDuration: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "elastickv_dynamodb_request_duration_seconds", + Help: "End-to-end latency of DynamoDB-compatible API requests.", + Buckets: []float64{0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5}, + }, + []string{"operation", "outcome"}, + ), + requestSize: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "elastickv_dynamodb_request_size_bytes", + Help: "Observed DynamoDB-compatible request sizes in bytes.", + Buckets: []float64{128, 512, 1024, 4 * 1024, 16 * 1024, 64 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024}, + }, + []string{"operation"}, + ), + responseSize: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "elastickv_dynamodb_response_size_bytes", + Help: "Observed DynamoDB-compatible response sizes in bytes.", + Buckets: []float64{128, 512, 1024, 4 * 1024, 16 * 1024, 64 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024}, + }, + []string{"operation", "outcome"}, + ), + returnedItems: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "elastickv_dynamodb_returned_items_total", + Help: "Total number of items returned by DynamoDB-compatible read APIs.", + }, + []string{"operation", "table"}, + ), + scannedItems: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "elastickv_dynamodb_scanned_items_total", + Help: "Total number of items scanned by DynamoDB-compatible read APIs.", + }, + []string{"operation", "table"}, + ), + writtenItems: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "elastickv_dynamodb_written_items_total", + Help: "Total number of items written or deleted by DynamoDB-compatible write APIs.", + }, + []string{"operation", "table"}, + ), + userErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "elastickv_dynamodb_user_errors_total", + Help: "Total number of client-caused DynamoDB-compatible API errors.", + }, + []string{"operation", "table"}, + ), + systemErrors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "elastickv_dynamodb_system_errors_total", + Help: "Total number of server-side DynamoDB-compatible API errors.", + }, + []string{"operation", "table"}, + ), + conditionalFails: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "elastickv_dynamodb_conditional_check_failed_total", + Help: "Total number of conditional check failures returned by DynamoDB-compatible APIs.", + }, + []string{"operation", "table"}, + ), + } + + registerer.MustRegister( + m.inflightRequests, + m.requestsTotal, + m.tableRequests, + m.requestDuration, + m.requestSize, + m.responseSize, + m.returnedItems, + m.scannedItems, + m.writtenItems, + m.userErrors, + m.systemErrors, + m.conditionalFails, + ) + + return m +} + +// ObserveInFlightChange adjusts the in-flight request gauge for an operation. +func (m *DynamoDBMetrics) ObserveInFlightChange(operation string, delta float64) { + if m == nil { + return + } + operation = normalizeDynamoOperation(operation) + m.inflightRequests.WithLabelValues(operation).Add(delta) +} + +// ObserveDynamoDBRequest records the final outcome of a request. +func (m *DynamoDBMetrics) ObserveDynamoDBRequest(report DynamoDBRequestReport) { + if m == nil { + return + } + + operation := normalizeDynamoOperation(report.Operation) + outcome := classifyDynamoOutcome(report.HTTPStatus, report.ErrorType) + tableMetrics := report.TableMetrics + if tableMetrics == nil { + tableMetrics = map[string]DynamoDBTableMetrics{} + } + + m.observeRequest(operation, outcome, report) + m.observeTables(operation, outcome, mergeReportTables(report.Tables, tableMetrics), tableMetrics) +} + +func normalizeDynamoOperation(operation string) string { + operation = strings.TrimSpace(operation) + if operation == "" { + return dynamoOperationUnknown + } + return operation +} + +func (m *DynamoDBMetrics) observeRequest(operation string, outcome string, report DynamoDBRequestReport) { + m.requestsTotal.WithLabelValues(operation, outcome).Inc() + m.requestDuration.WithLabelValues(operation, outcome).Observe(report.Duration.Seconds()) + m.requestSize.WithLabelValues(operation).Observe(float64(max(report.RequestBytes, 0))) + m.responseSize.WithLabelValues(operation, outcome).Observe(float64(max(report.ResponseBytes, 0))) +} + +func mergeReportTables(tables []string, tableMetrics map[string]DynamoDBTableMetrics) []string { + merged := append([]string(nil), tables...) + for table := range tableMetrics { + if table = strings.TrimSpace(table); table != "" { + merged = append(merged, table) + } + } + return normalizedTables(merged) +} + +func (m *DynamoDBMetrics) observeTables(operation string, outcome string, tables []string, tableMetrics map[string]DynamoDBTableMetrics) { + for _, table := range tables { + m.tableRequests.WithLabelValues(operation, table, outcome).Inc() + m.observeTableActivity(operation, table, tableMetrics[table]) + m.observeTableOutcome(operation, table, outcome) + } +} + +func (m *DynamoDBMetrics) observeTableActivity(operation string, table string, metrics DynamoDBTableMetrics) { + if metrics.ReturnedItems > 0 { + m.returnedItems.WithLabelValues(operation, table).Add(float64(metrics.ReturnedItems)) + } + if metrics.ScannedItems > 0 { + m.scannedItems.WithLabelValues(operation, table).Add(float64(metrics.ScannedItems)) + } + if metrics.WrittenItems > 0 { + m.writtenItems.WithLabelValues(operation, table).Add(float64(metrics.WrittenItems)) + } +} + +func (m *DynamoDBMetrics) observeTableOutcome(operation string, table string, outcome string) { + switch outcome { + case dynamoOutcomeConditionalCheckFailed: + m.conditionalFails.WithLabelValues(operation, table).Inc() + case dynamoOutcomeUserError: + m.userErrors.WithLabelValues(operation, table).Inc() + case dynamoOutcomeSystemError: + m.systemErrors.WithLabelValues(operation, table).Inc() + } +} + +func normalizedTables(tables []string) []string { + out := make([]string, 0, len(tables)) + seen := make(map[string]struct{}, len(tables)) + for _, table := range tables { + table = strings.TrimSpace(table) + if table == "" { + continue + } + if _, ok := seen[table]; ok { + continue + } + seen[table] = struct{}{} + out = append(out, table) + } + slices.Sort(out) + return out +} + +func classifyDynamoOutcome(status int, errorType string) string { + if strings.EqualFold(strings.TrimSpace(errorType), "ConditionalCheckFailedException") { + return dynamoOutcomeConditionalCheckFailed + } + switch { + case status >= 200 && status < 300: + return dynamoOutcomeSuccess + case status >= http.StatusInternalServerError: + return dynamoOutcomeSystemError + default: + return dynamoOutcomeUserError + } +} diff --git a/monitoring/dynamodb_test.go b/monitoring/dynamodb_test.go new file mode 100644 index 00000000..291ef220 --- /dev/null +++ b/monitoring/dynamodb_test.go @@ -0,0 +1,74 @@ +package monitoring + +import ( + "strings" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" +) + +func TestDynamoDBMetricsObserveRequest(t *testing.T) { + registry := NewRegistry("n1", "10.0.0.1:50051") + metrics, ok := registry.DynamoDBObserver().(*DynamoDBMetrics) + require.True(t, ok) + + metrics.ObserveInFlightChange("PutItem", 1) + metrics.ObserveInFlightChange("PutItem", -1) + metrics.ObserveDynamoDBRequest(DynamoDBRequestReport{ + Operation: "PutItem", + HTTPStatus: 200, + Duration: 12 * time.Millisecond, + RequestBytes: 256, + ResponseBytes: 64, + Tables: []string{"orders", "orders"}, + TableMetrics: map[string]DynamoDBTableMetrics{ + "orders": {WrittenItems: 1}, + }, + }) + + err := testutil.GatherAndCompare( + registry.Gatherer(), + strings.NewReader(` +# HELP elastickv_dynamodb_requests_total Total number of DynamoDB-compatible API requests by operation and outcome. +# TYPE elastickv_dynamodb_requests_total counter +elastickv_dynamodb_requests_total{node_address="10.0.0.1:50051",node_id="n1",operation="PutItem",outcome="success"} 1 +# HELP elastickv_dynamodb_table_requests_total Total number of table-scoped DynamoDB-compatible API requests by operation and outcome. +# TYPE elastickv_dynamodb_table_requests_total counter +elastickv_dynamodb_table_requests_total{node_address="10.0.0.1:50051",node_id="n1",operation="PutItem",outcome="success",table="orders"} 1 +# HELP elastickv_dynamodb_written_items_total Total number of items written or deleted by DynamoDB-compatible write APIs. +# TYPE elastickv_dynamodb_written_items_total counter +elastickv_dynamodb_written_items_total{node_address="10.0.0.1:50051",node_id="n1",operation="PutItem",table="orders"} 1 +`), + "elastickv_dynamodb_requests_total", + "elastickv_dynamodb_table_requests_total", + "elastickv_dynamodb_written_items_total", + ) + require.NoError(t, err) +} + +func TestDynamoDBMetricsClassifyConditionalFailure(t *testing.T) { + registry := NewRegistry("n1", "10.0.0.1:50051") + metrics, ok := registry.DynamoDBObserver().(*DynamoDBMetrics) + require.True(t, ok) + + metrics.ObserveDynamoDBRequest(DynamoDBRequestReport{ + Operation: "UpdateItem", + HTTPStatus: 400, + ErrorType: "ConditionalCheckFailedException", + Duration: time.Millisecond, + Tables: []string{"orders"}, + }) + + err := testutil.GatherAndCompare( + registry.Gatherer(), + strings.NewReader(` +# HELP elastickv_dynamodb_conditional_check_failed_total Total number of conditional check failures returned by DynamoDB-compatible APIs. +# TYPE elastickv_dynamodb_conditional_check_failed_total counter +elastickv_dynamodb_conditional_check_failed_total{node_address="10.0.0.1:50051",node_id="n1",operation="UpdateItem",table="orders"} 1 +`), + "elastickv_dynamodb_conditional_check_failed_total", + ) + require.NoError(t, err) +} diff --git a/monitoring/grafana/dashboards/elastickv-cluster-overview.json b/monitoring/grafana/dashboards/elastickv-cluster-overview.json new file mode 100644 index 00000000..f74b0e02 --- /dev/null +++ b/monitoring/grafana/dashboards/elastickv-cluster-overview.json @@ -0,0 +1,733 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(elastickv_dynamodb_requests_total{job=\"elastickv\"}[5m]))", + "instant": false, + "legendFormat": "requests/s", + "range": true, + "refId": "A" + } + ], + "title": "Request Rate", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.1 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(elastickv_dynamodb_requests_total{job=\"elastickv\",outcome!=\"success\"}[5m]))", + "instant": false, + "legendFormat": "errors/s", + "range": true, + "refId": "A" + } + ], + "title": "Error Rate", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.05 + }, + { + "color": "red", + "value": 0.25 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by (le) (rate(elastickv_dynamodb_request_duration_seconds_bucket{job=\"elastickv\"}[5m])))", + "instant": false, + "legendFormat": "p95", + "range": true, + "refId": "A" + } + ], + "title": "P95 Latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(elastickv_dynamodb_inflight_requests{job=\"elastickv\"})", + "instant": false, + "legendFormat": "in-flight", + "range": true, + "refId": "A" + } + ], + "title": "In-flight Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (operation) (rate(elastickv_dynamodb_requests_total{job=\"elastickv\"}[5m]))", + "legendFormat": "{{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests by Operation", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (outcome) (rate(elastickv_dynamodb_requests_total{job=\"elastickv\",outcome!=\"success\"}[5m]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "Error Split by Outcome", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (table) (rate(elastickv_dynamodb_returned_items_total{job=\"elastickv\"}[5m]))", + "legendFormat": "returned {{table}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (table) (rate(elastickv_dynamodb_scanned_items_total{job=\"elastickv\"}[5m]))", + "legendFormat": "scanned {{table}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (table) (rate(elastickv_dynamodb_written_items_total{job=\"elastickv\"}[5m]))", + "legendFormat": "written {{table}}", + "range": true, + "refId": "C" + } + ], + "title": "Per-table Item Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (group, node_id) (elastickv_raft_commit_index{job=\"elastickv\"})", + "legendFormat": "commit g{{group}} {{node_id}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (group, node_id) (elastickv_raft_applied_index{job=\"elastickv\"})", + "legendFormat": "applied g{{group}} {{node_id}}", + "range": true, + "refId": "B" + } + ], + "title": "Raft Index Progress", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 9, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (group, leader_id, leader_address) (elastickv_raft_leader_identity{job=\"elastickv\"})", + "format": "table", + "instant": true, + "range": false, + "refId": "A" + } + ], + "title": "Current Leader per Group", + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 10, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (group, member_id, member_address, suffrage) (elastickv_raft_member_present{job=\"elastickv\"})", + "format": "table", + "instant": true, + "range": false, + "refId": "A" + } + ], + "title": "Current Members", + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + } + }, + "mappings": [ + { + "options": { + "0": { + "text": "Follower" + }, + "1": { + "text": "Leader" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 11, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (group, member_id, member_address) (elastickv_raft_member_is_leader{job=\"elastickv\"})", + "format": "table", + "instant": true, + "range": false, + "refId": "A" + } + ], + "title": "Leader Flag per Member", + "type": "table" + } + ], + "refresh": "10s", + "schemaVersion": 41, + "style": "dark", + "tags": [ + "elastickv", + "raft", + "dynamodb" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Elastickv Cluster Overview", + "uid": "elastickv-cluster", + "version": 1, + "weekStart": "" +} diff --git a/monitoring/grafana/provisioning/dashboards/dashboards.yml b/monitoring/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 00000000..bd102f31 --- /dev/null +++ b/monitoring/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: + - name: Elastickv + orgId: 1 + folder: Elastickv + type: file + disableDeletion: false + updateIntervalSeconds: 10 + options: + path: /var/lib/grafana/dashboards diff --git a/monitoring/grafana/provisioning/datasources/prometheus.yml b/monitoring/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 00000000..00f99157 --- /dev/null +++ b/monitoring/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 00000000..e8edb150 --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,11 @@ +global: + scrape_interval: 5s + evaluation_interval: 5s + +scrape_configs: + - job_name: elastickv + static_configs: + - targets: + - host.docker.internal:9091 + - host.docker.internal:9092 + - host.docker.internal:9093 diff --git a/monitoring/raft.go b/monitoring/raft.go new file mode 100644 index 00000000..c0b1d161 --- /dev/null +++ b/monitoring/raft.go @@ -0,0 +1,313 @@ +package monitoring + +import ( + "context" + "fmt" + "strconv" + "strings" + "sync" + "time" + + "github.com/hashicorp/raft" + "github.com/prometheus/client_golang/prometheus" +) + +var raftStates = []string{ + "follower", + "candidate", + "leader", + "shutdown", + "unknown", +} + +const defaultObserveInterval = 5 * time.Second + +// RaftRuntime describes a raft group observed by the metrics exporter. +type RaftRuntime struct { + GroupID uint64 + Raft *raft.Raft +} + +type RaftMetrics struct { + localState *prometheus.GaugeVec + leaderIdentity *prometheus.GaugeVec + memberPresent *prometheus.GaugeVec + memberIsLeader *prometheus.GaugeVec + memberCount *prometheus.GaugeVec + commitIndex *prometheus.GaugeVec + appliedIndex *prometheus.GaugeVec + lastContact *prometheus.GaugeVec +} + +func newRaftMetrics(registerer prometheus.Registerer) *RaftMetrics { + m := &RaftMetrics{ + localState: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_raft_local_state", + Help: "Current local raft state for each group.", + }, + []string{"group", "state"}, + ), + leaderIdentity: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_raft_leader_identity", + Help: "Current leader identity for each raft group, as observed by this node.", + }, + []string{"group", "leader_id", "leader_address"}, + ), + memberPresent: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_raft_member_present", + Help: "Current raft configuration members known to this node.", + }, + []string{"group", "member_id", "member_address", "suffrage"}, + ), + memberIsLeader: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_raft_member_is_leader", + Help: "Whether a raft configuration member is the current leader, as observed by this node.", + }, + []string{"group", "member_id", "member_address"}, + ), + memberCount: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_raft_members", + Help: "Number of raft configuration members currently known for each group.", + }, + []string{"group"}, + ), + commitIndex: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_raft_commit_index", + Help: "Latest raft commit index for each group.", + }, + []string{"group"}, + ), + appliedIndex: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_raft_applied_index", + Help: "Latest raft applied index for each group.", + }, + []string{"group"}, + ), + lastContact: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "elastickv_raft_last_contact_seconds", + Help: "Time since the last observed leader contact for each group.", + }, + []string{"group"}, + ), + } + + registerer.MustRegister( + m.localState, + m.leaderIdentity, + m.memberPresent, + m.memberIsLeader, + m.memberCount, + m.commitIndex, + m.appliedIndex, + m.lastContact, + ) + + return m +} + +type RaftObserver struct { + metrics *RaftMetrics + + mu sync.Mutex + leaderLabels map[string]prometheus.Labels + memberLabels map[string]map[string]prometheus.Labels + memberLeaders map[string]map[string]prometheus.Labels +} + +func newRaftObserver(metrics *RaftMetrics) *RaftObserver { + return &RaftObserver{ + metrics: metrics, + leaderLabels: map[string]prometheus.Labels{}, + memberLabels: map[string]map[string]prometheus.Labels{}, + memberLeaders: map[string]map[string]prometheus.Labels{}, + } +} + +// Start polls raft state and configuration on a fixed interval until ctx is canceled. +func (o *RaftObserver) Start(ctx context.Context, runtimes []RaftRuntime, interval time.Duration) { + if o == nil { + return + } + if interval <= 0 { + interval = defaultObserveInterval + } + o.ObserveOnce(runtimes) + ticker := time.NewTicker(interval) + go func() { + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + o.ObserveOnce(runtimes) + } + } + }() +} + +// ObserveOnce captures the latest raft state for all configured runtimes. +func (o *RaftObserver) ObserveOnce(runtimes []RaftRuntime) { + if o == nil { + return + } + for _, runtime := range runtimes { + o.observeRuntime(runtime) + } +} + +func (o *RaftObserver) observeRuntime(runtime RaftRuntime) { + if o == nil || o.metrics == nil || runtime.Raft == nil { + return + } + + group := strconv.FormatUint(runtime.GroupID, 10) + stats := runtime.Raft.Stats() + state := normalizeRaftState(stats["state"]) + for _, candidate := range raftStates { + value := 0.0 + if candidate == state { + value = 1 + } + o.metrics.localState.WithLabelValues(group, candidate).Set(value) + } + + o.metrics.commitIndex.WithLabelValues(group).Set(float64(parseUintMetric(stats["commit_index"]))) + o.metrics.appliedIndex.WithLabelValues(group).Set(float64(parseUintMetric(stats["applied_index"]))) + o.metrics.lastContact.WithLabelValues(group).Set(parseLastContactSeconds(stats["last_contact"])) + + leaderAddr, leaderID := runtime.Raft.LeaderWithID() + o.setLeaderMetric(group, string(leaderID), string(leaderAddr)) + + future := runtime.Raft.GetConfiguration() + if err := future.Error(); err != nil { + return + } + o.metrics.memberCount.WithLabelValues(group).Set(float64(len(future.Configuration().Servers))) + o.setMembers(group, string(leaderID), future.Configuration().Servers) +} + +func (o *RaftObserver) setLeaderMetric(group string, leaderID string, leaderAddress string) { + o.mu.Lock() + defer o.mu.Unlock() + + if previous, ok := o.leaderLabels[group]; ok { + o.metrics.leaderIdentity.Delete(previous) + delete(o.leaderLabels, group) + } + if leaderID == "" && leaderAddress == "" { + return + } + labels := prometheus.Labels{ + "group": group, + "leader_id": leaderID, + "leader_address": leaderAddress, + } + o.metrics.leaderIdentity.With(labels).Set(1) + o.leaderLabels[group] = labels +} + +func (o *RaftObserver) setMembers(group string, leaderID string, servers []raft.Server) { + o.mu.Lock() + defer o.mu.Unlock() + + previousMembers := o.memberLabels[group] + for _, labels := range previousMembers { + o.metrics.memberPresent.Delete(labels) + } + previousLeaders := o.memberLeaders[group] + for _, labels := range previousLeaders { + o.metrics.memberIsLeader.Delete(labels) + } + + nextMembers := make(map[string]prometheus.Labels, len(servers)) + nextLeaders := make(map[string]prometheus.Labels, len(servers)) + for _, server := range servers { + memberID := string(server.ID) + memberAddress := string(server.Address) + memberKey := fmt.Sprintf("%s|%s", memberID, memberAddress) + + memberLabels := prometheus.Labels{ + "group": group, + "member_id": memberID, + "member_address": memberAddress, + "suffrage": normalizeSuffrage(server.Suffrage), + } + o.metrics.memberPresent.With(memberLabels).Set(1) + nextMembers[memberKey] = memberLabels + + leaderLabels := prometheus.Labels{ + "group": group, + "member_id": memberID, + "member_address": memberAddress, + } + if memberID != "" && memberID == leaderID { + o.metrics.memberIsLeader.With(leaderLabels).Set(1) + } else { + o.metrics.memberIsLeader.With(leaderLabels).Set(0) + } + nextLeaders[memberKey] = leaderLabels + } + + o.memberLabels[group] = nextMembers + o.memberLeaders[group] = nextLeaders +} + +func normalizeRaftState(raw string) string { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "follower": + return "follower" + case "candidate": + return "candidate" + case "leader": + return "leader" + case "shutdown": + return "shutdown" + default: + return "unknown" + } +} + +func normalizeSuffrage(s raft.ServerSuffrage) string { + switch s { + case raft.Voter: + return "voter" + case raft.Nonvoter: + return "nonvoter" + case raft.Staging: + return "staging" + default: + return "unknown" + } +} + +func parseUintMetric(raw string) uint64 { + v, err := strconv.ParseUint(strings.TrimSpace(raw), 10, 64) + if err != nil { + return 0 + } + return v +} + +func parseLastContactSeconds(raw string) float64 { + raw = strings.TrimSpace(raw) + switch raw { + case "", "never": + return 0 + case "0": + return 0 + } + d, err := time.ParseDuration(raw) + if err != nil { + return 0 + } + return d.Seconds() +} diff --git a/monitoring/raft_test.go b/monitoring/raft_test.go new file mode 100644 index 00000000..4e48b875 --- /dev/null +++ b/monitoring/raft_test.go @@ -0,0 +1,41 @@ +package monitoring + +import ( + "strings" + "testing" + + "github.com/hashicorp/raft" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" +) + +func TestRaftObserverSetMembersReplacesRemovedMembers(t *testing.T) { + registry := NewRegistry("n1", "10.0.0.1:50051") + observer := registry.RaftObserver() + require.NotNil(t, observer) + + observer.setLeaderMetric("1", "n2", "10.0.0.2:50051") + observer.setMembers("1", "n2", []raft.Server{ + {ID: "n1", Address: "10.0.0.1:50051", Suffrage: raft.Voter}, + {ID: "n2", Address: "10.0.0.2:50051", Suffrage: raft.Voter}, + }) + observer.setLeaderMetric("1", "n1", "10.0.0.1:50051") + observer.setMembers("1", "n1", []raft.Server{ + {ID: "n1", Address: "10.0.0.1:50051", Suffrage: raft.Voter}, + }) + + err := testutil.GatherAndCompare( + registry.Gatherer(), + strings.NewReader(` +# HELP elastickv_raft_leader_identity Current leader identity for each raft group, as observed by this node. +# TYPE elastickv_raft_leader_identity gauge +elastickv_raft_leader_identity{group="1",leader_address="10.0.0.1:50051",leader_id="n1",node_address="10.0.0.1:50051",node_id="n1"} 1 +# HELP elastickv_raft_member_present Current raft configuration members known to this node. +# TYPE elastickv_raft_member_present gauge +elastickv_raft_member_present{group="1",member_address="10.0.0.1:50051",member_id="n1",node_address="10.0.0.1:50051",node_id="n1",suffrage="voter"} 1 +`), + "elastickv_raft_leader_identity", + "elastickv_raft_member_present", + ) + require.NoError(t, err) +} diff --git a/monitoring/registry.go b/monitoring/registry.go new file mode 100644 index 00000000..616ce23c --- /dev/null +++ b/monitoring/registry.go @@ -0,0 +1,68 @@ +package monitoring + +import ( + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +// Registry owns the Prometheus registry used by a single Elastickv node. +type Registry struct { + baseRegistry *prometheus.Registry + registerer prometheus.Registerer + gatherer prometheus.Gatherer + + dynamo *DynamoDBMetrics + raft *RaftMetrics +} + +// NewRegistry builds a registry with constant labels that identify the local node. +func NewRegistry(nodeID string, nodeAddress string) *Registry { + base := prometheus.NewRegistry() + registerer := prometheus.WrapRegistererWith(prometheus.Labels{ + "node_id": nodeID, + "node_address": nodeAddress, + }, base) + + r := &Registry{ + baseRegistry: base, + registerer: registerer, + gatherer: base, + } + r.dynamo = newDynamoDBMetrics(registerer) + r.raft = newRaftMetrics(registerer) + return r +} + +// Handler returns an HTTP handler that exposes the Prometheus scrape endpoint. +func (r *Registry) Handler() http.Handler { + if r == nil || r.gatherer == nil { + return promhttp.Handler() + } + return promhttp.HandlerFor(r.gatherer, promhttp.HandlerOpts{}) +} + +// Gatherer exposes the underlying gatherer for tests and custom exporters. +func (r *Registry) Gatherer() prometheus.Gatherer { + if r == nil { + return nil + } + return r.gatherer +} + +// DynamoDBObserver returns the DynamoDB request observer backed by this registry. +func (r *Registry) DynamoDBObserver() DynamoDBRequestObserver { + if r == nil { + return nil + } + return r.dynamo +} + +// RaftObserver returns the Raft topology observer backed by this registry. +func (r *Registry) RaftObserver() *RaftObserver { + if r == nil { + return nil + } + return newRaftObserver(r.raft) +} From a7d7a1af8ab7fc3a9fc0b1ceefbe6e810b0e51aa Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Tue, 10 Mar 2026 00:51:22 +0900 Subject: [PATCH 02/13] Record only valid DynamoDB metrics and log observability shutdown/parse failures --- adapter/dynamodb.go | 40 ++++++++--------------------- adapter/dynamodb_metrics_test.go | 18 +++++++++++-- cmd/server/demo.go | 38 +++++++++++++++++----------- main.go | 43 +++++++++++++++++++------------- monitoring/dynamodb.go | 18 +++++++++++++ monitoring/dynamodb_test.go | 25 +++++++++++++++++++ monitoring/raft.go | 6 +++++ 7 files changed, 124 insertions(+), 64 deletions(-) diff --git a/adapter/dynamodb.go b/adapter/dynamodb.go index 5867869e..93b762c3 100644 --- a/adapter/dynamodb.go +++ b/adapter/dynamodb.go @@ -124,8 +124,11 @@ func (c *countingReadCloser) Read(p []byte) (int, error) { } n, err := c.ReadCloser.Read(p) c.bytesRead += n - if err == nil || errors.Is(err, io.EOF) { - return n, err + if err == nil { + return n, nil + } + if errors.Is(err, io.EOF) { + return n, io.EOF } return n, errors.WithStack(err) } @@ -319,10 +322,10 @@ func (s *dynamoRequestMetricsState) addTableMetrics(table string, returnedItems if table == "" { return } - if s.tables == nil { - s.tables = make(map[string]monitoring.DynamoDBTableMetrics) + metrics, ok := s.tables[table] + if !ok { + return } - metrics := s.tables[table] metrics.ReturnedItems += returnedItems metrics.ScannedItems += scannedItems metrics.WrittenItems += writtenItems @@ -600,7 +603,6 @@ func (d *DynamoDBServer) createTable(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } - d.observeTables(r.Context(), in.TableName) unlock := d.lockTableOperations([]string{in.TableName}) defer unlock() schema, err := buildCreateTableSchema(in) @@ -612,6 +614,7 @@ func (d *DynamoDBServer) createTable(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } + d.observeTables(r.Context(), schema.TableName) writeDynamoJSON(w, map[string]any{ "TableDescription": map[string]any{ "TableName": in.TableName, @@ -770,7 +773,6 @@ func (d *DynamoDBServer) deleteTable(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } - d.observeTables(r.Context(), in.TableName) unlock := d.lockTableOperations([]string{in.TableName}) defer unlock() if err := d.deleteTableWithRetry(r.Context(), in.TableName); err != nil { @@ -945,7 +947,6 @@ func (d *DynamoDBServer) describeTable(w http.ResponseWriter, r *http.Request) { writeDynamoError(w, http.StatusBadRequest, dynamoErrValidation, "missing table name") return } - d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -1049,7 +1050,6 @@ func (d *DynamoDBServer) putItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } - d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -1224,7 +1224,6 @@ func (d *DynamoDBServer) getItem(w http.ResponseWriter, r *http.Request) { writeDynamoError(w, http.StatusBadRequest, dynamoErrValidation, "missing table name") return } - d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -1265,7 +1264,6 @@ func (d *DynamoDBServer) deleteItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } - d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -1396,7 +1394,6 @@ func (d *DynamoDBServer) updateItem(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } - d.observeTables(r.Context(), in.TableName) if err := d.ensureLegacyTableMigration(r.Context(), in.TableName); err != nil { writeDynamoErrorFromErr(w, err) return @@ -1723,7 +1720,6 @@ func (d *DynamoDBServer) query(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } - d.observeTables(r.Context(), in.TableName) out, err := d.queryItems(r.Context(), in) if err != nil { writeDynamoErrorFromErr(w, err) @@ -1747,7 +1743,6 @@ func (d *DynamoDBServer) scan(w http.ResponseWriter, r *http.Request) { writeDynamoErrorFromErr(w, err) return } - d.observeTables(r.Context(), in.TableName) out, err := d.scanItems(r.Context(), in) if err != nil { writeDynamoErrorFromErr(w, err) @@ -3525,7 +3520,6 @@ func (d *DynamoDBServer) batchWriteItem(w http.ResponseWriter, r *http.Request) writeDynamoErrorFromErr(w, err) return } - d.observeTables(r.Context(), batchWriteTableNames(in)...) unprocessed, err := d.batchWriteItems(r.Context(), in) if err != nil { writeDynamoErrorFromErr(w, err) @@ -3565,15 +3559,6 @@ func decodeBatchWriteItemInput(bodyReader io.Reader) (batchWriteItemInput, error return in, nil } -func batchWriteTableNames(in batchWriteItemInput) []string { - names := make([]string, 0, len(in.RequestItems)) - for table := range in.RequestItems { - names = append(names, table) - } - sort.Strings(names) - return names -} - func batchWriteCommittedCounts(in batchWriteItemInput, unprocessed map[string][]batchWriteRequest) map[string]int { out := make(map[string]int, len(in.RequestItems)) for table, requests := range in.RequestItems { @@ -3771,12 +3756,6 @@ func (d *DynamoDBServer) transactWriteItems(w http.ResponseWriter, r *http.Reque writeDynamoErrorFromErr(w, err) return } - tableNames, err := collectTransactWriteTableNames(in) - if err != nil { - writeDynamoErrorFromErr(w, err) - return - } - d.observeTables(r.Context(), tableNames...) if err := d.transactWriteItemsWithRetry(r.Context(), in); err != nil { writeDynamoErrorFromErr(w, err) return @@ -7382,6 +7361,7 @@ func (d *DynamoDBServer) loadTableSchemaAt(ctx context.Context, tableName string if err := json.Unmarshal(b, schema); err != nil { return nil, false, errors.WithStack(err) } + d.observeTables(ctx, schema.TableName) return schema, true, nil } diff --git a/adapter/dynamodb_metrics_test.go b/adapter/dynamodb_metrics_test.go index c47b28b2..c76e4cf6 100644 --- a/adapter/dynamodb_metrics_test.go +++ b/adapter/dynamodb_metrics_test.go @@ -1,6 +1,7 @@ package adapter import ( + "context" "io" "net/http" "net/http/httptest" @@ -24,7 +25,7 @@ func TestDynamoDBServerHandleObservesSuccessMetrics(t *testing.T) { }, } - req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(`{"TableName":"orders"}`)) + req := httptest.NewRequestWithContext(context.Background(), http.MethodPost, "/", strings.NewReader(`{"TableName":"orders"}`)) req.Header.Set("X-Amz-Target", putItemTarget) rec := httptest.NewRecorder() server.handle(rec, req) @@ -57,7 +58,7 @@ func TestDynamoDBServerHandleObservesConditionalFailures(t *testing.T) { }, } - req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(`{"TableName":"orders"}`)) + req := httptest.NewRequestWithContext(context.Background(), http.MethodPost, "/", strings.NewReader(`{"TableName":"orders"}`)) req.Header.Set("X-Amz-Target", updateItemTarget) rec := httptest.NewRecorder() server.handle(rec, req) @@ -74,3 +75,16 @@ elastickv_dynamodb_conditional_check_failed_total{node_address="10.0.0.1:50051", ) require.NoError(t, err) } + +func TestDynamoRequestMetricsStateIgnoresUnknownTableMetrics(t *testing.T) { + state := &dynamoRequestMetricsState{} + + state.addTableMetrics("orders", 0, 0, 1) + require.Nil(t, state.tableMetrics()) + + state.recordTable("orders") + state.addTableMetrics("orders", 0, 0, 1) + require.Equal(t, map[string]monitoring.DynamoDBTableMetrics{ + "orders": {WrittenItems: 1}, + }, state.tableMetrics()) +} diff --git a/cmd/server/demo.go b/cmd/server/demo.go index e78ac8b1..9eeeb0f8 100644 --- a/cmd/server/demo.go +++ b/cmd/server/demo.go @@ -51,6 +51,7 @@ const ( joinWait = 3 * time.Second joinRetryInterval = 1 * time.Second joinRPCTimeout = 3 * time.Second + raftObserveInterval = 5 * time.Second ) func init() { @@ -368,20 +369,8 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { return errors.WithStack(err) } - if cfg.raftBootstrap { - cfg := raft.Configuration{ - Servers: []raft.Server{ - { - Suffrage: raft.Voter, - ID: raft.ServerID(cfg.raftID), - Address: raft.ServerAddress(cfg.address), - }, - }, - } - f := r.BootstrapCluster(cfg) - if err := f.Error(); err != nil && !errors.Is(err, raft.ErrCantBootstrap) { - return errors.WithStack(err) - } + if err := bootstrapClusterIfNeeded(r, cfg); err != nil { + return err } trx := kv.NewTransaction(r) @@ -397,7 +386,7 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { adapter.WithDistributionCoordinator(coordinator), ) metricsRegistry := monitoring.NewRegistry(cfg.raftID, cfg.address) - metricsRegistry.RaftObserver().Start(ctx, []monitoring.RaftRuntime{{GroupID: 1, Raft: r}}, 5*time.Second) + metricsRegistry.RaftObserver().Start(ctx, []monitoring.RaftRuntime{{GroupID: 1, Raft: r}}, raftObserveInterval) s, grpcSvc := setupGRPC(r, st, tm, coordinator, distServer) @@ -442,6 +431,25 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { return nil } +func bootstrapClusterIfNeeded(r *raft.Raft, cfg config) error { + if !cfg.raftBootstrap { + return nil + } + bootstrapCfg := raft.Configuration{ + Servers: []raft.Server{ + { + Suffrage: raft.Voter, + ID: raft.ServerID(cfg.raftID), + Address: raft.ServerAddress(cfg.address), + }, + }, + } + if err := r.BootstrapCluster(bootstrapCfg).Error(); err != nil && !errors.Is(err, raft.ErrCantBootstrap) { + return errors.WithStack(err) + } + return nil +} + func catalogWatcherTask(ctx context.Context, distCatalog *distribution.CatalogStore, distEngine *distribution.Engine) func() error { return func() error { if err := distribution.RunCatalogWatcher(ctx, distCatalog, distEngine, slog.Default()); err != nil { diff --git a/main.go b/main.go index a0d6eb24..7f05d5df 100644 --- a/main.go +++ b/main.go @@ -26,9 +26,9 @@ import ( ) const ( - heartbeatTimeout = 200 * time.Millisecond - electionTimeout = 2000 * time.Millisecond - leaderLease = 100 * time.Millisecond + heartbeatTimeout = 200 * time.Millisecond + electionTimeout = 2000 * time.Millisecond + leaderLease = 100 * time.Millisecond raftMetricsObserveInterval = 5 * time.Second ) @@ -396,24 +396,33 @@ func startMetricsServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup. ReadHeaderTimeout: time.Second, } eg.Go(func() error { - stop := make(chan struct{}) - go func() { - select { - case <-ctx.Done(): - _ = metricsServer.Shutdown(context.Background()) - case <-stop: - } - }() - err := metricsServer.Serve(metricsL) - close(stop) - if errors.Is(err, http.ErrServerClosed) || errors.Is(err, net.ErrClosed) { - return nil - } - return errors.WithStack(err) + return serveMetricsUntilCanceled(ctx, metricsServer, metricsL, metricsAddr) }) return nil } +func serveMetricsUntilCanceled(ctx context.Context, server *http.Server, listener net.Listener, metricsAddr string) error { + stop := make(chan struct{}) + go watchMetricsShutdown(ctx, server, stop, metricsAddr) + + err := server.Serve(listener) + close(stop) + if errors.Is(err, http.ErrServerClosed) || errors.Is(err, net.ErrClosed) { + return nil + } + return errors.WithStack(err) +} + +func watchMetricsShutdown(ctx context.Context, server *http.Server, stop <-chan struct{}, metricsAddr string) { + select { + case <-ctx.Done(): + if err := server.Shutdown(context.Background()); err != nil && !errors.Is(err, http.ErrServerClosed) && !errors.Is(err, net.ErrClosed) { + log.Printf("metrics server shutdown error on %s: %v", metricsAddr, err) + } + case <-stop: + } +} + func distributionCatalogStoreForGroup(runtimes []*raftGroupRuntime, groupID uint64) *distribution.CatalogStore { for _, rt := range runtimes { if rt == nil || rt.store == nil { diff --git a/monitoring/dynamodb.go b/monitoring/dynamodb.go index 8d8eacaf..722195c5 100644 --- a/monitoring/dynamodb.go +++ b/monitoring/dynamodb.go @@ -17,6 +17,21 @@ const ( dynamoOperationUnknown = "unknown" ) +var dynamoOperations = []string{ + "BatchWriteItem", + "CreateTable", + "DeleteItem", + "DeleteTable", + "DescribeTable", + "GetItem", + "ListTables", + "PutItem", + "Query", + "Scan", + "TransactWriteItems", + "UpdateItem", +} + // DynamoDBRequestObserver records per-request DynamoDB-compatible API metrics. type DynamoDBRequestObserver interface { ObserveInFlightChange(operation string, delta float64) @@ -197,6 +212,9 @@ func normalizeDynamoOperation(operation string) string { if operation == "" { return dynamoOperationUnknown } + if !slices.Contains(dynamoOperations, operation) { + return dynamoOperationUnknown + } return operation } diff --git a/monitoring/dynamodb_test.go b/monitoring/dynamodb_test.go index 291ef220..e07f784b 100644 --- a/monitoring/dynamodb_test.go +++ b/monitoring/dynamodb_test.go @@ -72,3 +72,28 @@ elastickv_dynamodb_conditional_check_failed_total{node_address="10.0.0.1:50051", ) require.NoError(t, err) } + +func TestDynamoDBMetricsNormalizesUnknownOperation(t *testing.T) { + registry := NewRegistry("n1", "10.0.0.1:50051") + metrics, ok := registry.DynamoDBObserver().(*DynamoDBMetrics) + require.True(t, ok) + + metrics.ObserveInFlightChange("InjectedOperation", 1) + metrics.ObserveInFlightChange("InjectedOperation", -1) + metrics.ObserveDynamoDBRequest(DynamoDBRequestReport{ + Operation: "InjectedOperation", + HTTPStatus: 200, + Duration: time.Millisecond, + }) + + err := testutil.GatherAndCompare( + registry.Gatherer(), + strings.NewReader(` +# HELP elastickv_dynamodb_requests_total Total number of DynamoDB-compatible API requests by operation and outcome. +# TYPE elastickv_dynamodb_requests_total counter +elastickv_dynamodb_requests_total{node_address="10.0.0.1:50051",node_id="n1",operation="unknown",outcome="success"} 1 +`), + "elastickv_dynamodb_requests_total", + ) + require.NoError(t, err) +} diff --git a/monitoring/raft.go b/monitoring/raft.go index c0b1d161..56606a81 100644 --- a/monitoring/raft.go +++ b/monitoring/raft.go @@ -3,6 +3,7 @@ package monitoring import ( "context" "fmt" + "log/slog" "strconv" "strings" "sync" @@ -20,6 +21,8 @@ var raftStates = []string{ "unknown", } +var loggedLastContactParseValues sync.Map + const defaultObserveInterval = 5 * time.Second // RaftRuntime describes a raft group observed by the metrics exporter. @@ -307,6 +310,9 @@ func parseLastContactSeconds(raw string) float64 { } d, err := time.ParseDuration(raw) if err != nil { + if _, loaded := loggedLastContactParseValues.LoadOrStore(raw, struct{}{}); !loaded { + slog.Warn("failed to parse raft last_contact metric", "raw", raw, "err", err) + } return 0 } return d.Seconds() From ea78680f242d4166e33a2a58cd09b1f1ed72da97 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Tue, 10 Mar 2026 00:59:49 +0900 Subject: [PATCH 03/13] Add metrics shutdown timeout with context cancel --- main.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.go b/main.go index 7f05d5df..2adec99d 100644 --- a/main.go +++ b/main.go @@ -30,6 +30,7 @@ const ( electionTimeout = 2000 * time.Millisecond leaderLease = 100 * time.Millisecond raftMetricsObserveInterval = 5 * time.Second + metricsShutdownTimeout = 5 * time.Second ) var ( @@ -416,7 +417,9 @@ func serveMetricsUntilCanceled(ctx context.Context, server *http.Server, listene func watchMetricsShutdown(ctx context.Context, server *http.Server, stop <-chan struct{}, metricsAddr string) { select { case <-ctx.Done(): - if err := server.Shutdown(context.Background()); err != nil && !errors.Is(err, http.ErrServerClosed) && !errors.Is(err, net.ErrClosed) { + shutdownCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), metricsShutdownTimeout) + defer cancel() + if err := server.Shutdown(shutdownCtx); err != nil && !errors.Is(err, http.ErrServerClosed) && !errors.Is(err, net.ErrClosed) { log.Printf("metrics server shutdown error on %s: %v", metricsAddr, err) } case <-stop: From 3879c19f2aa600590620469a1519a2602b3ac684 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Tue, 10 Mar 2026 01:04:06 +0900 Subject: [PATCH 04/13] Add deferred cancel call in run function --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index 2adec99d..f557d341 100644 --- a/main.go +++ b/main.go @@ -78,10 +78,10 @@ func run() error { } ctx, cancel := context.WithCancel(context.Background()) + defer cancel() clock := kv.NewHLC() shardStore := kv.NewShardStore(cfg.engine, shardGroups) defer func() { - cancel() _ = shardStore.Close() for _, rt := range runtimes { rt.Close() From 19110e0ba562587b84c022c368d33e6a1b3eb421 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Tue, 10 Mar 2026 01:11:29 +0900 Subject: [PATCH 05/13] Refactor DynamoDB operations and metrics handling --- adapter/dynamodb.go | 22 ++++++++++++++++++---- adapter/dynamodb_metrics_test.go | 6 ++++++ monitoring/dynamodb.go | 30 +----------------------------- monitoring/dynamodb_test.go | 2 +- 4 files changed, 26 insertions(+), 34 deletions(-) diff --git a/adapter/dynamodb.go b/adapter/dynamodb.go index 93b762c3..d6075ece 100644 --- a/adapter/dynamodb.go +++ b/adapter/dynamodb.go @@ -70,6 +70,21 @@ const ( dynamoOrderedKeyEncodingV2 = 2 ) +var dynamoOperationTargets = map[string]string{ + batchWriteItemTarget: "BatchWriteItem", + createTableTarget: "CreateTable", + deleteItemTarget: "DeleteItem", + deleteTableTarget: "DeleteTable", + describeTableTarget: "DescribeTable", + getItemTarget: "GetItem", + listTablesTarget: "ListTables", + putItemTarget: "PutItem", + queryTarget: "Query", + scanTarget: "Scan", + transactWriteItemsTarget: "TransactWriteItems", + updateItemTarget: "UpdateItem", +} + const ( dynamoErrValidation = "ValidationException" dynamoErrInternal = "InternalServerError" @@ -283,11 +298,10 @@ func (d *DynamoDBServer) dispatchByTarget(target string, w http.ResponseWriter, } func dynamoOperationName(target string) string { - op := strings.TrimSpace(strings.TrimPrefix(target, targetPrefix)) - if op == "" || op == target { - return "unknown" + if operation, ok := dynamoOperationTargets[strings.TrimSpace(target)]; ok { + return operation } - return op + return "unknown" } func dynamoRequestMetricsFromContext(ctx context.Context) *dynamoRequestMetricsState { diff --git a/adapter/dynamodb_metrics_test.go b/adapter/dynamodb_metrics_test.go index c76e4cf6..08982b0c 100644 --- a/adapter/dynamodb_metrics_test.go +++ b/adapter/dynamodb_metrics_test.go @@ -88,3 +88,9 @@ func TestDynamoRequestMetricsStateIgnoresUnknownTableMetrics(t *testing.T) { "orders": {WrittenItems: 1}, }, state.tableMetrics()) } + +func TestDynamoOperationNameRejectsUnknownTargets(t *testing.T) { + require.Equal(t, "PutItem", dynamoOperationName(putItemTarget)) + require.Equal(t, "unknown", dynamoOperationName(targetPrefix+"PutItem-random")) + require.Equal(t, "unknown", dynamoOperationName("random")) +} diff --git a/monitoring/dynamodb.go b/monitoring/dynamodb.go index 722195c5..8ed05eae 100644 --- a/monitoring/dynamodb.go +++ b/monitoring/dynamodb.go @@ -204,7 +204,7 @@ func (m *DynamoDBMetrics) ObserveDynamoDBRequest(report DynamoDBRequestReport) { } m.observeRequest(operation, outcome, report) - m.observeTables(operation, outcome, mergeReportTables(report.Tables, tableMetrics), tableMetrics) + m.observeTables(operation, outcome, report.Tables, tableMetrics) } func normalizeDynamoOperation(operation string) string { @@ -225,16 +225,6 @@ func (m *DynamoDBMetrics) observeRequest(operation string, outcome string, repor m.responseSize.WithLabelValues(operation, outcome).Observe(float64(max(report.ResponseBytes, 0))) } -func mergeReportTables(tables []string, tableMetrics map[string]DynamoDBTableMetrics) []string { - merged := append([]string(nil), tables...) - for table := range tableMetrics { - if table = strings.TrimSpace(table); table != "" { - merged = append(merged, table) - } - } - return normalizedTables(merged) -} - func (m *DynamoDBMetrics) observeTables(operation string, outcome string, tables []string, tableMetrics map[string]DynamoDBTableMetrics) { for _, table := range tables { m.tableRequests.WithLabelValues(operation, table, outcome).Inc() @@ -266,24 +256,6 @@ func (m *DynamoDBMetrics) observeTableOutcome(operation string, table string, ou } } -func normalizedTables(tables []string) []string { - out := make([]string, 0, len(tables)) - seen := make(map[string]struct{}, len(tables)) - for _, table := range tables { - table = strings.TrimSpace(table) - if table == "" { - continue - } - if _, ok := seen[table]; ok { - continue - } - seen[table] = struct{}{} - out = append(out, table) - } - slices.Sort(out) - return out -} - func classifyDynamoOutcome(status int, errorType string) string { if strings.EqualFold(strings.TrimSpace(errorType), "ConditionalCheckFailedException") { return dynamoOutcomeConditionalCheckFailed diff --git a/monitoring/dynamodb_test.go b/monitoring/dynamodb_test.go index e07f784b..5f5965b2 100644 --- a/monitoring/dynamodb_test.go +++ b/monitoring/dynamodb_test.go @@ -22,7 +22,7 @@ func TestDynamoDBMetricsObserveRequest(t *testing.T) { Duration: 12 * time.Millisecond, RequestBytes: 256, ResponseBytes: 64, - Tables: []string{"orders", "orders"}, + Tables: []string{"orders"}, TableMetrics: map[string]DynamoDBTableMetrics{ "orders": {WrittenItems: 1}, }, From 98851e3c9fbcbb1abd8ed5ad4023e89aa351dc21 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Tue, 10 Mar 2026 01:27:17 +0900 Subject: [PATCH 06/13] Update main.go Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index f557d341..2adec99d 100644 --- a/main.go +++ b/main.go @@ -78,10 +78,10 @@ func run() error { } ctx, cancel := context.WithCancel(context.Background()) - defer cancel() clock := kv.NewHLC() shardStore := kv.NewShardStore(cfg.engine, shardGroups) defer func() { + cancel() _ = shardStore.Close() for _, rt := range runtimes { rt.Close() From b9561055073b7c7f69ccf9034ed473b4bbf1cf08 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Tue, 10 Mar 2026 02:26:24 +0900 Subject: [PATCH 07/13] Address review feedback on metrics handling --- adapter/dynamodb.go | 15 +++++++++------ main.go | 1 + 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/adapter/dynamodb.go b/adapter/dynamodb.go index d6075ece..32194952 100644 --- a/adapter/dynamodb.go +++ b/adapter/dynamodb.go @@ -251,9 +251,7 @@ func (d *DynamoDBServer) Stop() { func (d *DynamoDBServer) handle(w http.ResponseWriter, r *http.Request) { target := r.Header.Get("X-Amz-Target") if d.requestObserver == nil { - if !d.dispatchByTarget(target, w, r) { - writeDynamoError(w, http.StatusBadRequest, dynamoErrValidation, "unsupported operation") - } + d.dispatchOrWriteUnsupported(target, w, r) return } @@ -268,9 +266,7 @@ func (d *DynamoDBServer) handle(w http.ResponseWriter, r *http.Request) { recorder := &dynamoResponseRecorder{ResponseWriter: w} started := time.Now() - if !d.dispatchByTarget(target, recorder, r) { - writeDynamoError(recorder, http.StatusBadRequest, dynamoErrValidation, "unsupported operation") - } + d.dispatchOrWriteUnsupported(target, recorder, r) d.requestObserver.ObserveDynamoDBRequest(monitoring.DynamoDBRequestReport{ Operation: operation, @@ -297,6 +293,13 @@ func (d *DynamoDBServer) dispatchByTarget(target string, w http.ResponseWriter, return true } +func (d *DynamoDBServer) dispatchOrWriteUnsupported(target string, w http.ResponseWriter, r *http.Request) { + if d.dispatchByTarget(target, w, r) { + return + } + writeDynamoError(w, http.StatusBadRequest, dynamoErrValidation, "unsupported operation") +} + func dynamoOperationName(target string) string { if operation, ok := dynamoOperationTargets[strings.TrimSpace(target)]; ok { return operation diff --git a/main.go b/main.go index 2adec99d..ae8c2207 100644 --- a/main.go +++ b/main.go @@ -87,6 +87,7 @@ func run() error { rt.Close() } }() + defer cancel() coordinate := kv.NewShardedCoordinator(cfg.engine, shardGroups, cfg.defaultGroup, clock, shardStore) distCatalog, err := setupDistributionCatalog(ctx, runtimes, cfg.engine) if err != nil { From e3f90e2c2189c008e31015e49d2b3b664b98c6e6 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Tue, 10 Mar 2026 02:27:34 +0900 Subject: [PATCH 08/13] Fix cleanup order after review updates --- main.go | 1 - 1 file changed, 1 deletion(-) diff --git a/main.go b/main.go index ae8c2207..49f3d803 100644 --- a/main.go +++ b/main.go @@ -81,7 +81,6 @@ func run() error { clock := kv.NewHLC() shardStore := kv.NewShardStore(cfg.engine, shardGroups) defer func() { - cancel() _ = shardStore.Close() for _, rt := range runtimes { rt.Close() From e119e611917eef84eb80de6fa8edc30ee33f805c Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Wed, 11 Mar 2026 00:01:07 +0900 Subject: [PATCH 09/13] Harden metrics exposure and label tracking --- adapter/dynamodb.go | 6 +-- adapter/dynamodb_metrics_test.go | 6 +-- cmd/server/demo.go | 31 ++++++++++--- main.go | 12 +++-- monitoring/dynamodb.go | 75 +++++++++++++++++++++++++++++++- monitoring/dynamodb_test.go | 36 +++++++++++++++ monitoring/http.go | 45 +++++++++++++++++++ monitoring/http_test.go | 35 +++++++++++++++ 8 files changed, 226 insertions(+), 20 deletions(-) create mode 100644 monitoring/http.go create mode 100644 monitoring/http_test.go diff --git a/adapter/dynamodb.go b/adapter/dynamodb.go index 32194952..9c26ae8a 100644 --- a/adapter/dynamodb.go +++ b/adapter/dynamodb.go @@ -339,10 +339,8 @@ func (s *dynamoRequestMetricsState) addTableMetrics(table string, returnedItems if table == "" { return } - metrics, ok := s.tables[table] - if !ok { - return - } + s.recordTable(table) + metrics := s.tables[table] metrics.ReturnedItems += returnedItems metrics.ScannedItems += scannedItems metrics.WrittenItems += writtenItems diff --git a/adapter/dynamodb_metrics_test.go b/adapter/dynamodb_metrics_test.go index 08982b0c..e170c805 100644 --- a/adapter/dynamodb_metrics_test.go +++ b/adapter/dynamodb_metrics_test.go @@ -76,13 +76,9 @@ elastickv_dynamodb_conditional_check_failed_total{node_address="10.0.0.1:50051", require.NoError(t, err) } -func TestDynamoRequestMetricsStateIgnoresUnknownTableMetrics(t *testing.T) { +func TestDynamoRequestMetricsStateAddsMetricsWithoutExplicitTableRegistration(t *testing.T) { state := &dynamoRequestMetricsState{} - state.addTableMetrics("orders", 0, 0, 1) - require.Nil(t, state.tableMetrics()) - - state.recordTable("orders") state.addTableMetrics("orders", 0, 0, 1) require.Equal(t, map[string]monitoring.DynamoDBTableMetrics{ "orders": {WrittenItems: 1}, diff --git a/cmd/server/demo.go b/cmd/server/demo.go index 9eeeb0f8..640f66ee 100644 --- a/cmd/server/demo.go +++ b/cmd/server/demo.go @@ -36,7 +36,8 @@ var ( address = flag.String("address", ":50051", "gRPC/Raft address") redisAddress = flag.String("redisAddress", ":6379", "Redis address") dynamoAddress = flag.String("dynamoAddress", ":8000", "DynamoDB-compatible API address") - metricsAddress = flag.String("metricsAddress", ":9090", "Prometheus metrics address") + metricsAddress = flag.String("metricsAddress", "127.0.0.1:9090", "Prometheus metrics address") + metricsToken = flag.String("metricsToken", "", "Bearer token for Prometheus metrics; required for non-loopback metricsAddress") raftID = flag.String("raftId", "", "Raft ID") raftDataDir = flag.String("raftDataDir", "/var/lib/elastickv", "Raft data directory") raftBootstrap = flag.Bool("raftBootstrap", false, "Bootstrap cluster") @@ -65,6 +66,7 @@ type config struct { redisAddress string dynamoAddress string metricsAddress string + metricsToken string raftID string raftDataDir string raftBootstrap bool @@ -83,6 +85,7 @@ func main() { redisAddress: *redisAddress, dynamoAddress: *dynamoAddress, metricsAddress: *metricsAddress, + metricsToken: *metricsToken, raftID: *raftID, raftDataDir: *raftDataDir, raftBootstrap: *raftBootstrap, @@ -101,6 +104,7 @@ func main() { redisAddress: "127.0.0.1:63791", dynamoAddress: "127.0.0.1:63801", metricsAddress: "127.0.0.1:9091", + metricsToken: *metricsToken, raftID: "n1", raftDataDir: "", // In-memory raftBootstrap: true, @@ -110,6 +114,7 @@ func main() { redisAddress: "127.0.0.1:63792", dynamoAddress: "127.0.0.1:63802", metricsAddress: "127.0.0.1:9092", + metricsToken: *metricsToken, raftID: "n2", raftDataDir: "", raftBootstrap: false, @@ -119,6 +124,7 @@ func main() { redisAddress: "127.0.0.1:63793", dynamoAddress: "127.0.0.1:63803", metricsAddress: "127.0.0.1:9093", + metricsToken: *metricsToken, raftID: "n3", raftDataDir: "", raftBootstrap: false, @@ -409,13 +415,9 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { coordinator, adapter.WithDynamoDBRequestObserver(metricsRegistry.DynamoDBObserver()), ) - metricsL, err := lc.Listen(ctx, "tcp", cfg.metricsAddress) + metricsL, ms, err := setupMetricsHTTPServer(ctx, lc, cfg.metricsAddress, cfg.metricsToken, metricsRegistry.Handler()) if err != nil { - return errors.WithStack(err) - } - ms := &http.Server{ - Handler: metricsRegistry.Handler(), - ReadHeaderTimeout: time.Second, + return err } eg.Go(catalogWatcherTask(ctx, distCatalog, distEngine)) @@ -431,6 +433,21 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { return nil } +func setupMetricsHTTPServer(ctx context.Context, lc net.ListenConfig, metricsAddress string, metricsToken string, handler http.Handler) (net.Listener, *http.Server, error) { + if monitoring.MetricsAddressRequiresToken(metricsAddress) && strings.TrimSpace(metricsToken) == "" { + return nil, nil, errors.New("metricsToken is required when metricsAddress is not loopback") + } + metricsL, err := lc.Listen(ctx, "tcp", metricsAddress) + if err != nil { + return nil, nil, errors.WithStack(err) + } + ms := &http.Server{ + Handler: monitoring.ProtectHandler(handler, metricsToken), + ReadHeaderTimeout: time.Second, + } + return metricsL, ms, nil +} + func bootstrapClusterIfNeeded(r *raft.Raft, cfg config) error { if !cfg.raftBootstrap { return nil diff --git a/main.go b/main.go index 49f3d803..5da96ff9 100644 --- a/main.go +++ b/main.go @@ -38,6 +38,7 @@ var ( redisAddr = flag.String("redisAddress", "localhost:6379", "TCP host+port for redis") dynamoAddr = flag.String("dynamoAddress", "localhost:8000", "TCP host+port for DynamoDB-compatible API") metricsAddr = flag.String("metricsAddress", "localhost:9090", "TCP host+port for Prometheus metrics") + metricsToken = flag.String("metricsToken", "", "Bearer token for Prometheus metrics; required for non-loopback metricsAddress") raftId = flag.String("raftId", "", "Node id used by Raft") raftDir = flag.String("raftDataDir", "data/", "Raft data dir") raftBootstrap = flag.Bool("raftBootstrap", false, "Whether to bootstrap the Raft cluster") @@ -117,6 +118,7 @@ func run() error { leaderRedis: cfg.leaderRedis, dynamoAddress: *dynamoAddr, metricsAddress: *metricsAddr, + metricsToken: *metricsToken, metricsRegistry: metricsRegistry, } if err := runner.start(); err != nil { @@ -384,16 +386,19 @@ func startDynamoDBServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup return nil } -func startMetricsServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup.Group, metricsAddr string, handler http.Handler) error { +func startMetricsServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup.Group, metricsAddr string, metricsToken string, handler http.Handler) error { if strings.TrimSpace(metricsAddr) == "" || handler == nil { return nil } + if monitoring.MetricsAddressRequiresToken(metricsAddr) && strings.TrimSpace(metricsToken) == "" { + return errors.New("metricsToken is required when metricsAddress is not loopback") + } metricsL, err := lc.Listen(ctx, "tcp", metricsAddr) if err != nil { return errors.Wrapf(err, "failed to listen on %s", metricsAddr) } metricsServer := &http.Server{ - Handler: handler, + Handler: monitoring.ProtectHandler(handler, metricsToken), ReadHeaderTimeout: time.Second, } eg.Go(func() error { @@ -503,6 +508,7 @@ type runtimeServerRunner struct { leaderRedis map[raft.ServerAddress]string dynamoAddress string metricsAddress string + metricsToken string metricsRegistry *monitoring.Registry } @@ -516,7 +522,7 @@ func (r runtimeServerRunner) start() error { if err := startDynamoDBServer(r.ctx, r.lc, r.eg, r.dynamoAddress, r.shardStore, r.coordinate, r.metricsRegistry); err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) } - if err := startMetricsServer(r.ctx, r.lc, r.eg, r.metricsAddress, r.metricsRegistry.Handler()); err != nil { + if err := startMetricsServer(r.ctx, r.lc, r.eg, r.metricsAddress, r.metricsToken, r.metricsRegistry.Handler()); err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) } return nil diff --git a/monitoring/dynamodb.go b/monitoring/dynamodb.go index 8ed05eae..ebabfe38 100644 --- a/monitoring/dynamodb.go +++ b/monitoring/dynamodb.go @@ -4,6 +4,7 @@ import ( "net/http" "slices" "strings" + "sync" "time" "github.com/prometheus/client_golang/prometheus" @@ -15,6 +16,8 @@ const ( dynamoOutcomeSystemError = "system_error" dynamoOutcomeConditionalCheckFailed = "conditional_check_failed" dynamoOperationUnknown = "unknown" + dynamoTableOverflow = "_other" + dynamoMaxTrackedTables = 512 ) var dynamoOperations = []string{ @@ -70,6 +73,9 @@ type DynamoDBMetrics struct { userErrors *prometheus.CounterVec systemErrors *prometheus.CounterVec conditionalFails *prometheus.CounterVec + + mu sync.Mutex + trackedTables map[string]struct{} } func newDynamoDBMetrics(registerer prometheus.Registerer) *DynamoDBMetrics { @@ -161,6 +167,7 @@ func newDynamoDBMetrics(registerer prometheus.Registerer) *DynamoDBMetrics { }, []string{"operation", "table"}, ), + trackedTables: make(map[string]struct{}), } registerer.MustRegister( @@ -202,9 +209,10 @@ func (m *DynamoDBMetrics) ObserveDynamoDBRequest(report DynamoDBRequestReport) { if tableMetrics == nil { tableMetrics = map[string]DynamoDBTableMetrics{} } + tables, tableMetrics := m.normalizeReportTables(report.Tables, tableMetrics) m.observeRequest(operation, outcome, report) - m.observeTables(operation, outcome, report.Tables, tableMetrics) + m.observeTables(operation, outcome, tables, tableMetrics) } func normalizeDynamoOperation(operation string) string { @@ -233,6 +241,71 @@ func (m *DynamoDBMetrics) observeTables(operation string, outcome string, tables } } +func (m *DynamoDBMetrics) normalizeReportTables(tables []string, tableMetrics map[string]DynamoDBTableMetrics) ([]string, map[string]DynamoDBTableMetrics) { + if m == nil { + return nil, nil + } + normalized := make([]string, 0, len(tables)+len(tableMetrics)) + seen := make(map[string]struct{}, len(tables)+len(tableMetrics)) + aggregated := make(map[string]DynamoDBTableMetrics, len(tableMetrics)) + + addTable := func(table string) { + table = m.normalizeTable(table) + if table == "" { + return + } + if _, ok := seen[table]; ok { + return + } + seen[table] = struct{}{} + normalized = append(normalized, table) + } + + for _, table := range tables { + addTable(table) + } + for table, metrics := range tableMetrics { + table = m.normalizeTable(table) + if table == "" { + continue + } + addTable(table) + current := aggregated[table] + current.ReturnedItems += metrics.ReturnedItems + current.ScannedItems += metrics.ScannedItems + current.WrittenItems += metrics.WrittenItems + aggregated[table] = current + } + + slices.Sort(normalized) + return normalized, aggregated +} + +func (m *DynamoDBMetrics) normalizeTable(table string) string { + if m == nil { + return "" + } + table = strings.TrimSpace(table) + if table == "" { + return "" + } + if table == dynamoTableOverflow { + return dynamoTableOverflow + } + + m.mu.Lock() + defer m.mu.Unlock() + + if _, ok := m.trackedTables[table]; ok { + return table + } + if len(m.trackedTables) >= dynamoMaxTrackedTables { + return dynamoTableOverflow + } + m.trackedTables[table] = struct{}{} + return table +} + func (m *DynamoDBMetrics) observeTableActivity(operation string, table string, metrics DynamoDBTableMetrics) { if metrics.ReturnedItems > 0 { m.returnedItems.WithLabelValues(operation, table).Add(float64(metrics.ReturnedItems)) diff --git a/monitoring/dynamodb_test.go b/monitoring/dynamodb_test.go index 5f5965b2..27501691 100644 --- a/monitoring/dynamodb_test.go +++ b/monitoring/dynamodb_test.go @@ -1,6 +1,8 @@ package monitoring import ( + "net/http" + "strconv" "strings" "testing" "time" @@ -97,3 +99,37 @@ elastickv_dynamodb_requests_total{node_address="10.0.0.1:50051",node_id="n1",ope ) require.NoError(t, err) } + +func TestDynamoDBMetricsCollapsesOverflowTables(t *testing.T) { + registry := NewRegistry("n1", "10.0.0.1:50051") + metrics, ok := registry.DynamoDBObserver().(*DynamoDBMetrics) + require.True(t, ok) + + for i := 0; i < dynamoMaxTrackedTables; i++ { + metrics.trackedTables["table-"+strconv.Itoa(i)] = struct{}{} + } + + metrics.ObserveDynamoDBRequest(DynamoDBRequestReport{ + Operation: "PutItem", + HTTPStatus: http.StatusOK, + Tables: []string{"orders-overflow"}, + TableMetrics: map[string]DynamoDBTableMetrics{ + "orders-overflow": {WrittenItems: 2}, + }, + }) + + err := testutil.GatherAndCompare( + registry.Gatherer(), + strings.NewReader(` +# HELP elastickv_dynamodb_table_requests_total Total number of table-scoped DynamoDB-compatible API requests by operation and outcome. +# TYPE elastickv_dynamodb_table_requests_total counter +elastickv_dynamodb_table_requests_total{node_address="10.0.0.1:50051",node_id="n1",operation="PutItem",outcome="success",table="_other"} 1 +# HELP elastickv_dynamodb_written_items_total Total number of items written or deleted by DynamoDB-compatible write APIs. +# TYPE elastickv_dynamodb_written_items_total counter +elastickv_dynamodb_written_items_total{node_address="10.0.0.1:50051",node_id="n1",operation="PutItem",table="_other"} 2 +`), + "elastickv_dynamodb_table_requests_total", + "elastickv_dynamodb_written_items_total", + ) + require.NoError(t, err) +} diff --git a/monitoring/http.go b/monitoring/http.go new file mode 100644 index 00000000..cc3ce6fe --- /dev/null +++ b/monitoring/http.go @@ -0,0 +1,45 @@ +package monitoring + +import ( + "crypto/subtle" + "net" + "net/http" + "strings" +) + +// MetricsAddressRequiresToken reports whether the metrics endpoint is exposed beyond loopback. +func MetricsAddressRequiresToken(addr string) bool { + host, _, err := net.SplitHostPort(strings.TrimSpace(addr)) + if err != nil { + return true + } + host = strings.TrimSpace(host) + if host == "" || host == "0.0.0.0" || host == "::" { + return true + } + if strings.EqualFold(host, "localhost") { + return false + } + ip := net.ParseIP(host) + return ip == nil || !ip.IsLoopback() +} + +// ProtectHandler wraps a metrics handler with optional bearer-token authentication. +func ProtectHandler(handler http.Handler, bearerToken string) http.Handler { + if handler == nil { + return nil + } + token := strings.TrimSpace(bearerToken) + if token == "" { + return handler + } + expected := "Bearer " + token + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if subtle.ConstantTimeCompare([]byte(strings.TrimSpace(r.Header.Get("Authorization"))), []byte(expected)) != 1 { + w.Header().Set("WWW-Authenticate", "Bearer") + http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized) + return + } + handler.ServeHTTP(w, r) + }) +} diff --git a/monitoring/http_test.go b/monitoring/http_test.go new file mode 100644 index 00000000..98c06bc4 --- /dev/null +++ b/monitoring/http_test.go @@ -0,0 +1,35 @@ +package monitoring + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestMetricsAddressRequiresToken(t *testing.T) { + require.False(t, MetricsAddressRequiresToken("localhost:9090")) + require.False(t, MetricsAddressRequiresToken("127.0.0.1:9090")) + require.False(t, MetricsAddressRequiresToken("[::1]:9090")) + require.True(t, MetricsAddressRequiresToken(":9090")) + require.True(t, MetricsAddressRequiresToken("0.0.0.0:9090")) + require.True(t, MetricsAddressRequiresToken("10.0.0.1:9090")) +} + +func TestProtectHandlerRequiresBearerToken(t *testing.T) { + protected := ProtectHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNoContent) + }), "secret-token") + + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + rec := httptest.NewRecorder() + protected.ServeHTTP(rec, req) + require.Equal(t, http.StatusUnauthorized, rec.Code) + + req = httptest.NewRequest(http.MethodGet, "/metrics", nil) + req.Header.Set("Authorization", "Bearer secret-token") + rec = httptest.NewRecorder() + protected.ServeHTTP(rec, req) + require.Equal(t, http.StatusNoContent, rec.Code) +} From 2111cfe801702f4eb219cd003a8c8aaf19a97513 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Wed, 11 Mar 2026 00:11:30 +0900 Subject: [PATCH 10/13] Fix noctx lint in metrics tests --- monitoring/http_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/monitoring/http_test.go b/monitoring/http_test.go index 98c06bc4..dcc83996 100644 --- a/monitoring/http_test.go +++ b/monitoring/http_test.go @@ -1,6 +1,7 @@ package monitoring import ( + "context" "net/http" "net/http/httptest" "testing" @@ -22,12 +23,12 @@ func TestProtectHandlerRequiresBearerToken(t *testing.T) { w.WriteHeader(http.StatusNoContent) }), "secret-token") - req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + req := httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/metrics", nil) rec := httptest.NewRecorder() protected.ServeHTTP(rec, req) require.Equal(t, http.StatusUnauthorized, rec.Code) - req = httptest.NewRequest(http.MethodGet, "/metrics", nil) + req = httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/metrics", nil) req.Header.Set("Authorization", "Bearer secret-token") rec = httptest.NewRecorder() protected.ServeHTTP(rec, req) From e8a0c4996314730d6df2457c458a28fb73ae208e Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Wed, 11 Mar 2026 01:58:40 +0900 Subject: [PATCH 11/13] Refine metrics server startup handling --- cmd/server/demo.go | 60 ++++++++++++++++++++++++----------------- main.go | 35 +++--------------------- monitoring/http.go | 54 +++++++++++++++++++++++++++++++++++++ monitoring/raft.go | 9 ++++--- monitoring/raft_test.go | 8 ++++++ 5 files changed, 106 insertions(+), 60 deletions(-) diff --git a/cmd/server/demo.go b/cmd/server/demo.go index 640f66ee..498216b3 100644 --- a/cmd/server/demo.go +++ b/cmd/server/demo.go @@ -347,6 +347,8 @@ func setupRedis(ctx context.Context, lc net.ListenConfig, st store.MVCCStore, co func run(ctx context.Context, eg *errgroup.Group, cfg config) error { var lc net.ListenConfig + cleanup := startupCleanup{} + defer cleanup.Run() ldb, sdb, fss, err := setupStorage(cfg.raftDataDir) if err != nil { @@ -400,11 +402,15 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { if err != nil { return errors.WithStack(err) } + cleanup.Add(func() { + _ = grpcSock.Close() + }) rd, err := setupRedis(ctx, lc, st, coordinator, cfg.address, cfg.redisAddress, cfg.raftRedisMap) if err != nil { return err } + cleanup.Add(rd.Stop) dynamoL, err := lc.Listen(ctx, "tcp", cfg.dynamoAddress) if err != nil { return errors.WithStack(err) @@ -415,10 +421,14 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { coordinator, adapter.WithDynamoDBRequestObserver(metricsRegistry.DynamoDBObserver()), ) + cleanup.Add(ds.Stop) metricsL, ms, err := setupMetricsHTTPServer(ctx, lc, cfg.metricsAddress, cfg.metricsToken, metricsRegistry.Handler()) if err != nil { return err } + cleanup.Add(func() { + _ = metricsL.Close() + }) eg.Go(catalogWatcherTask(ctx, distCatalog, distEngine)) eg.Go(grpcShutdownTask(ctx, s, grpcSock, cfg.address, grpcSvc)) @@ -427,9 +437,10 @@ func run(ctx context.Context, eg *errgroup.Group, cfg config) error { eg.Go(redisServeTask(rd, cfg.redisAddress)) eg.Go(dynamoShutdownTask(ctx, ds, cfg.dynamoAddress)) eg.Go(dynamoServeTask(ds, cfg.dynamoAddress)) - eg.Go(metricsShutdownTask(ctx, ms, cfg.metricsAddress)) - eg.Go(metricsServeTask(ms, metricsL, cfg.metricsAddress)) + eg.Go(monitoring.MetricsShutdownTask(ctx, ms, cfg.metricsAddress)) + eg.Go(monitoring.MetricsServeTask(ms, metricsL, cfg.metricsAddress)) + cleanup.Release() return nil } @@ -441,13 +452,31 @@ func setupMetricsHTTPServer(ctx context.Context, lc net.ListenConfig, metricsAdd if err != nil { return nil, nil, errors.WithStack(err) } - ms := &http.Server{ - Handler: monitoring.ProtectHandler(handler, metricsToken), - ReadHeaderTimeout: time.Second, - } + ms := monitoring.NewMetricsServer(handler, metricsToken) return metricsL, ms, nil } +type startupCleanup struct { + funcs []func() +} + +func (c *startupCleanup) Add(fn func()) { + if fn == nil { + return + } + c.funcs = append(c.funcs, fn) +} + +func (c *startupCleanup) Release() { + c.funcs = nil +} + +func (c *startupCleanup) Run() { + for i := len(c.funcs) - 1; i >= 0; i-- { + c.funcs[i]() + } +} + func bootstrapClusterIfNeeded(r *raft.Raft, cfg config) error { if !cfg.raftBootstrap { return nil @@ -543,22 +572,3 @@ func dynamoServeTask(dynamoServer *adapter.DynamoDBServer, address string) func( return errors.WithStack(err) } } - -func metricsShutdownTask(ctx context.Context, server *http.Server, address string) func() error { - return func() error { - <-ctx.Done() - slog.Info("Shutting down metrics server", "address", address, "reason", ctx.Err()) - return server.Shutdown(context.Background()) - } -} - -func metricsServeTask(server *http.Server, listener net.Listener, address string) func() error { - return func() error { - slog.Info("Starting metrics server", "address", address) - err := server.Serve(listener) - if err == nil || errors.Is(err, http.ErrServerClosed) || errors.Is(err, net.ErrClosed) { - return nil - } - return errors.WithStack(err) - } -} diff --git a/main.go b/main.go index 5da96ff9..fd13174e 100644 --- a/main.go +++ b/main.go @@ -30,7 +30,6 @@ const ( electionTimeout = 2000 * time.Millisecond leaderLease = 100 * time.Millisecond raftMetricsObserveInterval = 5 * time.Second - metricsShutdownTimeout = 5 * time.Second ) var ( @@ -397,40 +396,12 @@ func startMetricsServer(ctx context.Context, lc *net.ListenConfig, eg *errgroup. if err != nil { return errors.Wrapf(err, "failed to listen on %s", metricsAddr) } - metricsServer := &http.Server{ - Handler: monitoring.ProtectHandler(handler, metricsToken), - ReadHeaderTimeout: time.Second, - } - eg.Go(func() error { - return serveMetricsUntilCanceled(ctx, metricsServer, metricsL, metricsAddr) - }) + metricsServer := monitoring.NewMetricsServer(handler, metricsToken) + eg.Go(monitoring.MetricsShutdownTask(ctx, metricsServer, metricsAddr)) + eg.Go(monitoring.MetricsServeTask(metricsServer, metricsL, metricsAddr)) return nil } -func serveMetricsUntilCanceled(ctx context.Context, server *http.Server, listener net.Listener, metricsAddr string) error { - stop := make(chan struct{}) - go watchMetricsShutdown(ctx, server, stop, metricsAddr) - - err := server.Serve(listener) - close(stop) - if errors.Is(err, http.ErrServerClosed) || errors.Is(err, net.ErrClosed) { - return nil - } - return errors.WithStack(err) -} - -func watchMetricsShutdown(ctx context.Context, server *http.Server, stop <-chan struct{}, metricsAddr string) { - select { - case <-ctx.Done(): - shutdownCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), metricsShutdownTimeout) - defer cancel() - if err := server.Shutdown(shutdownCtx); err != nil && !errors.Is(err, http.ErrServerClosed) && !errors.Is(err, net.ErrClosed) { - log.Printf("metrics server shutdown error on %s: %v", metricsAddr, err) - } - case <-stop: - } -} - func distributionCatalogStoreForGroup(runtimes []*raftGroupRuntime, groupID uint64) *distribution.CatalogStore { for _, rt := range runtimes { if rt == nil || rt.store == nil { diff --git a/monitoring/http.go b/monitoring/http.go index cc3ce6fe..3738f6f5 100644 --- a/monitoring/http.go +++ b/monitoring/http.go @@ -1,10 +1,20 @@ package monitoring import ( + "context" "crypto/subtle" + "log/slog" "net" "net/http" "strings" + "time" + + "github.com/cockroachdb/errors" +) + +const ( + metricsReadHeaderTimeout = time.Second + metricsShutdownTimeout = 5 * time.Second ) // MetricsAddressRequiresToken reports whether the metrics endpoint is exposed beyond loopback. @@ -43,3 +53,47 @@ func ProtectHandler(handler http.Handler, bearerToken string) http.Handler { handler.ServeHTTP(w, r) }) } + +// NewMetricsServer constructs an HTTP server for the protected metrics handler. +func NewMetricsServer(handler http.Handler, bearerToken string) *http.Server { + if handler == nil { + return nil + } + return &http.Server{ + Handler: ProtectHandler(handler, bearerToken), + ReadHeaderTimeout: metricsReadHeaderTimeout, + } +} + +// MetricsShutdownTask returns an errgroup task that stops the metrics server on context cancellation. +func MetricsShutdownTask(ctx context.Context, server *http.Server, address string) func() error { + return func() error { + if server == nil { + return nil + } + <-ctx.Done() + slog.Info("Shutting down metrics server", "address", address, "reason", ctx.Err()) + shutdownCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), metricsShutdownTimeout) + defer cancel() + err := server.Shutdown(shutdownCtx) + if err == nil || errors.Is(err, http.ErrServerClosed) || errors.Is(err, net.ErrClosed) { + return nil + } + return errors.WithStack(err) + } +} + +// MetricsServeTask returns an errgroup task that serves the metrics endpoint until shutdown. +func MetricsServeTask(server *http.Server, listener net.Listener, address string) func() error { + return func() error { + if server == nil || listener == nil { + return nil + } + slog.Info("Starting metrics server", "address", address) + err := server.Serve(listener) + if err == nil || errors.Is(err, http.ErrServerClosed) || errors.Is(err, net.ErrClosed) { + return nil + } + return errors.WithStack(err) + } +} diff --git a/monitoring/raft.go b/monitoring/raft.go index 56606a81..ebdf7357 100644 --- a/monitoring/raft.go +++ b/monitoring/raft.go @@ -23,7 +23,10 @@ var raftStates = []string{ var loggedLastContactParseValues sync.Map -const defaultObserveInterval = 5 * time.Second +const ( + defaultObserveInterval = 5 * time.Second + lastContactUnknownValue = -1 +) // RaftRuntime describes a raft group observed by the metrics exporter. type RaftRuntime struct { @@ -304,7 +307,7 @@ func parseLastContactSeconds(raw string) float64 { raw = strings.TrimSpace(raw) switch raw { case "", "never": - return 0 + return lastContactUnknownValue case "0": return 0 } @@ -313,7 +316,7 @@ func parseLastContactSeconds(raw string) float64 { if _, loaded := loggedLastContactParseValues.LoadOrStore(raw, struct{}{}); !loaded { slog.Warn("failed to parse raft last_contact metric", "raw", raw, "err", err) } - return 0 + return lastContactUnknownValue } return d.Seconds() } diff --git a/monitoring/raft_test.go b/monitoring/raft_test.go index 4e48b875..76748509 100644 --- a/monitoring/raft_test.go +++ b/monitoring/raft_test.go @@ -39,3 +39,11 @@ elastickv_raft_member_present{group="1",member_address="10.0.0.1:50051",member_i ) require.NoError(t, err) } + +func TestParseLastContactSeconds(t *testing.T) { + require.Equal(t, float64(lastContactUnknownValue), parseLastContactSeconds("")) + require.Equal(t, float64(lastContactUnknownValue), parseLastContactSeconds("never")) + require.Equal(t, 0.0, parseLastContactSeconds("0")) + require.Equal(t, 0.25, parseLastContactSeconds("250ms")) + require.Equal(t, float64(lastContactUnknownValue), parseLastContactSeconds("invalid")) +} From a192934dfc190c1a1fc58b2b25a44d87db842bdc Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Wed, 11 Mar 2026 22:05:12 +0900 Subject: [PATCH 12/13] Simplify metrics request reader flow --- adapter/dynamodb.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/adapter/dynamodb.go b/adapter/dynamodb.go index 9c26ae8a..f989b936 100644 --- a/adapter/dynamodb.go +++ b/adapter/dynamodb.go @@ -139,13 +139,13 @@ func (c *countingReadCloser) Read(p []byte) (int, error) { } n, err := c.ReadCloser.Read(p) c.bytesRead += n - if err == nil { - return n, nil - } - if errors.Is(err, io.EOF) { - return n, io.EOF + if err != nil { + if errors.Is(err, io.EOF) { + return n, io.EOF + } + return n, errors.WithStack(err) } - return n, errors.WithStack(err) + return n, nil } func (c *countingReadCloser) Close() error { From 7b395fb7afdaa4b5c0eaf879082a4e4d43b8684a Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Thu, 12 Mar 2026 02:52:42 +0900 Subject: [PATCH 13/13] Align demo metrics monitoring setup --- README.md | 16 ++++++++-- cmd/server/demo.go | 45 +++++++++++----------------- cmd/server/demo_test.go | 12 ++++++++ docs/docker_multinode_manual_run.md | 22 +++++++++++++- internal/cleanup.go | 26 ++++++++++++++++ internal/cleanup_test.go | 29 ++++++++++++++++++ main.go | 10 +++++-- monitoring/dynamodb.go | 10 +++---- monitoring/prometheus/prometheus.yml | 3 ++ 9 files changed, 134 insertions(+), 39 deletions(-) create mode 100644 cmd/server/demo_test.go create mode 100644 internal/cleanup.go create mode 100644 internal/cleanup_test.go diff --git a/README.md b/README.md index d81b414d..69cd190b 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Deployment/runbook documents: ## Metrics and Grafana -Elastickv now exposes Prometheus metrics on `--metricsAddress` (default: `localhost:9090` in `main.go`, `:9090` in `cmd/server/demo.go`). +Elastickv now exposes Prometheus metrics on `--metricsAddress` (default: `localhost:9090` in `main.go`, `127.0.0.1:9090` in `cmd/server/demo.go` single-node mode). The built-in 3-node demo binds metrics on `0.0.0.0:9091`, `0.0.0.0:9092`, and `0.0.0.0:9093`, and uses the bearer token `demo-metrics-token` unless `--metricsToken` is set. The exported metrics cover: @@ -46,7 +46,17 @@ Provisioned monitoring assets live under: - `monitoring/grafana/provisioning/` - `monitoring/docker-compose.yml` -To scrape a multi-node deployment, bind `--metricsAddress` to each node's private IP, for example `--metricsAddress "10.0.0.11:9090"`. +If you bind `--metricsAddress` to a non-loopback address, `--metricsToken` is required. Prometheus must send the same bearer token, for example: + +```yaml +scrape_configs: + - job_name: elastickv + authorization: + type: Bearer + credentials: YOUR_METRICS_TOKEN +``` + +To scrape a multi-node deployment, bind `--metricsAddress` to each node's private IP and set `--metricsToken`, for example `--metricsAddress "10.0.0.11:9090" --metricsToken "YOUR_METRICS_TOKEN"`. For the local 3-node demo, start Grafana and Prometheus with: @@ -55,6 +65,8 @@ cd monitoring docker compose up -d ``` +`monitoring/prometheus/prometheus.yml` assumes the demo token `demo-metrics-token`. If you override `--metricsToken` when running `go run ./cmd/server/demo.go`, update `authorization.credentials` in that file to match. + ## Example Usage diff --git a/cmd/server/demo.go b/cmd/server/demo.go index 498216b3..71b8bd5c 100644 --- a/cmd/server/demo.go +++ b/cmd/server/demo.go @@ -19,6 +19,7 @@ import ( raftadminpb "github.com/Jille/raftadmin/proto" "github.com/bootjp/elastickv/adapter" "github.com/bootjp/elastickv/distribution" + internalutil "github.com/bootjp/elastickv/internal" "github.com/bootjp/elastickv/kv" "github.com/bootjp/elastickv/monitoring" pb "github.com/bootjp/elastickv/proto" @@ -98,13 +99,14 @@ func main() { } else { // Demo cluster mode (3 nodes) slog.Info("Starting demo cluster with 3 nodes...") + demoMetricsToken := effectiveDemoMetricsToken(*metricsToken) nodes := []config{ { address: "127.0.0.1:50051", redisAddress: "127.0.0.1:63791", dynamoAddress: "127.0.0.1:63801", - metricsAddress: "127.0.0.1:9091", - metricsToken: *metricsToken, + metricsAddress: "0.0.0.0:9091", + metricsToken: demoMetricsToken, raftID: "n1", raftDataDir: "", // In-memory raftBootstrap: true, @@ -113,8 +115,8 @@ func main() { address: "127.0.0.1:50052", redisAddress: "127.0.0.1:63792", dynamoAddress: "127.0.0.1:63802", - metricsAddress: "127.0.0.1:9092", - metricsToken: *metricsToken, + metricsAddress: "0.0.0.0:9092", + metricsToken: demoMetricsToken, raftID: "n2", raftDataDir: "", raftBootstrap: false, @@ -123,8 +125,8 @@ func main() { address: "127.0.0.1:50053", redisAddress: "127.0.0.1:63793", dynamoAddress: "127.0.0.1:63803", - metricsAddress: "127.0.0.1:9093", - metricsToken: *metricsToken, + metricsAddress: "0.0.0.0:9093", + metricsToken: demoMetricsToken, raftID: "n3", raftDataDir: "", raftBootstrap: false, @@ -188,6 +190,14 @@ func main() { } } +func effectiveDemoMetricsToken(token string) string { + token = strings.TrimSpace(token) + if token != "" { + return token + } + return strings.Join([]string{"demo", "metrics", "token"}, "-") +} + func joinCluster(ctx context.Context, nodes []config) error { leader := nodes[0] // Give servers some time to start @@ -347,7 +357,7 @@ func setupRedis(ctx context.Context, lc net.ListenConfig, st store.MVCCStore, co func run(ctx context.Context, eg *errgroup.Group, cfg config) error { var lc net.ListenConfig - cleanup := startupCleanup{} + cleanup := internalutil.CleanupStack{} defer cleanup.Run() ldb, sdb, fss, err := setupStorage(cfg.raftDataDir) @@ -456,27 +466,6 @@ func setupMetricsHTTPServer(ctx context.Context, lc net.ListenConfig, metricsAdd return metricsL, ms, nil } -type startupCleanup struct { - funcs []func() -} - -func (c *startupCleanup) Add(fn func()) { - if fn == nil { - return - } - c.funcs = append(c.funcs, fn) -} - -func (c *startupCleanup) Release() { - c.funcs = nil -} - -func (c *startupCleanup) Run() { - for i := len(c.funcs) - 1; i >= 0; i-- { - c.funcs[i]() - } -} - func bootstrapClusterIfNeeded(r *raft.Raft, cfg config) error { if !cfg.raftBootstrap { return nil diff --git a/cmd/server/demo_test.go b/cmd/server/demo_test.go new file mode 100644 index 00000000..edd92a98 --- /dev/null +++ b/cmd/server/demo_test.go @@ -0,0 +1,12 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestEffectiveDemoMetricsToken(t *testing.T) { + require.Equal(t, "custom-token", effectiveDemoMetricsToken(" custom-token ")) + require.Equal(t, "demo-metrics-token", effectiveDemoMetricsToken("")) +} diff --git a/docs/docker_multinode_manual_run.md b/docs/docker_multinode_manual_run.md index 1861e7aa..96743b5c 100644 --- a/docs/docker_multinode_manual_run.md +++ b/docs/docker_multinode_manual_run.md @@ -84,6 +84,12 @@ Shared fixed bootstrap voters (5-node example): RAFT_BOOTSTRAP_MEMBERS="n1=10.0.0.11:50051,n2=10.0.0.12:50051,n3=10.0.0.13:50051,n4=10.0.0.14:50051,n5=10.0.0.15:50051" ``` +Shared metrics bearer token (required because the examples bind `--metricsAddress` to non-loopback VM IPs): + +```bash +ELASTICKV_METRICS_TOKEN="$(openssl rand -hex 32)" +``` + For a 4-node cluster, remove the `n5` entry from both variables. ## 4) Start Nodes with `docker run` @@ -94,6 +100,7 @@ Binding guidance: - Set `--address`, `--redisAddress`, and `--dynamoAddress` to the VM private IP. - Set `--metricsAddress` to the VM private IP if Prometheus scrapes from another host. +- Set `--metricsToken` to the same shared bearer token on every node whenever `--metricsAddress` is non-loopback. - Do not use `0.0.0.0` as the advertised address. - Do not use `localhost` for cluster communication. @@ -112,6 +119,7 @@ docker run -d \ --redisAddress "10.0.0.11:6379" \ --dynamoAddress "10.0.0.11:8000" \ --metricsAddress "10.0.0.11:9090" \ + --metricsToken "${ELASTICKV_METRICS_TOKEN}" \ --raftId "n1" \ --raftDataDir "/var/lib/elastickv" \ --raftRedisMap "${RAFT_TO_REDIS_MAP}" \ @@ -134,6 +142,7 @@ docker run -d \ --redisAddress "10.0.0.12:6379" \ --dynamoAddress "10.0.0.12:8000" \ --metricsAddress "10.0.0.12:9090" \ + --metricsToken "${ELASTICKV_METRICS_TOKEN}" \ --raftId "n2" \ --raftDataDir "/var/lib/elastickv" \ --raftRedisMap "${RAFT_TO_REDIS_MAP}" \ @@ -213,7 +222,18 @@ redis-cli -h 10.0.0.12 -p 6379 GET health Check the local Prometheus endpoint on any node: ```bash -curl -fsS http://10.0.0.11:9090/metrics | grep '^elastickv_' +curl -fsS -H "Authorization: Bearer ${ELASTICKV_METRICS_TOKEN}" \ + http://10.0.0.11:9090/metrics | grep '^elastickv_' +``` + +Prometheus must send the same bearer token when scraping: + +```yaml +scrape_configs: + - job_name: elastickv + authorization: + type: Bearer + credentials: ${ELASTICKV_METRICS_TOKEN} ``` Grafana/Prometheus provisioning examples are available under `monitoring/`. diff --git a/internal/cleanup.go b/internal/cleanup.go new file mode 100644 index 00000000..7930e292 --- /dev/null +++ b/internal/cleanup.go @@ -0,0 +1,26 @@ +package internal + +// CleanupStack stores cleanup callbacks and runs them in LIFO order. +type CleanupStack struct { + funcs []func() +} + +// Add registers a cleanup callback. +func (c *CleanupStack) Add(fn func()) { + if fn == nil { + return + } + c.funcs = append(c.funcs, fn) +} + +// Release discards all registered callbacks. +func (c *CleanupStack) Release() { + c.funcs = nil +} + +// Run executes registered callbacks in reverse registration order. +func (c *CleanupStack) Run() { + for i := len(c.funcs) - 1; i >= 0; i-- { + c.funcs[i]() + } +} diff --git a/internal/cleanup_test.go b/internal/cleanup_test.go new file mode 100644 index 00000000..f6e50d28 --- /dev/null +++ b/internal/cleanup_test.go @@ -0,0 +1,29 @@ +package internal + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCleanupStackRunUsesLIFO(t *testing.T) { + stack := CleanupStack{} + var calls []string + + stack.Add(func() { calls = append(calls, "first") }) + stack.Add(func() { calls = append(calls, "second") }) + stack.Run() + + require.Equal(t, []string{"second", "first"}, calls) +} + +func TestCleanupStackReleaseDropsCallbacks(t *testing.T) { + stack := CleanupStack{} + called := false + + stack.Add(func() { called = true }) + stack.Release() + stack.Run() + + require.False(t, called) +} diff --git a/main.go b/main.go index fd13174e..5a2a91ad 100644 --- a/main.go +++ b/main.go @@ -14,6 +14,7 @@ import ( "github.com/Jille/raftadmin" "github.com/bootjp/elastickv/adapter" "github.com/bootjp/elastickv/distribution" + internalutil "github.com/bootjp/elastickv/internal" "github.com/bootjp/elastickv/kv" "github.com/bootjp/elastickv/monitoring" pb "github.com/bootjp/elastickv/proto" @@ -77,16 +78,19 @@ func run() error { return err } + cleanup := internalutil.CleanupStack{} + defer cleanup.Run() + ctx, cancel := context.WithCancel(context.Background()) clock := kv.NewHLC() shardStore := kv.NewShardStore(cfg.engine, shardGroups) - defer func() { + cleanup.Add(func() { _ = shardStore.Close() for _, rt := range runtimes { rt.Close() } - }() - defer cancel() + }) + cleanup.Add(cancel) coordinate := kv.NewShardedCoordinator(cfg.engine, shardGroups, cfg.defaultGroup, clock, shardStore) distCatalog, err := setupDistributionCatalog(ctx, runtimes, cfg.engine) if err != nil { diff --git a/monitoring/dynamodb.go b/monitoring/dynamodb.go index ebabfe38..502ea72b 100644 --- a/monitoring/dynamodb.go +++ b/monitoring/dynamodb.go @@ -249,27 +249,27 @@ func (m *DynamoDBMetrics) normalizeReportTables(tables []string, tableMetrics ma seen := make(map[string]struct{}, len(tables)+len(tableMetrics)) aggregated := make(map[string]DynamoDBTableMetrics, len(tableMetrics)) - addTable := func(table string) { + addTable := func(table string) string { table = m.normalizeTable(table) if table == "" { - return + return "" } if _, ok := seen[table]; ok { - return + return table } seen[table] = struct{}{} normalized = append(normalized, table) + return table } for _, table := range tables { addTable(table) } for table, metrics := range tableMetrics { - table = m.normalizeTable(table) + table = addTable(table) if table == "" { continue } - addTable(table) current := aggregated[table] current.ReturnedItems += metrics.ReturnedItems current.ScannedItems += metrics.ScannedItems diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml index e8edb150..2c51086d 100644 --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -4,6 +4,9 @@ global: scrape_configs: - job_name: elastickv + authorization: + type: Bearer + credentials: demo-metrics-token static_configs: - targets: - host.docker.internal:9091