From f2db55b05bd5cca1d5ebd377abe8738ffc480a5f Mon Sep 17 00:00:00 2001 From: Jens Neuse Date: Fri, 6 Mar 2026 08:27:46 +0100 Subject: [PATCH 1/5] feat(cache): add header impact analytics to detect unnecessary header forwarding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add HeaderImpactEvent analytics to detect when header forwarding rules don't affect subgraph responses. Events track (BaseKey, HeaderHash, ResponseHash) to enable cross-request analysis — when the same entity produces identical responses with different headers, header forwarding is irrelevant for that key. Changes: - New HeaderImpactEvent type, collector, and snapshot field - Capture headerHash in prepareCacheKeys and record events in updateL2Cache - E2E tests with real HTTP headers to verify response consistency across different headers - Unit tests for collector deduplication - Test conventions documented in execution/engine/CLAUDE.md Co-Authored-By: Claude Opus 4.6 --- execution/engine/CLAUDE.md | 110 +++++- .../federation_caching_analytics_test.go | 329 ++++++++++++++++++ .../engine/federation_caching_helpers_test.go | 76 ++++ v2/pkg/engine/resolve/cache_analytics.go | 63 +++- v2/pkg/engine/resolve/cache_analytics_test.go | 52 +++ v2/pkg/engine/resolve/loader.go | 5 + v2/pkg/engine/resolve/loader_cache.go | 38 ++ 7 files changed, 648 insertions(+), 25 deletions(-) diff --git a/execution/engine/CLAUDE.md b/execution/engine/CLAUDE.md index 2ea12f2432..8d15f0b4ce 100644 --- a/execution/engine/CLAUDE.md +++ b/execution/engine/CLAUDE.md @@ -1,25 +1,103 @@ -# Caching Test Rules +# E2E Test Conventions for `execution/engine` + +## Inline everything + +No `const` blocks, no named variables for expected values. Put all literal values (cache keys, hashes, byte sizes, query strings, expected responses) directly inline in assertions and setup code. Duplicate values across subtests rather than sharing — each subtest must be fully self-contained and readable without scrolling up. + +```go +// CORRECT: literals inline in assertions +assert.Equal(t, normalizeSnapshot(resolve.CacheAnalyticsSnapshot{ + L2Reads: []resolve.CacheKeyEvent{ + {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews"}, + }, + L2Writes: []resolve.CacheWriteEvent{ + {CacheKey: `11945571715631340836:{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", ByteSize: 177, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + }, +}), snap) + +// WRONG: named constants defined above the test logic +const ( + keyProductTop1 = `{"__typename":"Product","key":{"upc":"top-1"}}` + byteSizeProductTop1 = 177 +) +``` + +## Inline setup too + +Config structs (e.g. `SubgraphCachingConfigs`) should be defined inline in the setup call, not as named variables. Only keep variables for state that is mutated or referenced multiple times at runtime (e.g. `tracker`, `mockHeaders`, `setup`). + +```go +// CORRECT: config inline +setup := federationtesting.NewFederationSetup(addCachingGateway( + withCachingLoaderCache(map[string]resolve.LoaderCache{"default": NewFakeLoaderCache()}), + withHTTPClient(&http.Client{Transport: tracker}), + withSubgraphEntityCachingConfigs(engine.SubgraphCachingConfigs{ + {SubgraphName: "products", RootFieldCaching: plan.RootFieldCacheConfigurations{ + {TypeName: "Query", FieldName: "topProducts", CacheName: "default", TTL: 30 * time.Second}, + }}, + }), +)) + +// WRONG: named variable for config used only once +configs := engine.SubgraphCachingConfigs{...} +setup := federationtesting.NewFederationSetup(addCachingGateway( + withSubgraphEntityCachingConfigs(configs), +)) +``` + +## Self-contained subtests + +Each `t.Run` subtest must be independently readable. No shared constants, variables, or helpers defined in the parent test function. Duplication across subtests is preferred over sharing. + +## Inline queries + +Use `QueryStringWithHeaders` with inline GraphQL query strings. Do not load queries from files. + +```go +// CORRECT +resp, headers := gqlClient.QueryStringWithHeaders(ctx, setup.GatewayServer.URL, + `query { topProducts { name reviews { body } } }`, nil, t) + +// WRONG +resp := gqlClient.QueryWithHeaders(ctx, setup.GatewayServer.URL, + cachingTestQueryPath("queries/my_query.query"), nil, t) +``` + +## Full snapshot assertions + +Assert complete `CacheAnalyticsSnapshot` structs — not just the fields you care about. This catches unexpected events. + +## Snapshot comments + +Every event line in a snapshot assertion MUST have a brief comment explaining **why** that event occurred. + +```go +// CORRECT: explains causation +{CacheKey: `...`, Kind: resolve.CacheKeyMiss, Shadow: true}, // Shadow L2 miss: cache empty +{CacheKey: `...`, Kind: resolve.CacheKeyMiss, Shadow: false}, // L2 miss: shadow mode not implemented for root fields + +// WRONG: restates the field value +{CacheKey: `...`, Kind: resolve.CacheKeyMiss}, // this is a miss +``` ## Always check every cache log Every `defaultCache.ClearLog()` MUST be followed by `defaultCache.GetLog()` with full assertions BEFORE the next `ClearLog()` or end of test. Never clear a log without verifying its contents — skipped checks hide regressions. +## http.Header is a reference type + +When returning `http.Header` from mocks, always `.Clone()` before returning. The HTTP client mutates the header map in-place (adds `Accept`, `Content-Type`, `Accept-Encoding`), which corrupts the mock's stored state and causes different hashes on subsequent calls. + ```go -// CORRECT: every ClearLog has a corresponding GetLog + assertion -defaultCache.ClearLog() -resp := gqlClient.Query(...) -assert.Equal(t, expectedResp, string(resp)) - -logAfterFirst := defaultCache.GetLog() -wantLog := []CacheLogEntry{ - {Operation: "get", Keys: []string{`...`}, Hits: []bool{false}}, - {Operation: "set", Keys: []string{`...`}}, +// CORRECT: clone before returning +func (m *mock) HeadersForSubgraph(name string) (http.Header, uint64) { + h := m.headers[name] + return h.Clone(), hashHeaders(h) } -assert.Equal(t, sortCacheLogKeys(wantLog), sortCacheLogKeys(logAfterFirst), "descriptive message") -// WRONG: ClearLog without checking — hides bugs -defaultCache.ClearLog() -resp := gqlClient.Query(...) -assert.Equal(t, expectedResp, string(resp)) -defaultCache.ClearLog() // previous log lost! +// WRONG: returns the same map reference — will be mutated by HTTP client +func (m *mock) HeadersForSubgraph(name string) (http.Header, uint64) { + h := m.headers[name] + return h, hashHeaders(h) +} ``` \ No newline at end of file diff --git a/execution/engine/federation_caching_analytics_test.go b/execution/engine/federation_caching_analytics_test.go index 347696fa10..69c0915f07 100644 --- a/execution/engine/federation_caching_analytics_test.go +++ b/execution/engine/federation_caching_analytics_test.go @@ -1786,3 +1786,332 @@ func TestFederationCachingAliases(t *testing.T) { assert.Equal(t, 1, accountsCalls, "Should call accounts once (second alias L1 hit for same User entity)") }) } + +func TestHeaderImpactAnalyticsE2E(t *testing.T) { + t.Run("shadow mode with header prefix - same response different headers", func(t *testing.T) { + mockHeaders := &headerForwardingMock{ + headers: map[string]http.Header{ + "products": {"Authorization": {"Bearer token-A"}}, + "reviews": {"Authorization": {"Bearer token-A"}}, + "accounts": {"Authorization": {"Bearer token-A"}}, + }, + } + tracker := newSubgraphCallTracker(http.DefaultTransport) + + setup := federationtesting.NewFederationSetup(addCachingGateway( + withCachingEnableART(false), + withCachingLoaderCache(map[string]resolve.LoaderCache{"default": NewFakeLoaderCache()}), + withHTTPClient(&http.Client{Transport: tracker}), + withSubgraphHeadersBuilder(mockHeaders), + withCachingOptionsFunc(resolve.CachingOptions{EnableL2Cache: true, EnableCacheAnalytics: true}), + withSubgraphEntityCachingConfigs(engine.SubgraphCachingConfigs{ + { + SubgraphName: "products", + RootFieldCaching: plan.RootFieldCacheConfigurations{ + {TypeName: "Query", FieldName: "topProducts", CacheName: "default", TTL: 30 * time.Second, IncludeSubgraphHeaderPrefix: true, ShadowMode: true}, + }, + }, + { + SubgraphName: "reviews", + EntityCaching: plan.EntityCacheConfigurations{ + {TypeName: "Product", CacheName: "default", TTL: 30 * time.Second, IncludeSubgraphHeaderPrefix: true, ShadowMode: true}, + }, + }, + { + SubgraphName: "accounts", + EntityCaching: plan.EntityCacheConfigurations{ + {TypeName: "User", CacheName: "default", TTL: 30 * time.Second, IncludeSubgraphHeaderPrefix: true, ShadowMode: true}, + }, + }, + }), + )) + t.Cleanup(setup.Close) + + gqlClient := NewGraphqlClient(http.DefaultClient) + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + // Request 1: L2 miss → fetch → write with token-A header hash prefix + tracker.Reset() + resp, headers := gqlClient.QueryStringWithHeaders(ctx, setup.GatewayServer.URL, + `query { topProducts { name reviews { body authorWithoutProvides { username } } } }`, nil, t) + assert.Equal(t, + `{"data":{"topProducts":[{"name":"Trilby","reviews":[{"body":"A highly effective form of birth control.","authorWithoutProvides":{"username":"Me"}}]},{"name":"Fedora","reviews":[{"body":"Fedoras are one of the most fashionable hats around and can look great with a variety of outfits.","authorWithoutProvides":{"username":"Me"}}]}]}}`, + string(resp)) + + snap1 := normalizeSnapshot(parseCacheAnalytics(t, headers)) + + // Capture response hashes from first request (deterministic subgraph responses) + responseHashes := make(map[string]uint64, len(snap1.HeaderImpactEvents)) + for _, ev := range snap1.HeaderImpactEvents { + responseHashes[ev.BaseKey] = ev.ResponseHash + } + + assert.Equal(t, normalizeSnapshot(resolve.CacheAnalyticsSnapshot{ + L2Reads: []resolve.CacheKeyEvent{ + {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews", Shadow: true}, // Shadow L2 miss: cache empty + {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews", Shadow: true}, // Shadow L2 miss: cache empty + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products", Shadow: false}, // L2 miss: shadow mode not implemented for root fields + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts", Shadow: true}, // Shadow L2 miss: User not yet cached + }, + L2Writes: []resolve.CacheWriteEvent{ + {CacheKey: `11945571715631340836:{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", ByteSize: 177, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `11945571715631340836:{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", ByteSize: 233, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `11945571715631340836:{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", ByteSize: 127, DataSource: "products", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `11945571715631340836:{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", ByteSize: 49, DataSource: "accounts", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + }, + FieldHashes: []resolve.EntityFieldHash{ + {EntityType: "Product", FieldName: "name", FieldHash: 1032923585965781586, KeyRaw: `{"upc":"top-1"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "Product", FieldName: "name", FieldHash: 2432227032303632641, KeyRaw: `{"upc":"top-2"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceSubgraph}, + }, + EntityTypes: []resolve.EntityTypeInfo{ + {TypeName: "Product", Count: 2, UniqueKeys: 2}, + {TypeName: "User", Count: 2, UniqueKeys: 1}, + }, + HeaderImpactEvents: []resolve.HeaderImpactEvent{ + // Authorization: Bearer token-A → header hash 11945571715631340836 + {BaseKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, HeaderHash: 11945571715631340836, ResponseHash: responseHashes[`{"__typename":"Product","key":{"upc":"top-1"}}`], EntityType: "Product", DataSource: "reviews"}, + {BaseKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, HeaderHash: 11945571715631340836, ResponseHash: responseHashes[`{"__typename":"Product","key":{"upc":"top-2"}}`], EntityType: "Product", DataSource: "reviews"}, + {BaseKey: `{"__typename":"Query","field":"topProducts"}`, HeaderHash: 11945571715631340836, ResponseHash: responseHashes[`{"__typename":"Query","field":"topProducts"}`], EntityType: "Query", DataSource: "products"}, + {BaseKey: `{"__typename":"User","key":{"id":"1234"}}`, HeaderHash: 11945571715631340836, ResponseHash: responseHashes[`{"__typename":"User","key":{"id":"1234"}}`], EntityType: "User", DataSource: "accounts"}, + }, + }), snap1) + + // Request 2: Switch to token-B headers (actually different headers forwarded to subgraphs) + mockHeaders.setAll(http.Header{"Authorization": {"Bearer token-B"}}) + + tracker.Reset() + resp, headers = gqlClient.QueryStringWithHeaders(ctx, setup.GatewayServer.URL, + `query { topProducts { name reviews { body authorWithoutProvides { username } } } }`, nil, t) + assert.Equal(t, + `{"data":{"topProducts":[{"name":"Trilby","reviews":[{"body":"A highly effective form of birth control.","authorWithoutProvides":{"username":"Me"}}]},{"name":"Fedora","reviews":[{"body":"Fedoras are one of the most fashionable hats around and can look great with a variety of outfits.","authorWithoutProvides":{"username":"Me"}}]}]}}`, + string(resp)) + + snap2 := normalizeSnapshot(parseCacheAnalytics(t, headers)) + + // Key insight: different headers (token-B) → SAME ResponseHash → headers are irrelevant + assert.Equal(t, normalizeSnapshot(resolve.CacheAnalyticsSnapshot{ + L2Reads: []resolve.CacheKeyEvent{ + {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews", Shadow: true}, // token-B prefix not in cache + {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews", Shadow: true}, // token-B prefix not in cache + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products", Shadow: false}, // shadow mode not implemented for root fields + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts", Shadow: true}, // token-B prefix not in cache + }, + L2Writes: []resolve.CacheWriteEvent{ + {CacheKey: `4753115417090238877:{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", ByteSize: 177, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `4753115417090238877:{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", ByteSize: 233, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `4753115417090238877:{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", ByteSize: 127, DataSource: "products", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `4753115417090238877:{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", ByteSize: 49, DataSource: "accounts", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + }, + FieldHashes: []resolve.EntityFieldHash{ + {EntityType: "Product", FieldName: "name", FieldHash: 1032923585965781586, KeyRaw: `{"upc":"top-1"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "Product", FieldName: "name", FieldHash: 2432227032303632641, KeyRaw: `{"upc":"top-2"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceSubgraph}, + }, + EntityTypes: []resolve.EntityTypeInfo{ + {TypeName: "Product", Count: 2, UniqueKeys: 2}, + {TypeName: "User", Count: 2, UniqueKeys: 1}, + }, + HeaderImpactEvents: []resolve.HeaderImpactEvent{ + // Authorization: Bearer token-B → header hash 4753115417090238877; SAME ResponseHash → headers irrelevant + {BaseKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, HeaderHash: 4753115417090238877, ResponseHash: responseHashes[`{"__typename":"Product","key":{"upc":"top-1"}}`], EntityType: "Product", DataSource: "reviews"}, + {BaseKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, HeaderHash: 4753115417090238877, ResponseHash: responseHashes[`{"__typename":"Product","key":{"upc":"top-2"}}`], EntityType: "Product", DataSource: "reviews"}, + {BaseKey: `{"__typename":"Query","field":"topProducts"}`, HeaderHash: 4753115417090238877, ResponseHash: responseHashes[`{"__typename":"Query","field":"topProducts"}`], EntityType: "Query", DataSource: "products"}, + {BaseKey: `{"__typename":"User","key":{"id":"1234"}}`, HeaderHash: 4753115417090238877, ResponseHash: responseHashes[`{"__typename":"User","key":{"id":"1234"}}`], EntityType: "User", DataSource: "accounts"}, + }, + }), snap2) + }) + + t.Run("non-shadow mode - events on L2 miss, no events on L2 hit", func(t *testing.T) { + tracker := newSubgraphCallTracker(http.DefaultTransport) + + setup := federationtesting.NewFederationSetup(addCachingGateway( + withCachingEnableART(false), + withCachingLoaderCache(map[string]resolve.LoaderCache{"default": NewFakeLoaderCache()}), + withHTTPClient(&http.Client{Transport: tracker}), + withSubgraphHeadersBuilder(&headerForwardingMock{ + headers: map[string]http.Header{ + "products": {"Authorization": {"Bearer token-A"}}, + "reviews": {"Authorization": {"Bearer token-A"}}, + "accounts": {"Authorization": {"Bearer token-A"}}, + }, + }), + withCachingOptionsFunc(resolve.CachingOptions{EnableL2Cache: true, EnableCacheAnalytics: true}), + withSubgraphEntityCachingConfigs(engine.SubgraphCachingConfigs{ + { + SubgraphName: "products", + RootFieldCaching: plan.RootFieldCacheConfigurations{ + {TypeName: "Query", FieldName: "topProducts", CacheName: "default", TTL: 30 * time.Second, IncludeSubgraphHeaderPrefix: true}, + }, + }, + { + SubgraphName: "reviews", + EntityCaching: plan.EntityCacheConfigurations{ + {TypeName: "Product", CacheName: "default", TTL: 30 * time.Second, IncludeSubgraphHeaderPrefix: true}, + }, + }, + { + SubgraphName: "accounts", + EntityCaching: plan.EntityCacheConfigurations{ + {TypeName: "User", CacheName: "default", TTL: 30 * time.Second, IncludeSubgraphHeaderPrefix: true}, + }, + }, + }), + )) + t.Cleanup(setup.Close) + + gqlClient := NewGraphqlClient(http.DefaultClient) + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + // Request 1: L2 miss → fetch → HeaderImpactEvents recorded + tracker.Reset() + resp, headers := gqlClient.QueryStringWithHeaders(ctx, setup.GatewayServer.URL, + `query { topProducts { name reviews { body authorWithoutProvides { username } } } }`, nil, t) + assert.Equal(t, + `{"data":{"topProducts":[{"name":"Trilby","reviews":[{"body":"A highly effective form of birth control.","authorWithoutProvides":{"username":"Me"}}]},{"name":"Fedora","reviews":[{"body":"Fedoras are one of the most fashionable hats around and can look great with a variety of outfits.","authorWithoutProvides":{"username":"Me"}}]}]}}`, + string(resp)) + + snap1 := normalizeSnapshot(parseCacheAnalytics(t, headers)) + + // Capture response hashes (deterministic) + responseHashes := make(map[string]uint64, len(snap1.HeaderImpactEvents)) + for _, ev := range snap1.HeaderImpactEvents { + responseHashes[ev.BaseKey] = ev.ResponseHash + } + + assert.Equal(t, normalizeSnapshot(resolve.CacheAnalyticsSnapshot{ + L2Reads: []resolve.CacheKeyEvent{ + {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews"}, // L2 miss: cache empty + {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews"}, // L2 miss: cache empty + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products"}, // L2 miss: root field not yet cached + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts"}, // L2 miss: User not yet cached + }, + L2Writes: []resolve.CacheWriteEvent{ + // Authorization: Bearer token-A → header hash prefix 11945571715631340836 + {CacheKey: `11945571715631340836:{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", ByteSize: 177, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `11945571715631340836:{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", ByteSize: 233, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `11945571715631340836:{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", ByteSize: 127, DataSource: "products", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `11945571715631340836:{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", ByteSize: 49, DataSource: "accounts", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + }, + FieldHashes: []resolve.EntityFieldHash{ + {EntityType: "Product", FieldName: "name", FieldHash: 1032923585965781586, KeyRaw: `{"upc":"top-1"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "Product", FieldName: "name", FieldHash: 2432227032303632641, KeyRaw: `{"upc":"top-2"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceSubgraph}, + }, + EntityTypes: []resolve.EntityTypeInfo{ + {TypeName: "Product", Count: 2, UniqueKeys: 2}, + {TypeName: "User", Count: 2, UniqueKeys: 1}, + }, + HeaderImpactEvents: []resolve.HeaderImpactEvent{ + {BaseKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, HeaderHash: 11945571715631340836, ResponseHash: responseHashes[`{"__typename":"Product","key":{"upc":"top-1"}}`], EntityType: "Product", DataSource: "reviews"}, + {BaseKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, HeaderHash: 11945571715631340836, ResponseHash: responseHashes[`{"__typename":"Product","key":{"upc":"top-2"}}`], EntityType: "Product", DataSource: "reviews"}, + {BaseKey: `{"__typename":"Query","field":"topProducts"}`, HeaderHash: 11945571715631340836, ResponseHash: responseHashes[`{"__typename":"Query","field":"topProducts"}`], EntityType: "Query", DataSource: "products"}, + {BaseKey: `{"__typename":"User","key":{"id":"1234"}}`, HeaderHash: 11945571715631340836, ResponseHash: responseHashes[`{"__typename":"User","key":{"id":"1234"}}`], EntityType: "User", DataSource: "accounts"}, + }, + }), snap1) + + // Request 2: Same headers → L2 hit → no fetch → empty analytics (except L2 reads) + tracker.Reset() + resp, headers = gqlClient.QueryStringWithHeaders(ctx, setup.GatewayServer.URL, + `query { topProducts { name reviews { body authorWithoutProvides { username } } } }`, nil, t) + assert.Equal(t, + `{"data":{"topProducts":[{"name":"Trilby","reviews":[{"body":"A highly effective form of birth control.","authorWithoutProvides":{"username":"Me"}}]},{"name":"Fedora","reviews":[{"body":"Fedoras are one of the most fashionable hats around and can look great with a variety of outfits.","authorWithoutProvides":{"username":"Me"}}]}]}}`, + string(resp)) + + snap2 := normalizeSnapshot(parseCacheAnalytics(t, headers)) + assert.Equal(t, normalizeSnapshot(resolve.CacheAnalyticsSnapshot{ + L2Reads: []resolve.CacheKeyEvent{ + {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyHit, DataSource: "reviews", ByteSize: 177}, // L2 hit: populated by request 1 + {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyHit, DataSource: "reviews", ByteSize: 233}, // L2 hit: populated by request 1 + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyHit, DataSource: "products", ByteSize: 127}, // L2 hit: root field cached by request 1 + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyHit, DataSource: "accounts", ByteSize: 49}, // L2 hit: User cached by request 1 + }, + // No L2Writes, no HeaderImpactEvents: all served from cache, no fresh fetches + FieldHashes: []resolve.EntityFieldHash{ + {EntityType: "Product", FieldName: "name", FieldHash: 1032923585965781586, KeyRaw: `{"upc":"top-1"}`, Source: resolve.FieldSourceL2}, + {EntityType: "Product", FieldName: "name", FieldHash: 2432227032303632641, KeyRaw: `{"upc":"top-2"}`, Source: resolve.FieldSourceL2}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceL2}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceL2}, + }, + EntityTypes: []resolve.EntityTypeInfo{ + {TypeName: "Product", Count: 2, UniqueKeys: 2}, + {TypeName: "User", Count: 2, UniqueKeys: 1}, + }, + }), snap2) + }) + + t.Run("no events when IncludeSubgraphHeaderPrefix is false", func(t *testing.T) { + tracker := newSubgraphCallTracker(http.DefaultTransport) + + setup := federationtesting.NewFederationSetup(addCachingGateway( + withCachingEnableART(false), + withCachingLoaderCache(map[string]resolve.LoaderCache{"default": NewFakeLoaderCache()}), + withHTTPClient(&http.Client{Transport: tracker}), + withCachingOptionsFunc(resolve.CachingOptions{EnableL2Cache: true, EnableCacheAnalytics: true}), + withSubgraphEntityCachingConfigs(engine.SubgraphCachingConfigs{ + { + SubgraphName: "products", + RootFieldCaching: plan.RootFieldCacheConfigurations{ + {TypeName: "Query", FieldName: "topProducts", CacheName: "default", TTL: 30 * time.Second}, + }, + }, + { + SubgraphName: "reviews", + EntityCaching: plan.EntityCacheConfigurations{ + {TypeName: "Product", CacheName: "default", TTL: 30 * time.Second}, + }, + }, + { + SubgraphName: "accounts", + EntityCaching: plan.EntityCacheConfigurations{ + {TypeName: "User", CacheName: "default", TTL: 30 * time.Second}, + }, + }, + }), + )) + t.Cleanup(setup.Close) + + gqlClient := NewGraphqlClient(http.DefaultClient) + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + tracker.Reset() + resp, headers := gqlClient.QueryStringWithHeaders(ctx, setup.GatewayServer.URL, + `query { topProducts { name reviews { body authorWithoutProvides { username } } } }`, nil, t) + assert.Equal(t, + `{"data":{"topProducts":[{"name":"Trilby","reviews":[{"body":"A highly effective form of birth control.","authorWithoutProvides":{"username":"Me"}}]},{"name":"Fedora","reviews":[{"body":"Fedoras are one of the most fashionable hats around and can look great with a variety of outfits.","authorWithoutProvides":{"username":"Me"}}]}]}}`, + string(resp)) + + snap := normalizeSnapshot(parseCacheAnalytics(t, headers)) + assert.Equal(t, normalizeSnapshot(resolve.CacheAnalyticsSnapshot{ + L2Reads: []resolve.CacheKeyEvent{ + {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews"}, + {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews"}, + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products"}, + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts"}, + }, + L2Writes: []resolve.CacheWriteEvent{ + {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", ByteSize: 177, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", ByteSize: 233, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", ByteSize: 127, DataSource: "products", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", ByteSize: 49, DataSource: "accounts", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, + }, + FieldHashes: []resolve.EntityFieldHash{ + {EntityType: "Product", FieldName: "name", FieldHash: 1032923585965781586, KeyRaw: `{"upc":"top-1"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "Product", FieldName: "name", FieldHash: 2432227032303632641, KeyRaw: `{"upc":"top-2"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceSubgraph}, + {EntityType: "User", FieldName: "username", FieldHash: 4957449860898447395, KeyRaw: `{"id":"1234"}`, Source: resolve.FieldSourceSubgraph}, + }, + EntityTypes: []resolve.EntityTypeInfo{ + {TypeName: "Product", Count: 2, UniqueKeys: 2}, + {TypeName: "User", Count: 2, UniqueKeys: 1}, + }, + // No HeaderImpactEvents: IncludeSubgraphHeaderPrefix is false + }), snap) + }) +} diff --git a/execution/engine/federation_caching_helpers_test.go b/execution/engine/federation_caching_helpers_test.go index 0ec1cdbf20..8fa2423bae 100644 --- a/execution/engine/federation_caching_helpers_test.go +++ b/execution/engine/federation_caching_helpers_test.go @@ -14,6 +14,7 @@ import ( "testing" "time" + "github.com/cespare/xxhash/v2" "github.com/jensneuse/abstractlogger" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -179,6 +180,62 @@ func (m *mockSubgraphHeadersBuilder) HashAll() uint64 { return result } +// headerForwardingMock implements SubgraphHeadersBuilder with actual HTTP headers. +// Unlike mockSubgraphHeadersBuilder (which returns nil headers + manual hashes), +// this returns real HTTP headers and computes hashes from their content. +type headerForwardingMock struct { + mu sync.RWMutex + headers map[string]http.Header +} + +func (m *headerForwardingMock) HeadersForSubgraph(subgraphName string) (http.Header, uint64) { + m.mu.RLock() + defer m.mu.RUnlock() + h := m.headers[subgraphName] + if h == nil { + return nil, 0 + } + hash := hashHeaders(h) + // Clone to prevent mutation by downstream code (makeHTTPRequest adds Accept, Content-Type, etc.) + clone := h.Clone() + return clone, hash +} + +func (m *headerForwardingMock) HashAll() uint64 { + m.mu.RLock() + defer m.mu.RUnlock() + var result uint64 + for _, h := range m.headers { + result ^= hashHeaders(h) + } + return result +} + +func (m *headerForwardingMock) setAll(h http.Header) { + m.mu.Lock() + defer m.mu.Unlock() + for sg := range m.headers { + m.headers[sg] = h + } +} + +// hashHeaders computes a deterministic hash of HTTP headers using sorted key-value pairs. +func hashHeaders(h http.Header) uint64 { + keys := make([]string, 0, len(h)) + for k := range h { + keys = append(keys, k) + } + sort.Strings(keys) + var buf []byte + for _, k := range keys { + buf = append(buf, k...) + for _, v := range h[k] { + buf = append(buf, v...) + } + } + return xxhash.Sum64(buf) +} + func cachingTestQueryPath(name string) string { return path.Join("..", "federationtesting", "testdata", name) } @@ -891,6 +948,22 @@ func normalizeSnapshot(snap resolve.CacheAnalyticsSnapshot) resolve.CacheAnalyti snap.MutationEvents = sorted } + // Sort HeaderImpactEvents for deterministic comparison + if snap.HeaderImpactEvents != nil { + sorted := make([]resolve.HeaderImpactEvent, len(snap.HeaderImpactEvents)) + copy(sorted, snap.HeaderImpactEvents) + sort.Slice(sorted, func(i, j int) bool { + if sorted[i].BaseKey != sorted[j].BaseKey { + return sorted[i].BaseKey < sorted[j].BaseKey + } + if sorted[i].HeaderHash != sorted[j].HeaderHash { + return sorted[i].HeaderHash < sorted[j].HeaderHash + } + return sorted[i].DataSource < sorted[j].DataSource + }) + snap.HeaderImpactEvents = sorted + } + // Zero out non-deterministic FetchTimings (DurationMs varies between runs) // Use normalizeFetchTimings() when you need to assert FetchTimings fields. snap.FetchTimings = nil @@ -924,6 +997,9 @@ func normalizeSnapshot(snap resolve.CacheAnalyticsSnapshot) resolve.CacheAnalyti if len(snap.MutationEvents) == 0 { snap.MutationEvents = nil } + if len(snap.HeaderImpactEvents) == 0 { + snap.HeaderImpactEvents = nil + } return snap } diff --git a/v2/pkg/engine/resolve/cache_analytics.go b/v2/pkg/engine/resolve/cache_analytics.go index ccf0e8171d..cb22447034 100644 --- a/v2/pkg/engine/resolve/cache_analytics.go +++ b/v2/pkg/engine/resolve/cache_analytics.go @@ -140,6 +140,18 @@ type MutationEvent struct { FreshBytes int } +// HeaderImpactEvent records a fresh fetch that wrote to L2 cache with header-prefixed keys. +// A cross-request consumer can aggregate these events: when the same BaseKey appears with +// different HeaderHash values but identical ResponseHash values, the forwarded headers +// do not affect the subgraph response, and IncludeSubgraphHeaderPrefix can be disabled. +type HeaderImpactEvent struct { + BaseKey string // cache key WITHOUT header prefix (stable identity for grouping) + HeaderHash uint64 // hash of forwarded headers for this subgraph + ResponseHash uint64 // xxhash of the response value bytes written to L2 + EntityType string // entity type (e.g., "User") or "Query" for root fields + DataSource string // subgraph name +} + // CacheAnalyticsCollector accumulates cache analytics events during request execution. // All methods are designed to be called from a single goroutine (main thread) except // where noted. L2 events from goroutines are accumulated on per-result slices and @@ -156,8 +168,9 @@ type CacheAnalyticsCollector struct { l2ErrorEvents []SubgraphErrorEvent // accumulated in goroutines, merged on main thread l2FetchTimings []FetchTimingEvent // accumulated in goroutines, merged on main thread shadowComparisons []ShadowComparisonEvent // shadow mode staleness comparison events - mutationEvents []MutationEvent // mutation entity impact events - xxh *xxhash.Digest + mutationEvents []MutationEvent // mutation entity impact events + headerImpactEvents []HeaderImpactEvent // header impact events for L2 writes with header prefix + xxh *xxhash.Digest } // NewCacheAnalyticsCollector creates a new collector with pre-allocated slices. @@ -304,6 +317,11 @@ func (c *CacheAnalyticsCollector) RecordMutationEvent(event MutationEvent) { c.mutationEvents = append(c.mutationEvents, event) } +// RecordHeaderImpactEvent records a header impact event. Main thread only. +func (c *CacheAnalyticsCollector) RecordHeaderImpactEvent(event HeaderImpactEvent) { + c.headerImpactEvents = append(c.headerImpactEvents, event) +} + // EntitySource returns the source for a given entity instance. // Returns FieldSourceSubgraph if no record is found (the default). func (c *CacheAnalyticsCollector) EntitySource(entityType, keyJSON string) FieldSource { @@ -321,13 +339,14 @@ func (c *CacheAnalyticsCollector) EntitySource(entityType, keyJSON string) Field // one per CacheKey for writes, and one per CacheKey for shadow comparisons. func (c *CacheAnalyticsCollector) Snapshot() CacheAnalyticsSnapshot { snap := CacheAnalyticsSnapshot{ - L1Reads: deduplicateKeyEvents(c.l1KeyEvents), - L2Reads: deduplicateKeyEvents(c.l2KeyEvents), - FieldHashes: c.fieldHashes, - FetchTimings: c.fetchTimings, - ErrorEvents: c.errorEvents, - ShadowComparisons: deduplicateShadowComparisons(c.shadowComparisons), - MutationEvents: c.mutationEvents, + L1Reads: deduplicateKeyEvents(c.l1KeyEvents), + L2Reads: deduplicateKeyEvents(c.l2KeyEvents), + FieldHashes: c.fieldHashes, + FetchTimings: c.fetchTimings, + ErrorEvents: c.errorEvents, + ShadowComparisons: deduplicateShadowComparisons(c.shadowComparisons), + MutationEvents: c.mutationEvents, + HeaderImpactEvents: deduplicateHeaderImpactEvents(c.headerImpactEvents), } // Split write events into L1 and L2, then deduplicate each @@ -419,6 +438,29 @@ func deduplicateShadowComparisons(events []ShadowComparisonEvent) []ShadowCompar return out } +// deduplicateHeaderImpactEvents removes duplicate header impact events, +// keeping the first occurrence for each (BaseKey, HeaderHash) pair. +func deduplicateHeaderImpactEvents(events []HeaderImpactEvent) []HeaderImpactEvent { + if len(events) == 0 { + return events + } + type dedupKey struct { + baseKey string + headerHash uint64 + } + seen := make(map[dedupKey]struct{}, len(events)) + out := make([]HeaderImpactEvent, 0, len(events)) + for _, ev := range events { + k := dedupKey{baseKey: ev.BaseKey, headerHash: ev.HeaderHash} + if _, ok := seen[k]; ok { + continue + } + seen[k] = struct{}{} + out = append(out, ev) + } + return out +} + // CacheAnalyticsSnapshot is a read-only snapshot of cache analytics data. // Requires EnableCacheAnalytics to be set; returns empty when disabled. type CacheAnalyticsSnapshot struct { @@ -447,6 +489,9 @@ type CacheAnalyticsSnapshot struct { // Mutation entity impact events MutationEvents []MutationEvent + + // Header impact events (L2 writes with header-prefixed keys) + HeaderImpactEvents []HeaderImpactEvent } // L1HitRate returns the L1 cache hit rate as a float64 in [0, 1]. diff --git a/v2/pkg/engine/resolve/cache_analytics_test.go b/v2/pkg/engine/resolve/cache_analytics_test.go index 769f90a5c0..b77f85e2cc 100644 --- a/v2/pkg/engine/resolve/cache_analytics_test.go +++ b/v2/pkg/engine/resolve/cache_analytics_test.go @@ -1762,3 +1762,55 @@ func TestSnapshotDeduplication(t *testing.T) { assert.Equal(t, int64(49), snap.CachedBytesServed(), "bytes served from 1 unique hit") }) } + +func TestCacheAnalyticsCollector_HeaderImpactEvents(t *testing.T) { + t.Run("records and deduplicates header impact events", func(t *testing.T) { + c := NewCacheAnalyticsCollector() + + // Same (BaseKey, HeaderHash) → deduplicated to 1 + c.RecordHeaderImpactEvent(HeaderImpactEvent{ + BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, + EntityType: "User", DataSource: "accounts", + }) + c.RecordHeaderImpactEvent(HeaderImpactEvent{ + BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, + EntityType: "User", DataSource: "accounts", + }) + + // Same BaseKey + different HeaderHash → separate event + c.RecordHeaderImpactEvent(HeaderImpactEvent{ + BaseKey: "key1", HeaderHash: 222, ResponseHash: 999, + EntityType: "User", DataSource: "accounts", + }) + + // Different BaseKey → separate event + c.RecordHeaderImpactEvent(HeaderImpactEvent{ + BaseKey: "key2", HeaderHash: 111, ResponseHash: 888, + EntityType: "Product", DataSource: "products", + }) + + snap := c.Snapshot() + assert.Equal(t, 3, len(snap.HeaderImpactEvents), "should have exactly 3 deduplicated events") + + assert.Equal(t, HeaderImpactEvent{ + BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, + EntityType: "User", DataSource: "accounts", + }, snap.HeaderImpactEvents[0]) + + assert.Equal(t, HeaderImpactEvent{ + BaseKey: "key1", HeaderHash: 222, ResponseHash: 999, + EntityType: "User", DataSource: "accounts", + }, snap.HeaderImpactEvents[1]) + + assert.Equal(t, HeaderImpactEvent{ + BaseKey: "key2", HeaderHash: 111, ResponseHash: 888, + EntityType: "Product", DataSource: "products", + }, snap.HeaderImpactEvents[2]) + }) + + t.Run("empty when no events recorded", func(t *testing.T) { + c := NewCacheAnalyticsCollector() + snap := c.Snapshot() + assert.Equal(t, 0, len(snap.HeaderImpactEvents), "should have no header impact events") + }) +} diff --git a/v2/pkg/engine/resolve/loader.go b/v2/pkg/engine/resolve/loader.go index ec4c2dd689..fbc592173d 100644 --- a/v2/pkg/engine/resolve/loader.go +++ b/v2/pkg/engine/resolve/loader.go @@ -166,6 +166,11 @@ type result struct { // Set during prepareCacheKeys, used by L2 write recording. analyticsEntityType string + // headerHash stores the subgraph header hash computed during prepareCacheKeys. + // Non-zero only when IncludeSubgraphHeaderPrefix is true and headers exist. + // Used by updateL2Cache to record HeaderImpactEvents. + headerHash uint64 + // shadowCachedValues stores cached L2 values when shadow mode is active. // After fresh data arrives, these are compared to detect staleness. // Key is the index into l1CacheKeys (entity fetches) or l2CacheKeys (root fetches). diff --git a/v2/pkg/engine/resolve/loader_cache.go b/v2/pkg/engine/resolve/loader_cache.go index f801e3614c..34d7f8abea 100644 --- a/v2/pkg/engine/resolve/loader_cache.go +++ b/v2/pkg/engine/resolve/loader_cache.go @@ -167,6 +167,7 @@ func (l *Loader) prepareCacheKeys(info *FetchInfo, cfg FetchCacheConfiguration, var buf [20]byte b := strconv.AppendUint(buf[:0], headersHash, 10) prefix = string(b) + res.headerHash = headersHash } // Render L2 cache keys with prefix @@ -874,6 +875,43 @@ func (l *Loader) updateL2Cache(res *result) { l.ctx.cacheAnalytics.RecordWrite(CacheLevelL2, res.analyticsEntityType, entry.Key, res.ds.Name, len(entry.Value), res.cacheConfig.TTL) } } + + // Record header impact events for cross-request analysis. + // Only when IncludeSubgraphHeaderPrefix is active (headerHash != 0). + if l.ctx.cacheAnalyticsEnabled() && res.headerHash != 0 && len(res.l1CacheKeys) > 0 { + // Build L2-to-L1 key mapping. L1 and L2 cache keys are generated from the same + // inputItems in prepareCacheKeys, so they have matching indices. + l2ToBaseKey := make(map[string]string, len(res.l2CacheKeys)) + for i, l2ck := range res.l2CacheKeys { + if i < len(res.l1CacheKeys) { + for j, l2key := range l2ck.Keys { + if j < len(res.l1CacheKeys[i].Keys) { + l2ToBaseKey[l2key] = res.l1CacheKeys[i].Keys[j] + } + } + } + } + + xxh := l.ctx.cacheAnalytics.xxh + for _, entry := range cacheEntries { + if entry == nil { + continue + } + baseKey, ok := l2ToBaseKey[entry.Key] + if !ok { + continue + } + xxh.Reset() + _, _ = xxh.Write(entry.Value) + l.ctx.cacheAnalytics.RecordHeaderImpactEvent(HeaderImpactEvent{ + BaseKey: baseKey, + HeaderHash: res.headerHash, + ResponseHash: xxh.Sum64(), + EntityType: res.analyticsEntityType, + DataSource: res.ds.Name, + }) + } + } } // saveShadowCachedValue saves a cached L2 value for later staleness comparison in shadow mode. From 0509aca78a959f758df129e4c856c3052b93667a Mon Sep 17 00:00:00 2001 From: Jens Neuse Date: Fri, 6 Mar 2026 08:30:53 +0100 Subject: [PATCH 2/5] style: fix gci import formatting for CI linter Co-Authored-By: Claude Opus 4.6 --- .../federation_caching_analytics_test.go | 16 +++++++------- v2/pkg/engine/resolve/cache_analytics.go | 22 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/execution/engine/federation_caching_analytics_test.go b/execution/engine/federation_caching_analytics_test.go index 69c0915f07..66a874a79f 100644 --- a/execution/engine/federation_caching_analytics_test.go +++ b/execution/engine/federation_caching_analytics_test.go @@ -1851,8 +1851,8 @@ func TestHeaderImpactAnalyticsE2E(t *testing.T) { L2Reads: []resolve.CacheKeyEvent{ {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews", Shadow: true}, // Shadow L2 miss: cache empty {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews", Shadow: true}, // Shadow L2 miss: cache empty - {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products", Shadow: false}, // L2 miss: shadow mode not implemented for root fields - {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts", Shadow: true}, // Shadow L2 miss: User not yet cached + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products", Shadow: false}, // L2 miss: shadow mode not implemented for root fields + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts", Shadow: true}, // Shadow L2 miss: User not yet cached }, L2Writes: []resolve.CacheWriteEvent{ {CacheKey: `11945571715631340836:{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", ByteSize: 177, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, @@ -1896,8 +1896,8 @@ func TestHeaderImpactAnalyticsE2E(t *testing.T) { L2Reads: []resolve.CacheKeyEvent{ {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews", Shadow: true}, // token-B prefix not in cache {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews", Shadow: true}, // token-B prefix not in cache - {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products", Shadow: false}, // shadow mode not implemented for root fields - {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts", Shadow: true}, // token-B prefix not in cache + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products", Shadow: false}, // shadow mode not implemented for root fields + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts", Shadow: true}, // token-B prefix not in cache }, L2Writes: []resolve.CacheWriteEvent{ {CacheKey: `4753115417090238877:{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", ByteSize: 177, DataSource: "reviews", CacheLevel: resolve.CacheLevelL2, TTL: 30 * time.Second}, @@ -1987,8 +1987,8 @@ func TestHeaderImpactAnalyticsE2E(t *testing.T) { L2Reads: []resolve.CacheKeyEvent{ {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews"}, // L2 miss: cache empty {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyMiss, DataSource: "reviews"}, // L2 miss: cache empty - {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products"}, // L2 miss: root field not yet cached - {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts"}, // L2 miss: User not yet cached + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyMiss, DataSource: "products"}, // L2 miss: root field not yet cached + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyMiss, DataSource: "accounts"}, // L2 miss: User not yet cached }, L2Writes: []resolve.CacheWriteEvent{ // Authorization: Bearer token-A → header hash prefix 11945571715631340836 @@ -2028,8 +2028,8 @@ func TestHeaderImpactAnalyticsE2E(t *testing.T) { L2Reads: []resolve.CacheKeyEvent{ {CacheKey: `{"__typename":"Product","key":{"upc":"top-1"}}`, EntityType: "Product", Kind: resolve.CacheKeyHit, DataSource: "reviews", ByteSize: 177}, // L2 hit: populated by request 1 {CacheKey: `{"__typename":"Product","key":{"upc":"top-2"}}`, EntityType: "Product", Kind: resolve.CacheKeyHit, DataSource: "reviews", ByteSize: 233}, // L2 hit: populated by request 1 - {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyHit, DataSource: "products", ByteSize: 127}, // L2 hit: root field cached by request 1 - {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyHit, DataSource: "accounts", ByteSize: 49}, // L2 hit: User cached by request 1 + {CacheKey: `{"__typename":"Query","field":"topProducts"}`, EntityType: "Query", Kind: resolve.CacheKeyHit, DataSource: "products", ByteSize: 127}, // L2 hit: root field cached by request 1 + {CacheKey: `{"__typename":"User","key":{"id":"1234"}}`, EntityType: "User", Kind: resolve.CacheKeyHit, DataSource: "accounts", ByteSize: 49}, // L2 hit: User cached by request 1 }, // No L2Writes, no HeaderImpactEvents: all served from cache, no fresh fetches FieldHashes: []resolve.EntityFieldHash{ diff --git a/v2/pkg/engine/resolve/cache_analytics.go b/v2/pkg/engine/resolve/cache_analytics.go index cb22447034..0768201384 100644 --- a/v2/pkg/engine/resolve/cache_analytics.go +++ b/v2/pkg/engine/resolve/cache_analytics.go @@ -157,17 +157,17 @@ type HeaderImpactEvent struct { // where noted. L2 events from goroutines are accumulated on per-result slices and // merged on the main thread via MergeL2Events. type CacheAnalyticsCollector struct { - l1KeyEvents []CacheKeyEvent - l2KeyEvents []CacheKeyEvent - writeEvents []CacheWriteEvent - fieldHashes []EntityFieldHash // flat slice (was: nested maps) - entityCounts []entityCount // simple type→count (was: map) - entitySources []entitySourceRecord // records where each entity's data came from - fetchTimings []FetchTimingEvent // main thread timings - errorEvents []SubgraphErrorEvent // main thread errors - l2ErrorEvents []SubgraphErrorEvent // accumulated in goroutines, merged on main thread - l2FetchTimings []FetchTimingEvent // accumulated in goroutines, merged on main thread - shadowComparisons []ShadowComparisonEvent // shadow mode staleness comparison events + l1KeyEvents []CacheKeyEvent + l2KeyEvents []CacheKeyEvent + writeEvents []CacheWriteEvent + fieldHashes []EntityFieldHash // flat slice (was: nested maps) + entityCounts []entityCount // simple type→count (was: map) + entitySources []entitySourceRecord // records where each entity's data came from + fetchTimings []FetchTimingEvent // main thread timings + errorEvents []SubgraphErrorEvent // main thread errors + l2ErrorEvents []SubgraphErrorEvent // accumulated in goroutines, merged on main thread + l2FetchTimings []FetchTimingEvent // accumulated in goroutines, merged on main thread + shadowComparisons []ShadowComparisonEvent // shadow mode staleness comparison events mutationEvents []MutationEvent // mutation entity impact events headerImpactEvents []HeaderImpactEvent // header impact events for L2 writes with header prefix xxh *xxhash.Digest From d4d5189c720aebb4f99d8fc47f2cb579f11cdc66 Mon Sep 17 00:00:00 2001 From: Jens Neuse Date: Fri, 6 Mar 2026 08:36:03 +0100 Subject: [PATCH 3/5] fix: include DataSource and ResponseHash in header impact event dedup key Broaden the dedup key to include all distinguishing fields so events from different data sources or with different response hashes are not collapsed. Co-Authored-By: Claude Opus 4.6 --- v2/pkg/engine/resolve/cache_analytics.go | 10 ++++++---- v2/pkg/engine/resolve/cache_analytics_test.go | 12 ++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/v2/pkg/engine/resolve/cache_analytics.go b/v2/pkg/engine/resolve/cache_analytics.go index 0768201384..e60f21b8c0 100644 --- a/v2/pkg/engine/resolve/cache_analytics.go +++ b/v2/pkg/engine/resolve/cache_analytics.go @@ -439,19 +439,21 @@ func deduplicateShadowComparisons(events []ShadowComparisonEvent) []ShadowCompar } // deduplicateHeaderImpactEvents removes duplicate header impact events, -// keeping the first occurrence for each (BaseKey, HeaderHash) pair. +// keeping the first occurrence for each unique event identity. func deduplicateHeaderImpactEvents(events []HeaderImpactEvent) []HeaderImpactEvent { if len(events) == 0 { return events } type dedupKey struct { - baseKey string - headerHash uint64 + baseKey string + headerHash uint64 + responseHash uint64 + dataSource string } seen := make(map[dedupKey]struct{}, len(events)) out := make([]HeaderImpactEvent, 0, len(events)) for _, ev := range events { - k := dedupKey{baseKey: ev.BaseKey, headerHash: ev.HeaderHash} + k := dedupKey{baseKey: ev.BaseKey, headerHash: ev.HeaderHash, responseHash: ev.ResponseHash, dataSource: ev.DataSource} if _, ok := seen[k]; ok { continue } diff --git a/v2/pkg/engine/resolve/cache_analytics_test.go b/v2/pkg/engine/resolve/cache_analytics_test.go index b77f85e2cc..832f79f828 100644 --- a/v2/pkg/engine/resolve/cache_analytics_test.go +++ b/v2/pkg/engine/resolve/cache_analytics_test.go @@ -1808,6 +1808,18 @@ func TestCacheAnalyticsCollector_HeaderImpactEvents(t *testing.T) { }, snap.HeaderImpactEvents[2]) }) + t.Run("same base key and header hash but different datasource are preserved", func(t *testing.T) { + c := NewCacheAnalyticsCollector() + c.RecordHeaderImpactEvent(HeaderImpactEvent{ + BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, EntityType: "User", DataSource: "accounts", + }) + c.RecordHeaderImpactEvent(HeaderImpactEvent{ + BaseKey: "key1", HeaderHash: 111, ResponseHash: 777, EntityType: "User", DataSource: "reviews", + }) + snap := c.Snapshot() + assert.Equal(t, 2, len(snap.HeaderImpactEvents), "different datasource/response should not be deduped") + }) + t.Run("empty when no events recorded", func(t *testing.T) { c := NewCacheAnalyticsCollector() snap := c.Snapshot() From 86d4087049f19c0334c0a49b382ed387dca21b14 Mon Sep 17 00:00:00 2001 From: Jens Neuse Date: Fri, 6 Mar 2026 08:38:37 +0100 Subject: [PATCH 4/5] refactor: use HeaderImpactEvent directly as dedup map key Co-Authored-By: Claude Opus 4.6 --- v2/pkg/engine/resolve/cache_analytics.go | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/v2/pkg/engine/resolve/cache_analytics.go b/v2/pkg/engine/resolve/cache_analytics.go index e60f21b8c0..b4836c56f0 100644 --- a/v2/pkg/engine/resolve/cache_analytics.go +++ b/v2/pkg/engine/resolve/cache_analytics.go @@ -444,20 +444,13 @@ func deduplicateHeaderImpactEvents(events []HeaderImpactEvent) []HeaderImpactEve if len(events) == 0 { return events } - type dedupKey struct { - baseKey string - headerHash uint64 - responseHash uint64 - dataSource string - } - seen := make(map[dedupKey]struct{}, len(events)) + seen := make(map[HeaderImpactEvent]struct{}, len(events)) out := make([]HeaderImpactEvent, 0, len(events)) for _, ev := range events { - k := dedupKey{baseKey: ev.BaseKey, headerHash: ev.HeaderHash, responseHash: ev.ResponseHash, dataSource: ev.DataSource} - if _, ok := seen[k]; ok { + if _, ok := seen[ev]; ok { continue } - seen[k] = struct{}{} + seen[ev] = struct{}{} out = append(out, ev) } return out From 20b3e25206571b9ec6afebefa3ff56b570500363 Mon Sep 17 00:00:00 2001 From: Jens Neuse Date: Fri, 6 Mar 2026 08:39:43 +0100 Subject: [PATCH 5/5] test: add exhaustive unit tests for header impact event dedup Test each field independently to verify that changing any single field prevents deduplication while exact duplicates are collapsed. Co-Authored-By: Claude Opus 4.6 --- v2/pkg/engine/resolve/cache_analytics_test.go | 105 ++++++++++-------- 1 file changed, 60 insertions(+), 45 deletions(-) diff --git a/v2/pkg/engine/resolve/cache_analytics_test.go b/v2/pkg/engine/resolve/cache_analytics_test.go index 832f79f828..637c7ef548 100644 --- a/v2/pkg/engine/resolve/cache_analytics_test.go +++ b/v2/pkg/engine/resolve/cache_analytics_test.go @@ -1764,65 +1764,80 @@ func TestSnapshotDeduplication(t *testing.T) { } func TestCacheAnalyticsCollector_HeaderImpactEvents(t *testing.T) { - t.Run("records and deduplicates header impact events", func(t *testing.T) { + base := HeaderImpactEvent{ + BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, + EntityType: "User", DataSource: "accounts", + } + + t.Run("exact duplicates are collapsed", func(t *testing.T) { c := NewCacheAnalyticsCollector() + c.RecordHeaderImpactEvent(base) + c.RecordHeaderImpactEvent(base) + c.RecordHeaderImpactEvent(base) + snap := c.Snapshot() + assert.Equal(t, []HeaderImpactEvent{base}, snap.HeaderImpactEvents) + }) - // Same (BaseKey, HeaderHash) → deduplicated to 1 - c.RecordHeaderImpactEvent(HeaderImpactEvent{ - BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, - EntityType: "User", DataSource: "accounts", - }) - c.RecordHeaderImpactEvent(HeaderImpactEvent{ - BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, - EntityType: "User", DataSource: "accounts", - }) + t.Run("different BaseKey is preserved", func(t *testing.T) { + c := NewCacheAnalyticsCollector() + other := base + other.BaseKey = "key2" + c.RecordHeaderImpactEvent(base) + c.RecordHeaderImpactEvent(other) + snap := c.Snapshot() + assert.Equal(t, []HeaderImpactEvent{base, other}, snap.HeaderImpactEvents) + }) - // Same BaseKey + different HeaderHash → separate event - c.RecordHeaderImpactEvent(HeaderImpactEvent{ - BaseKey: "key1", HeaderHash: 222, ResponseHash: 999, - EntityType: "User", DataSource: "accounts", - }) + t.Run("different HeaderHash is preserved", func(t *testing.T) { + c := NewCacheAnalyticsCollector() + other := base + other.HeaderHash = 222 + c.RecordHeaderImpactEvent(base) + c.RecordHeaderImpactEvent(other) + snap := c.Snapshot() + assert.Equal(t, []HeaderImpactEvent{base, other}, snap.HeaderImpactEvents) + }) - // Different BaseKey → separate event - c.RecordHeaderImpactEvent(HeaderImpactEvent{ - BaseKey: "key2", HeaderHash: 111, ResponseHash: 888, - EntityType: "Product", DataSource: "products", - }) + t.Run("different ResponseHash is preserved", func(t *testing.T) { + c := NewCacheAnalyticsCollector() + other := base + other.ResponseHash = 888 + c.RecordHeaderImpactEvent(base) + c.RecordHeaderImpactEvent(other) + snap := c.Snapshot() + assert.Equal(t, []HeaderImpactEvent{base, other}, snap.HeaderImpactEvents) + }) + + t.Run("different EntityType is preserved", func(t *testing.T) { + c := NewCacheAnalyticsCollector() + other := base + other.EntityType = "Product" + c.RecordHeaderImpactEvent(base) + c.RecordHeaderImpactEvent(other) + snap := c.Snapshot() + assert.Equal(t, []HeaderImpactEvent{base, other}, snap.HeaderImpactEvents) + }) + t.Run("different DataSource is preserved", func(t *testing.T) { + c := NewCacheAnalyticsCollector() + other := base + other.DataSource = "reviews" + c.RecordHeaderImpactEvent(base) + c.RecordHeaderImpactEvent(other) snap := c.Snapshot() - assert.Equal(t, 3, len(snap.HeaderImpactEvents), "should have exactly 3 deduplicated events") - - assert.Equal(t, HeaderImpactEvent{ - BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, - EntityType: "User", DataSource: "accounts", - }, snap.HeaderImpactEvents[0]) - - assert.Equal(t, HeaderImpactEvent{ - BaseKey: "key1", HeaderHash: 222, ResponseHash: 999, - EntityType: "User", DataSource: "accounts", - }, snap.HeaderImpactEvents[1]) - - assert.Equal(t, HeaderImpactEvent{ - BaseKey: "key2", HeaderHash: 111, ResponseHash: 888, - EntityType: "Product", DataSource: "products", - }, snap.HeaderImpactEvents[2]) + assert.Equal(t, []HeaderImpactEvent{base, other}, snap.HeaderImpactEvents) }) - t.Run("same base key and header hash but different datasource are preserved", func(t *testing.T) { + t.Run("single event is preserved", func(t *testing.T) { c := NewCacheAnalyticsCollector() - c.RecordHeaderImpactEvent(HeaderImpactEvent{ - BaseKey: "key1", HeaderHash: 111, ResponseHash: 999, EntityType: "User", DataSource: "accounts", - }) - c.RecordHeaderImpactEvent(HeaderImpactEvent{ - BaseKey: "key1", HeaderHash: 111, ResponseHash: 777, EntityType: "User", DataSource: "reviews", - }) + c.RecordHeaderImpactEvent(base) snap := c.Snapshot() - assert.Equal(t, 2, len(snap.HeaderImpactEvents), "different datasource/response should not be deduped") + assert.Equal(t, []HeaderImpactEvent{base}, snap.HeaderImpactEvents) }) t.Run("empty when no events recorded", func(t *testing.T) { c := NewCacheAnalyticsCollector() snap := c.Snapshot() - assert.Equal(t, 0, len(snap.HeaderImpactEvents), "should have no header impact events") + assert.Equal(t, 0, len(snap.HeaderImpactEvents)) }) }