diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..35442ba9c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +.github/** merge=ours diff --git a/pkg/registry/file/applicationprofile_processor.go b/pkg/registry/file/applicationprofile_processor.go index 09d6b7d87..920db6262 100644 --- a/pkg/registry/file/applicationprofile_processor.go +++ b/pkg/registry/file/applicationprofile_processor.go @@ -17,10 +17,8 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) -const ( - OpenDynamicThreshold = 50 - EndpointDynamicThreshold = 100 -) +// Thresholds are defined in dynamicpathdetector.OpenDynamicThreshold and +// dynamicpathdetector.EndpointDynamicThreshold (single source of truth). type ApplicationProfileProcessor struct { defaultNamespace string @@ -109,12 +107,12 @@ func (a *ApplicationProfileProcessor) SetStorage(containerProfileStorage Contain } func deflateApplicationProfileContainer(container softwarecomposition.ApplicationProfileContainer, sbomSet mapset.Set[string]) softwarecomposition.ApplicationProfileContainer { - opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzer(OpenDynamicThreshold), sbomSet) + opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs), sbomSet) if err != nil { logger.L().Debug("falling back to DeflateStringer for opens", loggerhelpers.Error(err)) opens = DeflateStringer(container.Opens) } - endpoints := dynamicpathdetector.AnalyzeEndpoints(&container.Endpoints, dynamicpathdetector.NewPathAnalyzer(EndpointDynamicThreshold)) + endpoints := dynamicpathdetector.AnalyzeEndpoints(&container.Endpoints, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil)) identifiedCallStacks := callstack.UnifyIdentifiedCallStacks(container.IdentifiedCallStacks) return softwarecomposition.ApplicationProfileContainer{ diff --git a/pkg/registry/file/applicationprofile_processor_test.go b/pkg/registry/file/applicationprofile_processor_test.go index e727d20b6..55e9688d3 100644 --- a/pkg/registry/file/applicationprofile_processor_test.go +++ b/pkg/registry/file/applicationprofile_processor_test.go @@ -4,17 +4,26 @@ import ( "context" "fmt" "slices" + "strings" "testing" + mapset "github.com/deckarep/golang-set/v2" "github.com/kubescape/k8s-interface/instanceidhandler/v1/helpers" "github.com/kubescape/storage/pkg/apis/softwarecomposition" "github.com/kubescape/storage/pkg/apis/softwarecomposition/consts" "github.com/kubescape/storage/pkg/config" + "github.com/kubescape/storage/pkg/registry/file/dynamicpathdetector" "github.com/stretchr/testify/assert" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) +// openThreshold returns the collapse threshold used by deflateApplicationProfileContainer +// for file-open paths. NewPathAnalyzerWithConfigs uses OpenDynamicThreshold as the default. +func openThreshold() int { + return dynamicpathdetector.OpenDynamicThreshold +} + var ap = softwarecomposition.ApplicationProfile{ ObjectMeta: v1.ObjectMeta{ Annotations: map[string]string{}, @@ -247,3 +256,184 @@ func TestDeflateRulePolicies(t *testing.T) { }) } } + +// generateSOOpens creates N unique .so OpenCalls under /usr/lib/x86_64-linux-gnu/ +func generateSOOpens(n int) []softwarecomposition.OpenCalls { + opens := make([]softwarecomposition.OpenCalls, n) + for i := 0; i < n; i++ { + opens[i] = softwarecomposition.OpenCalls{ + Path: fmt.Sprintf("/usr/lib/x86_64-linux-gnu/lib%d.so.%d", i, i%5), + Flags: []string{"O_RDONLY", "O_CLOEXEC"}, + } + } + return opens +} + +func TestDeflateApplicationProfileContainer_CollapsesManyOpens(t *testing.T) { + // Generate enough opens to exceed the default threshold used by NewPathAnalyzerWithConfigs + numOpens := openThreshold() + 1 + opens := generateSOOpens(numOpens) + + container := softwarecomposition.ApplicationProfileContainer{ + Name: "test-container", + Opens: opens, + } + + result := deflateApplicationProfileContainer(container, nil) + + assert.Less(t, len(result.Opens), numOpens, + "%d .so files should be collapsed, got %d opens", numOpens, len(result.Opens)) + + // Verify collapsed paths contain dynamic or wildcard segments + for _, open := range result.Opens { + if strings.HasPrefix(open.Path, "/usr/lib/x86_64-linux-gnu/") { + assert.True(t, + strings.Contains(open.Path, "\u22ef") || strings.Contains(open.Path, "*"), + "path %q should contain a dynamic or wildcard segment", open.Path) + } + } + + // Flags should be preserved and merged + for _, open := range result.Opens { + assert.NotEmpty(t, open.Flags, "flags should be preserved after collapse") + } +} + +func TestDeflateApplicationProfileContainer_CollapsesWithSbomSet(t *testing.T) { + numOpens := openThreshold() + 1 + opens := generateSOOpens(numOpens) + + // Build sbomSet containing ALL the .so paths (realistic scenario) + sbomSet := mapset.NewSet[string]() + for _, open := range opens { + sbomSet.Add(open.Path) + } + + container := softwarecomposition.ApplicationProfileContainer{ + Name: "test-container", + Opens: opens, + } + + result := deflateApplicationProfileContainer(container, sbomSet) + + // Even though all paths are in SBOM, they should still be collapsed + assert.Less(t, len(result.Opens), numOpens, + "SBOM paths should be collapsed too, got %d opens", len(result.Opens)) +} + +func TestDeflateApplicationProfileContainer_MixedPathsCollapse(t *testing.T) { + var opens []softwarecomposition.OpenCalls + + // /usr/lib uses the default threshold from NewPathAnalyzerWithConfigs(OpenDynamicThreshold, ...) + usrLibThreshold := openThreshold() + for i := 0; i < usrLibThreshold+1; i++ { + opens = append(opens, softwarecomposition.OpenCalls{ + Path: fmt.Sprintf("/usr/lib/lib%d.so", i), + Flags: []string{"O_RDONLY"}, + }) + } + + // /etc uses the /etc config threshold from DefaultCollapseConfigs (100) + etcThreshold := 100 + for i := 0; i < etcThreshold+1; i++ { + opens = append(opens, softwarecomposition.OpenCalls{ + Path: fmt.Sprintf("/etc/conf%d.cfg", i), + Flags: []string{"O_RDONLY"}, + }) + } + + opens = append(opens, + softwarecomposition.OpenCalls{Path: "/tmp/file1.txt", Flags: []string{"O_RDWR"}}, + softwarecomposition.OpenCalls{Path: "/tmp/file2.txt", Flags: []string{"O_RDWR"}}, + ) + + container := softwarecomposition.ApplicationProfileContainer{ + Name: "test-container", + Opens: opens, + } + + result := deflateApplicationProfileContainer(container, nil) + + // Count paths by prefix + var usrLibPaths, etcPaths, tmpPaths int + for _, open := range result.Opens { + switch { + case strings.HasPrefix(open.Path, "/usr/lib/"): + usrLibPaths++ + case strings.HasPrefix(open.Path, "/etc/"): + etcPaths++ + case strings.HasPrefix(open.Path, "/tmp/"): + tmpPaths++ + } + } + + assert.LessOrEqual(t, usrLibPaths, 1, "/usr/lib/ paths should collapse to 1, got %d", usrLibPaths) + assert.LessOrEqual(t, etcPaths, 1, "/etc/ paths should collapse to 1, got %d", etcPaths) + assert.Equal(t, 2, tmpPaths, "/tmp/ paths should remain individual (below threshold)") +} + +// TestDeflateApplicationProfileContainer_NilSbomNoError verifies that nil sbomSet +// with a small number of opens (below threshold) works without error. +func TestDeflateApplicationProfileContainer_NilSbomNoError(t *testing.T) { + container := softwarecomposition.ApplicationProfileContainer{ + Name: "test-container", + Opens: []softwarecomposition.OpenCalls{ + {Path: "/etc/hosts", Flags: []string{"O_RDONLY"}}, + {Path: "/etc/resolv.conf", Flags: []string{"O_RDONLY"}}, + {Path: "/usr/lib/libc.so.6", Flags: []string{"O_RDONLY", "O_CLOEXEC"}}, + }, + } + + result := deflateApplicationProfileContainer(container, nil) + + // All 3 paths should remain (below any threshold) + assert.Equal(t, 3, len(result.Opens), "paths below threshold should not collapse") + // Paths should be sorted + for i := 1; i < len(result.Opens); i++ { + assert.True(t, result.Opens[i-1].Path <= result.Opens[i].Path, + "opens should be sorted, got %q before %q", result.Opens[i-1].Path, result.Opens[i].Path) + } +} + +// TestDeflateApplicationProfileContainer_PreSaveEndToEnd verifies the full +// PreSave flow with an ApplicationProfile containing many opens that should collapse. +func TestDeflateApplicationProfileContainer_PreSaveEndToEnd(t *testing.T) { + numOpens := openThreshold() + 1 + opens := generateSOOpens(numOpens) + + profile := &softwarecomposition.ApplicationProfile{ + ObjectMeta: v1.ObjectMeta{ + Annotations: map[string]string{}, + }, + Spec: softwarecomposition.ApplicationProfileSpec{ + Containers: []softwarecomposition.ApplicationProfileContainer{ + { + Name: "main", + Opens: opens, + }, + }, + }, + } + + processor := NewApplicationProfileProcessor(config.Config{ + DefaultNamespace: "kubescape", + MaxApplicationProfileSize: 100000, + }) + + err := processor.PreSave(context.TODO(), profile) + assert.NoError(t, err) + + resultOpens := profile.Spec.Containers[0].Opens + assert.Less(t, len(resultOpens), numOpens, + "PreSave should collapse %d .so files, got %d opens", numOpens, len(resultOpens)) + + // The collapsed path should contain dynamic or wildcard segments + hasCollapsed := false + for _, open := range resultOpens { + if strings.Contains(open.Path, "\u22ef") || strings.Contains(open.Path, "*") { + hasCollapsed = true + break + } + } + assert.True(t, hasCollapsed, "at least one path should contain a dynamic/wildcard segment after PreSave") +} diff --git a/pkg/registry/file/cleanup.go b/pkg/registry/file/cleanup.go index 7a98286b7..1dc714bc2 100644 --- a/pkg/registry/file/cleanup.go +++ b/pkg/registry/file/cleanup.go @@ -185,6 +185,11 @@ func (h *ResourcesCleanupHandler) cleanupNamespace(ctx context.Context, ns strin return nil } + // Skip user-managed resources (e.g., user-defined profiles) + if metadata.Labels[helpersv1.ManagedByMetadataKey] == helpersv1.ManagedByUserValue { + return nil + } + // either run single handler, or perform OR operation on multiple handlers var toDelete bool if len(handlers) == 1 { diff --git a/pkg/registry/file/containerprofile_processor.go b/pkg/registry/file/containerprofile_processor.go index 19d7accb9..b61a4faa2 100644 --- a/pkg/registry/file/containerprofile_processor.go +++ b/pkg/registry/file/containerprofile_processor.go @@ -707,12 +707,12 @@ func (a *ContainerProfileProcessor) getAggregatedData(ctx context.Context, key s } func DeflateContainerProfileSpec(container softwarecomposition.ContainerProfileSpec, sbomSet mapset.Set[string]) softwarecomposition.ContainerProfileSpec { - opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzer(OpenDynamicThreshold), sbomSet) + opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs), sbomSet) if err != nil { logger.L().Debug("ContainerProfileProcessor.deflateContainerProfileSpec - falling back to DeflateStringer for opens", loggerhelpers.Error(err)) opens = DeflateStringer(container.Opens) } - endpoints := dynamicpathdetector.AnalyzeEndpoints(&container.Endpoints, dynamicpathdetector.NewPathAnalyzer(EndpointDynamicThreshold)) + endpoints := dynamicpathdetector.AnalyzeEndpoints(&container.Endpoints, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil)) identifiedCallStacks := callstack.UnifyIdentifiedCallStacks(container.IdentifiedCallStacks) return softwarecomposition.ContainerProfileSpec{ diff --git a/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go b/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go index 46fbe11bd..d620e3083 100644 --- a/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go +++ b/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go @@ -10,23 +10,51 @@ import ( types "github.com/kubescape/storage/pkg/apis/softwarecomposition" ) +func isWildcardPort(port string) bool { + return port == "0" +} + +func rewritePort(endpoint, wildcardPort string) string { + if wildcardPort == "" { + return endpoint + } + port, pathPart := splitEndpointPortAndPath(endpoint) + if !isWildcardPort(port) { + return ":" + wildcardPort + pathPart + } + return endpoint +} + func AnalyzeEndpoints(endpoints *[]types.HTTPEndpoint, analyzer *PathAnalyzer) []types.HTTPEndpoint { if len(*endpoints) == 0 { return nil } - var newEndpoints []*types.HTTPEndpoint + // Detect wildcard port in input (port 0 means any port) + wildcardPort := "" + for _, ep := range *endpoints { + port, _ := splitEndpointPortAndPath(ep.Endpoint) + if isWildcardPort(port) { + wildcardPort = port + break + } + } + + // First pass: build tree, redirecting to wildcard port if needed for _, endpoint := range *endpoints { - _, _ = AnalyzeURL(endpoint.Endpoint, analyzer) + _, _ = AnalyzeURL(rewritePort(endpoint.Endpoint, wildcardPort), analyzer) } + // Second pass: process endpoints + var newEndpoints []*types.HTTPEndpoint for _, endpoint := range *endpoints { - processedEndpoint, err := ProcessEndpoint(&endpoint, analyzer, newEndpoints) + ep := endpoint + ep.Endpoint = rewritePort(ep.Endpoint, wildcardPort) + processedEndpoint, err := ProcessEndpoint(&ep, analyzer, newEndpoints) if processedEndpoint == nil && err == nil || err != nil { continue - } else { - newEndpoints = append(newEndpoints, processedEndpoint) } + newEndpoints = append(newEndpoints, processedEndpoint) } newEndpoints = MergeDuplicateEndpoints(newEndpoints) @@ -88,6 +116,15 @@ func AnalyzeURL(urlString string, analyzer *PathAnalyzer) (string, error) { return ":" + port + path, nil } +func splitEndpointPortAndPath(endpoint string) (string, string) { + s := strings.TrimPrefix(endpoint, ":") + idx := strings.Index(s, "/") + if idx == -1 { + return s, "/" + } + return s[:idx], s[idx:] +} + func MergeDuplicateEndpoints(endpoints []*types.HTTPEndpoint) []*types.HTTPEndpoint { seen := make(map[string]*types.HTTPEndpoint) var newEndpoints []*types.HTTPEndpoint @@ -97,10 +134,22 @@ func MergeDuplicateEndpoints(endpoints []*types.HTTPEndpoint) []*types.HTTPEndpo if existing, found := seen[key]; found { existing.Methods = MergeStrings(existing.Methods, endpoint.Methods) mergeHeaders(existing, endpoint) - } else { - seen[key] = endpoint - newEndpoints = append(newEndpoints, endpoint) + continue } + + // Check if a wildcard port variant already exists (port 0 means any port) + port, pathPart := splitEndpointPortAndPath(endpoint.Endpoint) + if !isWildcardPort(port) { + wildcardKey := fmt.Sprintf(":%s%s|%s", "0", pathPart, endpoint.Direction) + if existing, found := seen[wildcardKey]; found { + existing.Methods = MergeStrings(existing.Methods, endpoint.Methods) + mergeHeaders(existing, endpoint) + continue + } + } + + seen[key] = endpoint + newEndpoints = append(newEndpoints, endpoint) } return newEndpoints @@ -111,7 +160,6 @@ func getEndpointKey(endpoint *types.HTTPEndpoint) string { } func mergeHeaders(existing, new *types.HTTPEndpoint) { - // TODO: Find a better way to unmashal the headers existingHeaders, err := existing.GetHeaders() if err != nil { return diff --git a/pkg/registry/file/dynamicpathdetector/analyze_opens.go b/pkg/registry/file/dynamicpathdetector/analyze_opens.go index 554325e31..8750adff5 100644 --- a/pkg/registry/file/dynamicpathdetector/analyze_opens.go +++ b/pkg/registry/file/dynamicpathdetector/analyze_opens.go @@ -1,7 +1,6 @@ package dynamicpathdetector import ( - "errors" "maps" "slices" "strings" @@ -15,22 +14,14 @@ func AnalyzeOpens(opens []types.OpenCalls, analyzer *PathAnalyzer, sbomSet mapse return nil, nil } - if sbomSet == nil { - return nil, errors.New("sbomSet is nil") - } - + // First pass: build trie from all paths dynamicOpens := make(map[string]types.OpenCalls) for _, open := range opens { _, _ = AnalyzeOpen(open.Path, analyzer) } + // Second pass: read collapsed paths and merge for i := range opens { - // sbomSet files have to be always present in the dynamicOpens - if sbomSet.ContainsOne(opens[i].Path) { - dynamicOpens[opens[i].Path] = opens[i] - continue - } - result, err := AnalyzeOpen(opens[i].Path, analyzer) if err != nil { continue diff --git a/pkg/registry/file/dynamicpathdetector/analyzer.go b/pkg/registry/file/dynamicpathdetector/analyzer.go index 1f17d80af..be95e5bbb 100644 --- a/pkg/registry/file/dynamicpathdetector/analyzer.go +++ b/pkg/registry/file/dynamicpathdetector/analyzer.go @@ -1,166 +1,341 @@ package dynamicpathdetector import ( - "path" "strings" ) -func NewPathAnalyzer(threshold int) *PathAnalyzer { - return &PathAnalyzer{ - RootNodes: make(map[string]*SegmentNode), - threshold: threshold, - } +func NewPathAnalyzerWithConfigs(defaultThreshold int, configs []CollapseConfig) *PathAnalyzer { + return newAnalyzer(CollapseConfig{Prefix: "/", Threshold: defaultThreshold}, configs, true) } -func (ua *PathAnalyzer) AnalyzePath(p, identifier string) (string, error) { - p = path.Clean(p) - node, exists := ua.RootNodes[identifier] - if !exists { - node = &SegmentNode{ - SegmentName: identifier, - Count: 0, - Children: make(map[string]*SegmentNode), - } - ua.RootNodes[identifier] = node +func newAnalyzer(defaultCfg CollapseConfig, configs []CollapseConfig, collapseAdjacent bool) *PathAnalyzer { + matcher := &PathAnalyzer{ + root: NewTrieNode(), + identRoots: make(map[string]*TrieNode), + configs: make([]CollapseConfig, len(configs)), + defaultCfg: defaultCfg, + collapseAdjacent: collapseAdjacent, } - return ua.processSegments(node, p), nil + copy(matcher.configs, configs) + applyConfigsToNode(matcher.root, &matcher.defaultCfg, matcher.configs) + return matcher } -func (ua *PathAnalyzer) processSegments(node *SegmentNode, p string) string { - var result strings.Builder - currentNode := node - i := 0 - for { - start := i - for i < len(p) && p[i] != '/' { - i++ - } - segment := p[start:i] - currentNode = ua.processSegment(currentNode, segment) - ua.updateNodeStats(currentNode) - result.WriteString(currentNode.SegmentName) - i++ - if len(p) < i { - break - } - result.WriteByte('/') +func applyConfigsToNode(node *TrieNode, defaultCfg *CollapseConfig, configs []CollapseConfig) { + addConfigToNode(node, defaultCfg) + for i := range configs { + addConfigToNode(node, &configs[i]) } - return result.String() } -func (ua *PathAnalyzer) processSegment(node *SegmentNode, segment string) *SegmentNode { - if segment == DynamicIdentifier { - return ua.handleDynamicSegment(node) - } else if node.IsNextDynamic() { - if len(node.Children) > 1 { - temp := node.Children[DynamicIdentifier] - node.Children = map[string]*SegmentNode{} - node.Children[DynamicIdentifier] = temp +func addConfigToNode(root *TrieNode, config *CollapseConfig) { + node := root + segments := strings.Split(strings.Trim(config.Prefix, "/"), "/") + if segments[0] == "" { + node.Config = config + return + } + for _, segment := range segments { + if _, ok := node.Children[segment]; !ok { + node.Children[segment] = NewTrieNode() } - return node.Children[DynamicIdentifier] - } else if child, exists := node.Children[segment]; exists { - return child - } else { - return ua.handleNewSegment(node, segment) + node = node.Children[segment] } + node.Config = config } -func (ua *PathAnalyzer) handleNewSegment(node *SegmentNode, segment string) *SegmentNode { - node.Count++ - newNode := &SegmentNode{ - SegmentName: segment, - Count: 0, - Children: make(map[string]*SegmentNode), +func (pm *PathAnalyzer) getRoot(identifier string) *TrieNode { + if root, ok := pm.identRoots[identifier]; ok { + return root } - node.Children[segment] = newNode - return newNode + newRoot := NewTrieNode() + pm.identRoots[identifier] = newRoot + return newRoot } -func (ua *PathAnalyzer) handleDynamicSegment(node *SegmentNode) *SegmentNode { - if dynamicChild, exists := node.Children[DynamicIdentifier]; exists { - return dynamicChild - } else { - return ua.createDynamicNode(node) +// splitPath splits a path into non-empty segments. +func splitPath(path string) []string { + parts := strings.Split(strings.Trim(path, "/"), "/") + var result []string + for _, p := range parts { + if p != "" { + result = append(result, p) + } } + return result } -func (ua *PathAnalyzer) createDynamicNode(node *SegmentNode) *SegmentNode { - dynamicNode := &SegmentNode{ - SegmentName: DynamicIdentifier, - Count: 0, - Children: make(map[string]*SegmentNode), - } +func (pm *PathAnalyzer) AddPath(path string) { + pm.addPathToRoot(pm.root, path) +} - // Copy all existing children to the new dynamic node - for _, child := range node.Children { - shallowChildrenCopy(child, dynamicNode) +func (pm *PathAnalyzer) addPathToRoot(root *TrieNode, path string) { + parent := root + + segments := splitPath(path) + if len(segments) == 0 { + return } - // Replace all children with the new dynamic node - node.Children = map[string]*SegmentNode{ - DynamicIdentifier: dynamicNode, + // Use pm.root as config trie for per-prefix threshold lookup. + // Config advances AFTER navigation so threshold applies at the correct level. + configNode := pm.root + currentConfig := &pm.defaultCfg + if configNode != nil && configNode.Config != nil { + currentConfig = configNode.Config } - return dynamicNode -} + for _, segment := range segments { + // If a wildcard exists, it consumes the rest of the path. + if wildcardNode, ok := parent.Children[WildcardIdentifier]; ok { + wildcardNode.Count++ + return + } + + // If a dynamic node exists, absorb this segment and continue. + if dynamicNode, ok := parent.Children[DynamicIdentifier]; ok { + parent = dynamicNode + parent.Count++ + // Advance config after navigation + if configNode != nil { + if next, ok := configNode.Children[segment]; ok { + configNode = next + if configNode.Config != nil { + currentConfig = configNode.Config + } + } + } + continue + } -func (ua *PathAnalyzer) updateNodeStats(node *SegmentNode) { - if node.Count > ua.threshold && !node.IsNextDynamic() { - dynamicChild := &SegmentNode{ - SegmentName: DynamicIdentifier, - Count: 0, - Children: make(map[string]*SegmentNode), + // Handle DynamicIdentifier segment from input: merge siblings into new ⋯ node + if segment == DynamicIdentifier { + if _, exists := parent.Children[DynamicIdentifier]; !exists { + dynamicNode := NewTrieNode() + for _, child := range parent.Children { + dynamicNode.Count += child.Count + shallowChildrenCopy(child, dynamicNode) + } + parent.Children = map[string]*TrieNode{DynamicIdentifier: dynamicNode} + } + parent = parent.Children[DynamicIdentifier] + parent.Count++ + // Advance config after navigation + if configNode != nil { + if next, ok := configNode.Children[segment]; ok { + configNode = next + if configNode.Config != nil { + currentConfig = configNode.Config + } + } + } + continue } - // Copy all descendants - for _, child := range node.Children { - shallowChildrenCopy(child, dynamicChild) + // Add new node if it doesn't exist + child, exists := parent.Children[segment] + if !exists { + child = NewTrieNode() + parent.Children[segment] = child } + child.Count++ - node.Children = map[string]*SegmentNode{ - DynamicIdentifier: dynamicChild, + // Special case: threshold of 1 immediately creates a wildcard (only with collapseAdjacent) + if pm.collapseAdjacent && currentConfig != nil && currentConfig.Threshold == 1 && parent.Children[WildcardIdentifier] == nil { + pm.createWildcardNode(parent) + parent.Children[WildcardIdentifier].Count++ + return + } + + // Standard collapse: if unique children > threshold, collapse to dynamic node + if currentConfig != nil && len(parent.Children) > currentConfig.Threshold && parent.Children[DynamicIdentifier] == nil { + pm.createDynamicNode(parent) + } + + // After a potential collapse, find the correct child to traverse to next. + if nextNode, ok := parent.Children[DynamicIdentifier]; ok { + parent = nextNode + } else if nextNode, ok := parent.Children[segment]; ok { + parent = nextNode + } else if _, ok := parent.Children[WildcardIdentifier]; ok { + return + } else { + return + } + + // Advance config AFTER navigation so threshold applies at the correct level + if configNode != nil { + if next, ok := configNode.Children[segment]; ok { + configNode = next + if configNode.Config != nil { + currentConfig = configNode.Config + } + } } } } -func shallowChildrenCopy(src, dst *SegmentNode) { - for segmentName := range src.Children { - if _, ok := dst.Children[segmentName]; !ok { - dst.Children[segmentName] = src.Children[segmentName] +func shallowChildrenCopy(src, dst *TrieNode) { + for key, srcChild := range src.Children { + if dstChild, ok := dst.Children[key]; !ok { + dst.Children[key] = srcChild } else { - dst.Children[segmentName].Count += src.Children[segmentName].Count - shallowChildrenCopy(src.Children[segmentName], dst.Children[segmentName]) + dstChild.Count += srcChild.Count + shallowChildrenCopy(srcChild, dstChild) } } } -func CompareDynamic(dynamicPath, regularPath string) bool { - dynamicIndex, regularIndex := 0, 0 - dynamicLen, regularLen := len(dynamicPath), len(regularPath) +func (pm *PathAnalyzer) createDynamicNode(node *TrieNode) { + dynamicNode := NewTrieNode() + for _, child := range node.Children { + dynamicNode.Count += child.Count + shallowChildrenCopy(child, dynamicNode) + } + node.Children = map[string]*TrieNode{DynamicIdentifier: dynamicNode} +} + +func (pm *PathAnalyzer) createWildcardNode(node *TrieNode) { + wildcardNode := NewTrieNode() + for _, child := range node.Children { + wildcardNode.Count += child.Count + } + node.Children = map[string]*TrieNode{WildcardIdentifier: wildcardNode} +} - for dynamicIndex < dynamicLen && regularIndex < regularLen { - // Find the next segment in dynamicPath - dynamicSegmentStart := dynamicIndex - for dynamicIndex < dynamicLen && dynamicPath[dynamicIndex] != '/' { - dynamicIndex++ +func (pm *PathAnalyzer) FindConfigForPath(path string) *CollapseConfig { + node := pm.root + var lastFoundConfig *CollapseConfig + if node.Config != nil { + lastFoundConfig = node.Config + } + segments := splitPath(path) + for _, segment := range segments { + if nextNode, ok := node.Children[segment]; ok { + node = nextNode + if node.Config != nil { + lastFoundConfig = node.Config + } + } else { + break } - dynamicSegment := dynamicPath[dynamicSegmentStart:dynamicIndex] + } + return lastFoundConfig +} + +func (pm *PathAnalyzer) GetStoredPaths() []string { + var storedPaths []string + pm.collectPaths(pm.root, "", &storedPaths) + return storedPaths +} - // Find the next segment in regularPath - regularSegmentStart := regularIndex - for regularIndex < regularLen && regularPath[regularIndex] != '/' { - regularIndex++ +func (pm *PathAnalyzer) collectPaths(node *TrieNode, currentPath string, paths *[]string) { + if len(node.Children) == 0 { + if currentPath != "" { + *paths = append(*paths, currentPath) } - regularSegment := regularPath[regularSegmentStart:regularIndex] + return + } + for segment, child := range node.Children { + newPath := currentPath + "/" + segment + pm.collectPaths(child, newPath, paths) + } +} + +func (pm *PathAnalyzer) AnalyzePath(path string, identifier string) (string, error) { + cleanPath := strings.Trim(path, "/") + if cleanPath == "" { + return "/", nil + } + + root := pm.getRoot(identifier) + + segments := splitPath(cleanPath) + if len(segments) == 0 { + return "/", nil + } + + // Read the tree state BEFORE adding the new path. + // This ensures the current path doesn't see its own collapse. + node := root + var pathSegments []string - if dynamicSegment != DynamicIdentifier && dynamicSegment != regularSegment { - return false + for _, segment := range segments { + if nextNode, ok := node.Children[WildcardIdentifier]; ok { + node = nextNode + pathSegments = append(pathSegments, WildcardIdentifier) + break + } + if nextNode, ok := node.Children[DynamicIdentifier]; ok { + node = nextNode + pathSegments = append(pathSegments, DynamicIdentifier) + } else if nextNode, ok := node.Children[segment]; ok { + node = nextNode + pathSegments = append(pathSegments, segment) + } else { + pathSegments = append(pathSegments, segment) } + } - // Move to the next segment - dynamicIndex++ - regularIndex++ + // Now add the path to the tree (for future calls). + pm.addPathToRoot(root, cleanPath) + + finalPath := "/" + strings.Join(pathSegments, "/") + if pm.collapseAdjacent { + return CollapseAdjacentDynamicIdentifiers(finalPath), nil } + return finalPath, nil +} - return dynamicIndex > dynamicLen && regularIndex > regularLen +// CollapseAdjacentDynamicIdentifiers replaces sequences of truly adjacent dynamic identifiers with a wildcard. +// Only consecutive ⋯/⋯ segments are collapsed to *. Static segments between ⋯ prevent collapsing. +func CollapseAdjacentDynamicIdentifiers(p string) string { + segments := strings.Split(p, "/") + var result []string + i := 0 + for i < len(segments) { + if segments[i] == DynamicIdentifier && i+1 < len(segments) && segments[i+1] == DynamicIdentifier { + // Replace sequence of adjacent ⋯ with * + result = append(result, WildcardIdentifier) + for i < len(segments) && segments[i] == DynamicIdentifier { + i++ + } + continue + } + result = append(result, segments[i]) + i++ + } + return strings.Join(result, "/") +} + +func CompareDynamic(dynamicPath, regularPath string) bool { + dynamicSegments := strings.Split(dynamicPath, "/") + regularSegments := strings.Split(regularPath, "/") + return compareSegments(dynamicSegments, regularSegments) +} + +func compareSegments(dynamic, regular []string) bool { + if len(dynamic) == 0 { + return len(regular) == 0 + } + if dynamic[0] == WildcardIdentifier { + if len(dynamic) == 1 { + return true + } + nextDynamic := dynamic[1] + for i := range regular { + match := nextDynamic == DynamicIdentifier || (i < len(regular) && regular[i] == nextDynamic) + if match && compareSegments(dynamic[1:], regular[i:]) { + return true + } + } + return false + } + if len(regular) == 0 { + return false + } + if dynamic[0] == DynamicIdentifier || dynamic[0] == regular[0] { + return compareSegments(dynamic[1:], regular[1:]) + } + return false } diff --git a/pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go b/pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go index ab6565af8..93172a1aa 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go @@ -12,7 +12,7 @@ import ( ) func TestAnalyzeEndpoints(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) tests := []struct { name string @@ -72,6 +72,29 @@ func TestAnalyzeEndpoints(t *testing.T) { }, }, }, + { + name: "Test with 0 port", + input: []types.HTTPEndpoint{ + { + Endpoint: ":0/users/123/posts/\u22ef", + Methods: []string{"GET"}, + }, + { + Endpoint: ":80/users/\u22ef/posts/101", + Methods: []string{"POST"}, + }, + { + Endpoint: ":8770/users/blub/posts/101", + Methods: []string{"POST"}, + }, + }, + expected: []types.HTTPEndpoint{ + { + Endpoint: ":0/users/\u22ef/posts/\u22ef", + Methods: []string{"GET", "POST"}, + }, + }, + }, { name: "Test with different domains", input: []types.HTTPEndpoint{ @@ -145,10 +168,11 @@ func TestAnalyzeEndpoints(t *testing.T) { } func TestAnalyzeEndpointsWithThreshold(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + threshold := dynamicpathdetector.EndpointDynamicThreshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) var input []types.HTTPEndpoint - for i := 0; i < 101; i++ { + for i := 0; i < threshold+1; i++ { input = append(input, types.HTTPEndpoint{ Endpoint: fmt.Sprintf(":80/users/%d", i), Methods: []string{"GET"}, @@ -167,10 +191,11 @@ func TestAnalyzeEndpointsWithThreshold(t *testing.T) { } func TestAnalyzeEndpointsWithExactThreshold(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + threshold := dynamicpathdetector.EndpointDynamicThreshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) var input []types.HTTPEndpoint - for i := 0; i < 100; i++ { + for i := 0; i < threshold; i++ { input = append(input, types.HTTPEndpoint{ Endpoint: fmt.Sprintf(":80/users/%d", i), Methods: []string{"GET"}, @@ -179,18 +204,17 @@ func TestAnalyzeEndpointsWithExactThreshold(t *testing.T) { result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) - // Check that all 100 endpoints are still individual - assert.Equal(t, 100, len(result)) + // At exact threshold: all endpoints should remain individual + assert.Equal(t, threshold, len(result)) // Now add one more endpoint to trigger the dynamic behavior input = append(input, types.HTTPEndpoint{ - Endpoint: ":80/users/100", + Endpoint: fmt.Sprintf(":80/users/%d", threshold), Methods: []string{"GET"}, }) result = dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) - // Check that all endpoints are now merged into one dynamic endpoint expected := []types.HTTPEndpoint{ { Endpoint: ":80/users/\u22ef", @@ -201,7 +225,7 @@ func TestAnalyzeEndpointsWithExactThreshold(t *testing.T) { } func TestAnalyzeEndpointsWithInvalidURL(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) input := []types.HTTPEndpoint{ { @@ -213,3 +237,98 @@ func TestAnalyzeEndpointsWithInvalidURL(t *testing.T) { result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) assert.Equal(t, 0, len(result)) } + +func TestAnalyzeEndpointsWildcardPortAbsorbsSpecificPort(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) + + input := []types.HTTPEndpoint{ + { + Endpoint: ":0/users/123", + Methods: []string{"GET"}, + Direction: "outbound", + }, + { + Endpoint: ":80/users/456", + Methods: []string{"POST"}, + Direction: "outbound", + }, + } + + result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) + + for _, ep := range result { + port := ep.Endpoint[:len(":0")] + assert.Equal(t, ":0", port, "endpoint %s should have wildcard port", ep.Endpoint) + } +} + +func TestAnalyzeEndpointsWildcardPortAfterSpecificPorts(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) + + input := []types.HTTPEndpoint{ + { + Endpoint: ":80/api/data", + Methods: []string{"GET"}, + Direction: "outbound", + }, + { + Endpoint: ":0/api/info", + Methods: []string{"POST"}, + Direction: "outbound", + }, + } + + result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) + + for _, ep := range result { + port := ep.Endpoint[:len(":0")] + assert.Equal(t, ":0", port, "endpoint %s should have wildcard port", ep.Endpoint) + } +} + +func TestAnalyzeEndpointsMultiplePortsMergeIntoWildcard(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) + + input := []types.HTTPEndpoint{ + { + Endpoint: ":0/api/data", + Methods: []string{"GET"}, + Direction: "outbound", + }, + { + Endpoint: ":80/api/data", + Methods: []string{"POST"}, + Direction: "outbound", + }, + { + Endpoint: ":81/api/data", + Methods: []string{"PUT"}, + Direction: "outbound", + }, + } + + result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) + + assert.Equal(t, 1, len(result)) + assert.Equal(t, ":0/api/data", result[0].Endpoint) + assert.Equal(t, []string{"GET", "POST", "PUT"}, result[0].Methods) +} + +func TestMergeDuplicateEndpointsWildcardPort(t *testing.T) { + wildcardEP := &types.HTTPEndpoint{ + Endpoint: ":0/api/data", + Methods: []string{"GET"}, + Direction: "outbound", + } + specificEP := &types.HTTPEndpoint{ + Endpoint: ":80/api/data", + Methods: []string{"POST"}, + Direction: "outbound", + } + + result := dynamicpathdetector.MergeDuplicateEndpoints([]*types.HTTPEndpoint{wildcardEP, specificEP}) + + assert.Equal(t, 1, len(result)) + assert.Equal(t, ":0/api/data", result[0].Endpoint) + assert.Equal(t, []string{"GET", "POST"}, result[0].Methods) +} diff --git a/pkg/registry/file/dynamicpathdetector/tests/analyze_opens_test.go b/pkg/registry/file/dynamicpathdetector/tests/analyze_opens_test.go index bc3834e62..3de88ce4a 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/analyze_opens_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/analyze_opens_test.go @@ -2,6 +2,8 @@ package dynamicpathdetectortests import ( "fmt" + "sort" + "strings" "testing" mapset "github.com/deckarep/golang-set/v2" @@ -11,10 +13,11 @@ import ( ) func TestAnalyzeOpensWithThreshold(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + threshold := dynamicpathdetector.OpenDynamicThreshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) var input []types.OpenCalls - for i := 0; i < 101; i++ { + for i := 0; i < threshold+1; i++ { input = append(input, types.OpenCalls{ Path: fmt.Sprintf("/home/user%d/file.txt", i), }) @@ -32,49 +35,20 @@ func TestAnalyzeOpensWithThreshold(t *testing.T) { assert.Equal(t, expected, result) } -func TestAnalyzeOpensWithThresholdAndExclusion(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) - - var input []types.OpenCalls - for i := 0; i < 101; i++ { - input = append(input, types.OpenCalls{ - Path: fmt.Sprintf("/home/user%d/file.txt", i), - Flags: []string{"READ"}, - }) - } - - expected := []types.OpenCalls{ - { - Path: "/home/user42/file.txt", - Flags: []string{"READ"}, - }, - { - Path: "/home/\u22ef/file.txt", - Flags: []string{"READ"}, - }, - } - - result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]("/home/user42/file.txt")) - assert.NoError(t, err) - assert.Equal(t, expected, result) -} - func TestAnalyzeOpensWithFlagMergingAndThreshold(t *testing.T) { + // Use /var/run threshold (3) — low enough that hand-written subtests work + threshold := configThreshold("/var/run") + tests := []struct { name string input []types.OpenCalls expected []types.OpenCalls }{ { - name: "Merge flags for paths exceeding threshold", - input: []types.OpenCalls{ - {Path: "/home/user1/file.txt", Flags: []string{"READ"}}, - {Path: "/home/user2/file.txt", Flags: []string{"WRITE"}}, - {Path: "/home/user3/file.txt", Flags: []string{"APPEND"}}, - {Path: "/home/user4/file.txt", Flags: []string{"READ", "WRITE"}}, - }, + name: "Merge flags for paths exceeding threshold", + input: generateOpenCallsWithFlags("/home", "file.txt", threshold+1), expected: []types.OpenCalls{ - {Path: "/home/\u22ef/file.txt", Flags: []string{"APPEND", "READ", "WRITE"}}, + {Path: "/home/\u22ef/file.txt", Flags: flagsForN(threshold + 1)}, }, }, { @@ -90,42 +64,33 @@ func TestAnalyzeOpensWithFlagMergingAndThreshold(t *testing.T) { }, { name: "Partial merging for some paths exceeding threshold", - input: []types.OpenCalls{ - {Path: "/home/user1/common.txt", Flags: []string{"READ"}}, - {Path: "/home/user2/common.txt", Flags: []string{"WRITE"}}, - {Path: "/home/user3/common.txt", Flags: []string{"APPEND"}}, - {Path: "/home/user4/common.txt", Flags: []string{"READ", "WRITE"}}, - {Path: "/var/log/app1.log", Flags: []string{"READ"}}, - {Path: "/var/log/app2.log", Flags: []string{"WRITE"}}, - }, + input: append( + generateOpenCallsWithFlags("/home", "common.txt", threshold+1), + types.OpenCalls{Path: "/var/log/app1.log", Flags: []string{"READ"}}, + types.OpenCalls{Path: "/var/log/app2.log", Flags: []string{"WRITE"}}, + ), expected: []types.OpenCalls{ - {Path: "/home/\u22ef/common.txt", Flags: []string{"APPEND", "READ", "WRITE"}}, + {Path: "/home/\u22ef/common.txt", Flags: flagsForN(threshold + 1)}, {Path: "/var/log/app1.log", Flags: []string{"READ"}}, {Path: "/var/log/app2.log", Flags: []string{"WRITE"}}, }, }, { name: "Multiple dynamic segments", - input: []types.OpenCalls{ - {Path: "/home/user1/file1.txt", Flags: []string{"READ"}}, - {Path: "/home/user2/file1.txt", Flags: []string{"WRITE"}}, - {Path: "/home/user3/file1.txt", Flags: []string{"APPEND"}}, - {Path: "/home/user4/file1.txt", Flags: []string{"READ", "WRITE"}}, - {Path: "/home/user1/file2.txt", Flags: []string{"READ"}}, - {Path: "/home/user2/file2.txt", Flags: []string{"WRITE"}}, - {Path: "/home/user3/file2.txt", Flags: []string{"APPEND"}}, - {Path: "/home/user4/file2.txt", Flags: []string{"READ", "WRITE"}}, - }, + input: append( + generateOpenCallsWithFlags("/home", "file1.txt", threshold+1), + generateOpenCallsWithFlags("/home", "file2.txt", threshold+1)..., + ), expected: []types.OpenCalls{ - {Path: "/home/\u22ef/file1.txt", Flags: []string{"APPEND", "READ", "WRITE"}}, - {Path: "/home/\u22ef/file2.txt", Flags: []string{"APPEND", "READ", "WRITE"}}, + {Path: "/home/\u22ef/file1.txt", Flags: flagsForN(threshold + 1)}, + {Path: "/home/\u22ef/file2.txt", Flags: flagsForN(threshold + 1)}, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(3) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) result, err := dynamicpathdetector.AnalyzeOpens(tt.input, analyzer, mapset.NewSet[string]()) assert.NoError(t, err) @@ -139,7 +104,469 @@ func TestAnalyzeOpensWithFlagMergingAndThreshold(t *testing.T) { } } -// Helper function to check if a slice of strings contains only unique elements +func TestAnalyzeOpensWithAsteriskAndEllipsis(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + + // Generate threshold paths + one ⋯ path to trigger collapse + var input []types.OpenCalls + for i := 0; i < threshold; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/home/user%d/file.txt", i), Flags: []string{"READ"}, + }) + } + input = append(input, + types.OpenCalls{Path: "/home/\u22ef/file.txt", Flags: []string{"READ"}}, + types.OpenCalls{Path: fmt.Sprintf("/home/user%d/file.txt", threshold), Flags: []string{"READ"}}, + ) + + expected := []types.OpenCalls{ + {Path: "/home/\u22ef/file.txt", Flags: []string{"READ"}}, + } + + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + + assert.ElementsMatch(t, expected, result) +} + +func TestAnalyzeOpensWithMultiCollapse(t *testing.T) { + // NewPathAnalyzerWithConfigs with nil configs uses a uniform threshold (no per-prefix configs). + threshold := dynamicpathdetector.DefaultCollapseConfig.Threshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + + // Only 3 paths under /var/run — uniform threshold is 5, so 3 children <= 5. + // These should NOT collapse. + input := []types.OpenCalls{ + {Path: "/var/run/txt/file.txt", Flags: []string{"READ"}}, + {Path: "/var/run/txt1/file.txt", Flags: []string{"READ"}}, + {Path: "/var/run/txt2/file.txt", Flags: []string{"READ"}}, + } + + expected := []types.OpenCalls{ + {Path: "/var/run/txt/file.txt", Flags: []string{"READ"}}, + {Path: "/var/run/txt1/file.txt", Flags: []string{"READ"}}, + {Path: "/var/run/txt2/file.txt", Flags: []string{"READ"}}, + } + + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + + assert.ElementsMatch(t, expected, result) +} + +func TestAnalyzeOpensWithDynamicConfigs(t *testing.T) { + etcThreshold := configThreshold("/etc") + optThreshold := configThreshold("/opt") + varRunThreshold := configThreshold("/var/run") + appThreshold := configThreshold("/app") + tmpThreshold := 10 // custom for this test + + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, []dynamicpathdetector.CollapseConfig{ + {Prefix: "/etc", Threshold: etcThreshold}, + {Prefix: "/opt", Threshold: optThreshold}, + {Prefix: "/var/run", Threshold: varRunThreshold}, + {Prefix: "/app", Threshold: appThreshold}, + {Prefix: "/tmp", Threshold: tmpThreshold}, + }) + + var pathsToAdd []string + + // /etc paths (high threshold) - should not collapse + for i := 0; i < 8; i++ { + pathsToAdd = append(pathsToAdd, fmt.Sprintf("/etc/config/item%d", i)) + } + pathsToAdd = append(pathsToAdd, + "/etc/hosts", + "/etc/resolv.conf", + "/etc/hostname", + "/etc/systemd/system.conf", + ) + // Total /etc: 12, well below etcThreshold (50) + + // /opt paths — exceed optThreshold to trigger collapse + for i := 0; i < optThreshold+1; i++ { + pathsToAdd = append(pathsToAdd, fmt.Sprintf("/opt/app%d/binary", i)) + } + + // /var/run paths — exceed varRunThreshold to trigger collapse + for i := 0; i < varRunThreshold+1; i++ { + pathsToAdd = append(pathsToAdd, fmt.Sprintf("/var/run/pid%d.pid", i)) + } + + // /app paths — appThreshold is 1, so second child triggers wildcard + pathsToAdd = append(pathsToAdd, + "/app/some/deep/path", + "/app/another/path", + ) + + // /tmp paths — exceed tmpThreshold to trigger collapse + for i := 0; i < tmpThreshold+1; i++ { + pathsToAdd = append(pathsToAdd, fmt.Sprintf("/tmp/user%d/a", i)) + } + + var input []types.OpenCalls + for _, p := range pathsToAdd { + input = append(input, types.OpenCalls{Path: p, Flags: []string{"READ"}}) + } + + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + + // /etc paths (threshold 50) should NOT be collapsed + etcPaths := filterByPrefix(result, "/etc/") + assert.Equal(t, 12, len(etcPaths), "/etc paths should remain individual (below threshold %d)", etcThreshold) + + // /app (threshold 1) - immediately collapses to wildcard + assertContainsPath(t, result, "/app/*") + + // /opt — collapses; both wildcard and dynamic-with-subtree are acceptable + assertContainsOneOfPaths(t, result, "/opt/*", "/opt/\u22ef/binary") + + // /tmp — collapses + assertContainsOneOfPaths(t, result, "/tmp/*", "/tmp/\u22ef/a") + + // /var/run — collapses + assertContainsOneOfPaths(t, result, "/var/run/*", "/var/run/\u22ef") + + // Total: 12 etc + 1 app + 1 opt + 1 tmp + 1 var/run = 16 + assert.Equal(t, 16, len(result), "expected 16 total paths, got %d: %v", len(result), pathsFromResult(result)) +} + +// TestAnalyzeOpensCollapseExactBoundary verifies that threshold is strictly "greater than", +// not "greater than or equal". With threshold N, exactly N children should NOT collapse, +// but N+1 children SHOULD. +func TestAnalyzeOpensCollapseExactBoundary(t *testing.T) { + threshold := dynamicpathdetector.DefaultCollapseConfig.Threshold + + t.Run("at threshold - no collapse", func(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + var input []types.OpenCalls + for i := 0; i < threshold; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/data/item%d/info", i), + Flags: []string{"READ"}, + }) + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, threshold, len(result), "at exact threshold, paths should NOT collapse") + for _, r := range result { + assert.NotContains(t, r.Path, "\u22ef", "no dynamic segment expected") + assert.NotContains(t, r.Path, "*", "no wildcard expected") + } + }) + + t.Run("above threshold - collapse", func(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + var input []types.OpenCalls + for i := 0; i < threshold+1; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/data/item%d/info", i), + Flags: []string{"READ"}, + }) + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "above threshold, paths should collapse to 1") + assert.Equal(t, "/data/\u22ef/info", result[0].Path, "single \u22ef should not collapse to *") + }) +} + +// TestAnalyzeOpensDuplicatePathsNoCollapse verifies that repeating the same path +// many times does NOT trigger a collapse - only unique segment names count. +func TestAnalyzeOpensDuplicatePathsNoCollapse(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + var input []types.OpenCalls + // Repeat the same path many times — should NOT trigger collapse + for i := 0; i < threshold*10; i++ { + input = append(input, types.OpenCalls{ + Path: "/data/same-child/file.txt", + Flags: []string{"READ"}, + }) + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result)) + assert.Equal(t, "/data/same-child/file.txt", result[0].Path, "duplicate paths should not trigger collapse") +} + +// TestAnalyzeOpensVaryingDepthsUnderPrefix verifies collapse behavior when paths +// under the same prefix have different depths. +func TestAnalyzeOpensVaryingDepthsUnderPrefix(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + + // Generate threshold+1 unique children under /data to trigger collapse + var input []types.OpenCalls + for i := 0; i < threshold+1; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/data/%c/deep/file", 'a'+rune(i)), + Flags: []string{"READ"}, + }) + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + for _, r := range result { + assert.True(t, + strings.Contains(r.Path, "\u22ef") || strings.Contains(r.Path, "*"), + "path %q should contain a dynamic or wildcard segment after collapse", r.Path) + } +} + +// TestAnalyzeOpensNewPathAfterCollapse verifies that a new path arriving after +// the threshold was already crossed gets absorbed by the collapsed node. +func TestAnalyzeOpensNewPathAfterCollapse(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + + // First batch: trigger collapse with threshold+1 children + var batch1 []types.OpenCalls + for i := 0; i < threshold+1; i++ { + batch1 = append(batch1, types.OpenCalls{ + Path: fmt.Sprintf("/srv/%c/log", 'a'+rune(i)), Flags: []string{"READ"}, + }) + } + result1, err := dynamicpathdetector.AnalyzeOpens(batch1, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result1), "first batch should collapse to 1 path") + + // Second batch: add a completely new child — it should be absorbed + batch2 := append(batch1, types.OpenCalls{ + Path: "/srv/new-service/log", Flags: []string{"WRITE"}, + }) + result2, err := dynamicpathdetector.AnalyzeOpens(batch2, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result2), "new path after collapse should be absorbed") + assert.Contains(t, result2[0].Flags, "WRITE", "flags from new path should be merged") +} + +// TestAnalyzeOpensDefaultThresholdForUnconfiguredPrefix verifies that paths under +// a prefix without a specific config use the default threshold. +func TestAnalyzeOpensDefaultThresholdForUnconfiguredPrefix(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, []dynamicpathdetector.CollapseConfig{ + {Prefix: "/configured", Threshold: 2}, + }) + + // /configured has threshold 2: 3 children should collapse + configuredInput := []types.OpenCalls{ + {Path: "/configured/a/file", Flags: []string{"READ"}}, + {Path: "/configured/b/file", Flags: []string{"READ"}}, + {Path: "/configured/c/file", Flags: []string{"READ"}}, + } + result, err := dynamicpathdetector.AnalyzeOpens(configuredInput, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "/configured should collapse with threshold 2") + + // /unconfigured uses default threshold: 3 children should NOT collapse + defaultThreshold := dynamicpathdetector.DefaultCollapseConfig.Threshold + analyzer2 := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, []dynamicpathdetector.CollapseConfig{ + {Prefix: "/configured", Threshold: 2}, + }) + unconfiguredInput := []types.OpenCalls{ + {Path: "/unconfigured/a/file", Flags: []string{"READ"}}, + {Path: "/unconfigured/b/file", Flags: []string{"READ"}}, + {Path: "/unconfigured/c/file", Flags: []string{"READ"}}, + } + result2, err := dynamicpathdetector.AnalyzeOpens(unconfiguredInput, analyzer2, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 3, len(result2), + "/unconfigured should NOT collapse with default threshold %d", defaultThreshold) +} + +// TestAnalyzeOpensThreshold1ImmediateWildcard verifies that threshold 1 produces +// a wildcard (*) on the very first additional child. +func TestAnalyzeOpensThreshold1ImmediateWildcard(t *testing.T) { + appThreshold := configThreshold("/app") // threshold 1 + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, []dynamicpathdetector.CollapseConfig{ + {Prefix: "/instant", Threshold: appThreshold}, + }) + + t.Run("single path - no collapse yet", func(t *testing.T) { + input := []types.OpenCalls{ + {Path: "/instant/only-child/data", Flags: []string{"READ"}}, + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result)) + assert.Equal(t, "/instant/*", result[0].Path, "threshold 1 should wildcard immediately") + }) + + t.Run("two paths - collapsed", func(t *testing.T) { + analyzer2 := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, []dynamicpathdetector.CollapseConfig{ + {Prefix: "/instant", Threshold: appThreshold}, + }) + input := []types.OpenCalls{ + {Path: "/instant/first/data", Flags: []string{"READ"}}, + {Path: "/instant/second/data", Flags: []string{"WRITE"}}, + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer2, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result)) + assert.Equal(t, "/instant/*", result[0].Path) + assert.ElementsMatch(t, []string{"READ", "WRITE"}, result[0].Flags) + }) +} + +// TestAnalyzeOpensCollapseDoesNotAffectSiblingPrefixes verifies that collapsing +// one prefix does not affect paths under a sibling prefix. +func TestAnalyzeOpensCollapseDoesNotAffectSiblingPrefixes(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + + // /alpha: threshold+1 children → should collapse + var input []types.OpenCalls + for i := 0; i < threshold+1; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/alpha/a%d/file", i), Flags: []string{"READ"}, + }) + } + // /beta: 2 children → should NOT collapse (2 <= threshold) + input = append(input, + types.OpenCalls{Path: "/beta/b1/file", Flags: []string{"WRITE"}}, + types.OpenCalls{Path: "/beta/b2/file", Flags: []string{"WRITE"}}, + ) + + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + + betaPaths := filterByPrefix(result, "/beta/") + assert.Equal(t, 2, len(betaPaths), "/beta paths should remain individual") + + alphaPaths := filterByPrefix(result, "/alpha/") + assert.Equal(t, 1, len(alphaPaths), "/alpha paths should collapse to 1") +} + +// TestAnalyzeOpensFlagMergingAfterCollapse verifies that flags from all paths +// that collapse into the same dynamic node are properly merged and deduplicated. +func TestAnalyzeOpensFlagMergingAfterCollapse(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + + // Generate threshold+1 children to trigger collapse, with varied flags + var input []types.OpenCalls + flags := [][]string{{"READ", "WRITE"}, {"WRITE", "APPEND"}, {"READ"}, {"APPEND", "READ"}} + for i := 0; i < threshold+1; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/logs/service%d/app.log", i), + Flags: flags[i%len(flags)], + }) + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result)) + assert.ElementsMatch(t, []string{"APPEND", "READ", "WRITE"}, result[0].Flags, "flags should be merged and deduplicated") + assert.True(t, areStringSlicesUnique(result[0].Flags), "flags must be unique") +} + +// TestAnalyzeOpensMultipleLevelsOfCollapse verifies behavior when both parent and +// grandchild segments independently exceed their thresholds. +func TestAnalyzeOpensMultipleLevelsOfCollapse(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + + var input []types.OpenCalls + // threshold+1 unique children under /multi, each with threshold+1 unique grandchildren + for i := 0; i < threshold+1; i++ { + for j := 0; j < threshold+1; j++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/multi/level%d/sub%d/file", i, j), + Flags: []string{"READ"}, + }) + } + } + + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "double collapse should yield a single path") + assert.True(t, + strings.Contains(result[0].Path, "\u22ef") || strings.Contains(result[0].Path, "*"), + "result %q should contain dynamic or wildcard segments", result[0].Path) +} + +// TestAnalyzeOpensExistingDynamicSegmentInInput verifies that input paths +// already containing ⋯ are handled correctly and merge with new paths. +func TestAnalyzeOpensExistingDynamicSegmentInInput(t *testing.T) { + // Use a high threshold so that the two paths alone don't trigger collapse — + // instead, the existing ⋯ segment absorbs the specific path. + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) + input := []types.OpenCalls{ + {Path: "/data/\u22ef/config", Flags: []string{"READ"}}, + {Path: "/data/specific/config", Flags: []string{"WRITE"}}, + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result)) + assert.Equal(t, "/data/\u22ef/config", result[0].Path) + assert.ElementsMatch(t, []string{"READ", "WRITE"}, result[0].Flags) +} + +// TestAnalyzeOpens_NilSbomSetNoError verifies that passing a nil sbomSet +// does not return an error. +func TestAnalyzeOpens_NilSbomSetNoError(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + input := []types.OpenCalls{ + {Path: "/usr/lib/libfoo.so", Flags: []string{"READ"}}, + {Path: "/usr/lib/libbar.so", Flags: []string{"READ"}}, + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, nil) + assert.NoError(t, err, "nil sbomSet should not cause an error") + assert.Equal(t, 2, len(result), "paths below threshold should remain individual") +} + +// TestAnalyzeOpens_NilSbomSetWithCollapse verifies that collapse works +// correctly even when sbomSet is nil. +func TestAnalyzeOpens_NilSbomSetWithCollapse(t *testing.T) { + threshold := configThreshold("/var/run") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + + var input []types.OpenCalls + for i := 0; i < threshold+1; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/usr/lib/lib%c.so", 'a'+rune(i)), + Flags: []string{"READ"}, + }) + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, nil) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "%d children > threshold %d, should collapse", threshold+1, threshold) + assert.True(t, + strings.Contains(result[0].Path, "\u22ef") || strings.Contains(result[0].Path, "*"), + "collapsed path should contain dynamic or wildcard segment, got %q", result[0].Path) +} + +// --- Helpers --- + +// generateOpenCallsWithFlags creates N OpenCalls under prefix/userN/filename with rotating flags. +func generateOpenCallsWithFlags(prefix, filename string, n int) []types.OpenCalls { + allFlags := []string{"READ", "WRITE", "APPEND"} + var result []types.OpenCalls + for i := 0; i < n; i++ { + result = append(result, types.OpenCalls{ + Path: fmt.Sprintf("%s/user%d/%s", prefix, i, filename), + Flags: []string{allFlags[i%len(allFlags)]}, + }) + } + return result +} + +// flagsForN returns the sorted, unique flags that generateOpenCallsWithFlags would produce for N items. +func flagsForN(n int) []string { + allFlags := []string{"READ", "WRITE", "APPEND"} + seen := map[string]bool{} + for i := 0; i < n; i++ { + seen[allFlags[i%len(allFlags)]] = true + } + var result []string + for f := range seen { + result = append(result, f) + } + sort.Strings(result) + return result +} + func areStringSlicesUnique(slice []string) bool { seen := make(map[string]struct{}) for _, s := range slice { @@ -150,3 +577,231 @@ func areStringSlicesUnique(slice []string) bool { } return true } + +func assertContainsPath(t *testing.T, result []types.OpenCalls, path string) { + t.Helper() + for _, r := range result { + if r.Path == path { + return + } + } + assert.Fail(t, fmt.Sprintf("result does not contain path %q, got: %v", path, pathsFromResult(result))) +} + +func assertContainsOneOfPaths(t *testing.T, result []types.OpenCalls, alternatives ...string) { + t.Helper() + for _, r := range result { + for _, alt := range alternatives { + if r.Path == alt { + return + } + } + } + assert.Fail(t, fmt.Sprintf("result does not contain any of %v, got: %v", alternatives, pathsFromResult(result))) +} + +func assertPathIsOneOf(t *testing.T, actual string, alternatives ...string) { + t.Helper() + for _, alt := range alternatives { + if actual == alt { + return + } + } + assert.Fail(t, fmt.Sprintf("path %q does not match any of %v", actual, alternatives)) +} + +func filterByPrefix(result []types.OpenCalls, prefix string) []types.OpenCalls { + var filtered []types.OpenCalls + for _, r := range result { + if strings.HasPrefix(r.Path, prefix) { + filtered = append(filtered, r) + } + } + return filtered +} + +func pathsFromResult(result []types.OpenCalls) []string { + paths := make([]string, len(result)) + for i, r := range result { + paths[i] = r.Path + } + return paths +} + +// TestAnalyzeOpensOverlappingPrefixConfigs verifies that overlapping prefix configs +// (e.g., /etc at 100 and /etc/apache2 at 5) work correctly: the most specific prefix wins. +func TestAnalyzeOpensOverlappingPrefixConfigs(t *testing.T) { + t.Run("/etc/apache2 uses threshold 5, not /etc's threshold 100", func(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs) + // 6 paths under /etc/apache2/mods-enabled/ — should collapse (6 > 5) + var input []types.OpenCalls + for i := 0; i < 6; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/etc/apache2/mods-enabled/mod%d.conf", i), + Flags: []string{"READ"}, + }) + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "6 paths > threshold 5 should collapse to 1, got: %v", pathsFromResult(result)) + assert.True(t, + strings.Contains(result[0].Path, "\u22ef") || strings.Contains(result[0].Path, "*"), + "collapsed path should contain dynamic segment, got %q", result[0].Path) + }) + + t.Run("/etc uses threshold 100, unaffected by /etc/apache2", func(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs) + // 8 paths directly under /etc/ — should NOT collapse (8 < 100) + input := []types.OpenCalls{ + {Path: "/etc/config1", Flags: []string{"READ"}}, + {Path: "/etc/config2", Flags: []string{"READ"}}, + {Path: "/etc/config3", Flags: []string{"READ"}}, + {Path: "/etc/config4", Flags: []string{"READ"}}, + {Path: "/etc/config5", Flags: []string{"READ"}}, + {Path: "/etc/config6", Flags: []string{"READ"}}, + {Path: "/etc/config7", Flags: []string{"READ"}}, + {Path: "/etc/config8", Flags: []string{"READ"}}, + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 8, len(result), "/etc paths should NOT collapse (8 < 100), got: %v", pathsFromResult(result)) + }) + + t.Run("unconfigured prefix /var/log uses default threshold", func(t *testing.T) { + defaultThreshold := dynamicpathdetector.DefaultCollapseConfig.Threshold + // At threshold — should NOT collapse + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs) + var input []types.OpenCalls + for i := 0; i < defaultThreshold; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/var/log/app%d.log", i), + Flags: []string{"READ"}, + }) + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, defaultThreshold, len(result), + "/var/log at exactly default threshold %d should NOT collapse", defaultThreshold) + + // One more — should collapse + analyzer2 := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs) + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/var/log/app%d.log", defaultThreshold), + Flags: []string{"READ"}, + }) + result2, err := dynamicpathdetector.AnalyzeOpens(input, analyzer2, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result2), + "/var/log exceeding default threshold %d should collapse", defaultThreshold) + }) + + t.Run("/var/run uses its own threshold 3, not default", func(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs) + // 4 paths under /var/run/ — should collapse (4 > 3) + input := []types.OpenCalls{ + {Path: "/var/run/pid1.pid", Flags: []string{"READ"}}, + {Path: "/var/run/pid2.pid", Flags: []string{"READ"}}, + {Path: "/var/run/pid3.pid", Flags: []string{"READ"}}, + {Path: "/var/run/pid4.pid", Flags: []string{"READ"}}, + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "4 paths > threshold 3 should collapse, got: %v", pathsFromResult(result)) + }) + + t.Run("/app uses threshold 1 (immediate wildcard)", func(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs) + input := []types.OpenCalls{ + {Path: "/app/service1/config", Flags: []string{"READ"}}, + } + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + assert.Equal(t, 1, len(result)) + assert.Equal(t, "/app/*", result[0].Path, "threshold 1 should produce wildcard immediately") + }) + + t.Run("mixed overlapping: /etc and /etc/apache2 coexist correctly", func(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs) + var input []types.OpenCalls + + // 6 paths under /etc/apache2/conf.d/ (should collapse at threshold 5) + for i := 0; i < 6; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/etc/apache2/conf.d/site%d.conf", i), + Flags: []string{"READ"}, + }) + } + + // 8 paths directly under /etc/ (should NOT collapse at threshold 100) + for i := 0; i < 8; i++ { + input = append(input, types.OpenCalls{ + Path: fmt.Sprintf("/etc/setting%d.conf", i), + Flags: []string{"READ"}, + }) + } + + result, err := dynamicpathdetector.AnalyzeOpens(input, analyzer, mapset.NewSet[string]()) + assert.NoError(t, err) + + // /etc/apache2 paths should have collapsed + apache2Paths := filterByPrefix(result, "/etc/apache2/") + assert.Equal(t, 1, len(apache2Paths), + "/etc/apache2 paths (6 > threshold 5) should collapse to 1, got: %v", pathsFromResult(apache2Paths)) + assert.True(t, + strings.Contains(apache2Paths[0].Path, "\u22ef") || strings.Contains(apache2Paths[0].Path, "*"), + "collapsed apache2 path should contain dynamic segment, got %q", apache2Paths[0].Path) + + // /etc direct paths should remain individual + etcDirectPaths := []types.OpenCalls{} + for _, r := range result { + if strings.HasPrefix(r.Path, "/etc/") && !strings.HasPrefix(r.Path, "/etc/apache2/") { + etcDirectPaths = append(etcDirectPaths, r) + } + } + assert.Equal(t, 8, len(etcDirectPaths), + "/etc direct paths (8 < threshold 100) should remain individual, got: %v", pathsFromResult(etcDirectPaths)) + }) +} + +// TestFindConfigForPath verifies the config lookup returns the most specific matching prefix. +func TestFindConfigForPath(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs) + + tests := []struct { + path string + expectedPrefix string + expectedThreshold int + }{ + { + path: "/etc/apache2/mods-enabled/file", + expectedPrefix: "/etc/apache2", + expectedThreshold: 5, + }, + { + path: "/etc/hosts", + expectedPrefix: "/etc", + expectedThreshold: 100, + }, + { + path: "/var/run/pid1.pid", + expectedPrefix: "/var/run", + expectedThreshold: 3, + }, + { + path: "/var/log/app.log", + expectedPrefix: "/", + expectedThreshold: dynamicpathdetector.DefaultCollapseConfig.Threshold, + }, + } + + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + config := analyzer.FindConfigForPath(tt.path) + assert.NotNil(t, config, "config should not be nil for path %q", tt.path) + assert.Equal(t, tt.expectedPrefix, config.Prefix, + "path %q should match prefix %q", tt.path, tt.expectedPrefix) + assert.Equal(t, tt.expectedThreshold, config.Threshold, + "path %q should have threshold %d", tt.path, tt.expectedThreshold) + }) + } +} diff --git a/pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go b/pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go index 4ca01af42..09dbdf56a 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go @@ -13,7 +13,7 @@ import ( ) func BenchmarkAnalyzePath(b *testing.B) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) paths := generateMixedPaths(10000, 0) // 0 means use default mixed lengths identifier := "test" @@ -33,7 +33,7 @@ func BenchmarkAnalyzePathWithDifferentLengths(b *testing.B) { for _, length := range pathLengths { b.Run(fmt.Sprintf("PathLength-%d", length), func(b *testing.B) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) paths := generateMixedPaths(10000, length) identifier := "test" @@ -52,7 +52,7 @@ func BenchmarkAnalyzePathWithDifferentLengths(b *testing.B) { func BenchmarkAnalyzeOpensVsDeflateStringer(b *testing.B) { paths := pathsToOpens(generateMixedPaths(10000, 0)) - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) b.Run("AnalyzeOpens", func(b *testing.B) { b.ResetTimer() @@ -72,14 +72,14 @@ func BenchmarkAnalyzeOpensVsDeflateStringer(b *testing.B) { }) } -func BenchmarkCompareDynamic(b *testing.B) { - dynamicPath := "/api/\u22ef/\u22ef" - regularPath := "/api/users/123" - for i := 0; i < b.N; i++ { - _ = dynamicpathdetector.CompareDynamic(dynamicPath, regularPath) - } - b.ReportAllocs() -} +// func BenchmarkCompareDynamic(b *testing.B) { +// dynamicPath := "/api/\u22ef/\u22ef" +// regularPath := "/api/users/123" +// for i := 0; i < b.N; i++ { +// _ = dynamicpathdetector.CompareDynamic(dynamicPath, regularPath) +// } +// b.ReportAllocs() +// } func generateMixedPaths(count int, fixedLength int) []string { paths := make([]string, count) diff --git a/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go b/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go index 8f05b9606..0f2a2c5d6 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go @@ -8,15 +8,26 @@ import ( "github.com/stretchr/testify/assert" ) -func TestNewPathAnalyzer(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) +// configThreshold returns the collapse threshold for the given path prefix +// from DefaultCollapseConfigs. Falls back to DefaultCollapseConfig.Threshold. +func configThreshold(prefix string) int { + for _, cfg := range dynamicpathdetector.DefaultCollapseConfigs { + if cfg.Prefix == prefix { + return cfg.Threshold + } + } + return dynamicpathdetector.DefaultCollapseConfig.Threshold +} + +func TestNewPathAnalyzerWithConfigs(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) if analyzer == nil { - t.Error("NewPathAnalyzer() returned nil") + t.Error("NewPathAnalyzerWithConfigs() returned nil") } } func TestAnalyzePath(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) testCases := []struct { name string @@ -39,17 +50,46 @@ func TestAnalyzePath(t *testing.T) { } } +func TestCollapseAdjacentDynamicIdentifiers(t *testing.T) { + testCases := []struct { + name string + path string + expected string + }{ + {"No dynamic identifiers", "/a/b/c", "/a/b/c"}, + {"Single dynamic identifier", "/a/\u22ef/c", "/a/\u22ef/c"}, + {"Two adjacent dynamic identifiers", "/a/\u22ef/\u22ef/d", "/a/*/d"}, + {"Three adjacent dynamic identifiers", "/a/\u22ef/\u22ef/\u22ef/e", "/a/*/e"}, + {"Dynamic identifiers separated by static segment", "/\u22ef/b/\u22ef/d", "/\u22ef/b/\u22ef/d"}, + {"Multiple groups of adjacent identifiers", "/\u22ef/\u22ef/c/\u22ef/\u22ef/f", "/*/c/*/f"}, + {"Starts with adjacent identifiers", "/\u22ef/\u22ef/c", "/*/c"}, + {"Ends with adjacent identifiers", "/a/\u22ef/\u22ef", "/a/*"}, + {"Only adjacent identifiers", "/\u22ef/\u22ef", "/*"}, + {"Path with leading slash", "/\u22ef/\u22ef", "/*"}, + {"Empty path", "", ""}, + {"Single segment path", "a", "a"}, + {"Single dynamic segment path", "\u22ef", "\u22ef"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := dynamicpathdetector.CollapseAdjacentDynamicIdentifiers(tc.path) + assert.Equal(t, tc.expected, result, "Path was not collapsed as expected. Got %s, want %s", result, tc.expected) + }) + } +} + func TestDynamicSegments(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + threshold := dynamicpathdetector.OpenDynamicThreshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) - // Create 99 different paths under the 'users' segment - for i := 0; i < 101; i++ { + for i := 0; i < threshold+1; i++ { path := fmt.Sprintf("/api/users/%d", i) _, err := analyzer.AnalyzePath(path, "api") assert.NoError(t, err) } - result, err := analyzer.AnalyzePath("/api/users/101", "api") + result, err := analyzer.AnalyzePath(fmt.Sprintf("/api/users/%d", threshold+1), "api") if err != nil { t.Errorf("AnalyzePath() returned an error: %v", err) } @@ -57,16 +97,16 @@ func TestDynamicSegments(t *testing.T) { assert.Equal(t, expected, result) // Test with one of the original IDs to ensure it's also marked as dynamic - result, err = analyzer.AnalyzePath("/api/users/50", "api") + result, err = analyzer.AnalyzePath("/api/users/0", "api") assert.NoError(t, err) assert.Equal(t, expected, result) } func TestMultipleDynamicSegments(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + threshold := dynamicpathdetector.OpenDynamicThreshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) - // Create 99 different paths for both 'users' and 'posts' segments - for i := 0; i < 110; i++ { + for i := 0; i < threshold+10; i++ { path := fmt.Sprintf("/api/users/%d/posts/%d", i, i) _, err := analyzer.AnalyzePath(path, "api") if err != nil { @@ -74,18 +114,17 @@ func TestMultipleDynamicSegments(t *testing.T) { } } - // Test with the 100th unique user and post IDs (should trigger dynamic segments) - result, err := analyzer.AnalyzePath("/api/users/101/posts/1031", "api") + result, err := analyzer.AnalyzePath(fmt.Sprintf("/api/users/%d/posts/%d", threshold+11, threshold+11), "api") assert.NoError(t, err) expected := "/api/users/\u22ef/posts/\u22ef" assert.Equal(t, expected, result) } func TestMixedStaticAndDynamicSegments(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + threshold := dynamicpathdetector.OpenDynamicThreshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) - // Create 99 different paths for 'users' but keep 'posts' static - for i := 0; i < 101; i++ { + for i := 0; i < threshold+1; i++ { path := fmt.Sprintf("/api/users/%d/posts", i) _, err := analyzer.AnalyzePath(path, "api") if err != nil { @@ -93,42 +132,40 @@ func TestMixedStaticAndDynamicSegments(t *testing.T) { } } - // Test with the 100th unique user ID but same 'posts' segment (should trigger dynamic segment for users) - result, err := analyzer.AnalyzePath("/api/users/99/posts", "api") + result, err := analyzer.AnalyzePath("/api/users/0/posts", "api") assert.NoError(t, err) expected := "/api/users/\u22ef/posts" assert.Equal(t, expected, result) } func TestDifferentRootIdentifiers(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) - // Analyze paths with different root identifiers result1, _ := analyzer.AnalyzePath("/api/users/123", "api") result2, _ := analyzer.AnalyzePath("/api/products/456", "store") assert.Equal(t, "/api/users/123", result1) - assert.Equal(t, "/api/products/456", result2) } func TestDynamicThreshold(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + threshold := dynamicpathdetector.OpenDynamicThreshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) - for i := 0; i < 101; i++ { + for i := 0; i < threshold+1; i++ { path := fmt.Sprintf("/api/users/%d", i) result, _ := analyzer.AnalyzePath(path, "api") if result != fmt.Sprintf("/api/users/%d", i) { - t.Errorf("Path became dynamic before reaching 99 different paths") + t.Errorf("Path became dynamic before reaching %d different paths", threshold) } } - result, _ := analyzer.AnalyzePath("/api/users/991", "api") + result, _ := analyzer.AnalyzePath(fmt.Sprintf("/api/users/%d", threshold+2), "api") assert.Equal(t, "/api/users/\u22ef", result) } func TestEdgeCases(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) testCases := []struct { name string @@ -151,100 +188,52 @@ func TestEdgeCases(t *testing.T) { } func TestDynamicInsertion(t *testing.T) { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, nil) - // Insert a new path with a different identifier result, err := analyzer.AnalyzePath("/api/users/\u22ef", "api") assert.NoError(t, err) expected := "/api/users/\u22ef" assert.Equal(t, expected, result) - // Insert a new path with the same identifier result, err = analyzer.AnalyzePath("/api/users/102", "api") assert.NoError(t, err) expected = "/api/users/\u22ef" assert.Equal(t, expected, result) } -func TestCompareDynamic(t *testing.T) { - tests := []struct { - name string - dynamicPath string - regularPath string - want bool - }{ - { - name: "Equal paths", - dynamicPath: "/api/users/123", - regularPath: "/api/users/123", - want: true, - }, - { - name: "Different paths", - dynamicPath: "/api/users/123", - regularPath: "/api/users/456", - want: false, - }, - { - name: "Dynamic segment at the end", - dynamicPath: "/api/users/\u22ef", - regularPath: "/api/users/123", - want: true, - }, - { - name: "Dynamic segment at the end", - dynamicPath: "/api/users/\u22ef", - regularPath: "/api/users/123/posts", - want: false, - }, - { - name: "Dynamic segment at the end, no match", - dynamicPath: "/api/users/\u22ef", - regularPath: "/api/apps/123", - want: false, - }, - { - name: "Dynamic segment in the middle", - dynamicPath: "/api/\u22ef/123", - regularPath: "/api/users/123", - want: true, - }, - { - name: "Dynamic segment in the middle, no match", - dynamicPath: "/api/\u22ef/123", - regularPath: "/api/users/456", - want: false, - }, - { - name: "2 dynamic segments", - dynamicPath: "/api/\u22ef/\u22ef", - regularPath: "/api/users/123", - want: true, - }, - { - name: "2 dynamic segments, no match", - dynamicPath: "/api/\u22ef/\u22ef", - regularPath: "/papi/users/456", - want: false, - }, +func TestDynamic(t *testing.T) { + threshold := dynamicpathdetector.OpenDynamicThreshold + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(threshold, nil) + for i := 0; i < threshold+1; i++ { + path := fmt.Sprintf("/api/users/%d", i) + _, err := analyzer.AnalyzePath(path, "api") + assert.NoError(t, err) + } + result, err := analyzer.AnalyzePath(fmt.Sprintf("/api/users/%d", threshold+1), "api") + assert.NoError(t, err) + expected := "/api/users/\u22ef" + assert.Equal(t, expected, result) +} + +func TestCollapseConfig(t *testing.T) { + appThreshold := configThreshold("/app") + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, []dynamicpathdetector.CollapseConfig{ { - name: "2 other dynamic segments", - dynamicPath: "/\u22ef/users/\u22ef", - regularPath: "/api/users/123", - want: true, + Prefix: "/api", + Threshold: appThreshold, }, { - name: "2 other dynamic segments, no match", - dynamicPath: "/\u22ef/users/\u22ef", - regularPath: "/api/apps/456", - want: false, + Prefix: "/169.254.169.254", + Threshold: configThreshold("/etc"), }, + }) + for i := 0; i < appThreshold+1; i++ { + path := fmt.Sprintf("/api/users/%d", i) + _, err := analyzer.AnalyzePath(path, "api") + assert.NoError(t, err) } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := dynamicpathdetector.CompareDynamic(tt.dynamicPath, tt.regularPath); got != tt.want { - t.Errorf("CompareDynamic() = %v, want %v", got, tt.want) - } - }) - } + result, err := analyzer.AnalyzePath(fmt.Sprintf("/api/users/%d", appThreshold+1), "api") + assert.NoError(t, err) + expected := "/api/*" + assert.Equal(t, expected, result) } diff --git a/pkg/registry/file/dynamicpathdetector/types.go b/pkg/registry/file/dynamicpathdetector/types.go index 1bd2d52ff..0d187ad9f 100644 --- a/pkg/registry/file/dynamicpathdetector/types.go +++ b/pkg/registry/file/dynamicpathdetector/types.go @@ -1,16 +1,68 @@ package dynamicpathdetector -const DynamicIdentifier string = "\u22ef" +// --- Identifier constants --- +// DynamicIdentifier matches exactly one path segment (like a single-segment wildcard). +// WildcardIdentifier matches zero or more path segments (like a glob **). +const ( + DynamicIdentifier = "\u22ef" // U+22EF: ⋯ + WildcardIdentifier = "*" +) + +// --- Collapse configuration --- + +// CollapseConfig controls the threshold at which children of a trie node +// (under the given path Prefix) are collapsed into a dynamic or wildcard node. +type CollapseConfig struct { + Prefix string + Threshold int +} + +// DefaultCollapseConfigs defines per-prefix thresholds for path collapsing. +// Paths under these prefixes are collapsed when the number of unique children +// exceeds the threshold. +var DefaultCollapseConfigs = []CollapseConfig{ + {Prefix: "/etc", Threshold: 100}, + {Prefix: "/etc/apache2", Threshold: 5}, //this is mostly for our webapp standard test + {Prefix: "/opt", Threshold: 5}, + {Prefix: "/var/run", Threshold: 3}, + {Prefix: "/app", Threshold: 1}, +} + +const OpenDynamicThreshold = 50 +const EndpointDynamicThreshold = 100 + +var DefaultCollapseConfig = CollapseConfig{ + Prefix: "/", + Threshold: OpenDynamicThreshold, +} + +// --- Types --- type SegmentNode struct { SegmentName string Count int Children map[string]*SegmentNode + Config *CollapseConfig } type PathAnalyzer struct { - RootNodes map[string]*SegmentNode - threshold int + root *TrieNode + identRoots map[string]*TrieNode + configs []CollapseConfig + defaultCfg CollapseConfig + collapseAdjacent bool +} + +func NewTrieNode() *TrieNode { + return &TrieNode{ + Children: make(map[string]*TrieNode), + } +} + +type TrieNode struct { + Children map[string]*TrieNode + Config *CollapseConfig + Count int } func (sn *SegmentNode) IsNextDynamic() bool {