Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 238 additions & 0 deletions grype/vex/csaf/csaf.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
"slices"

"github.com/gocsaf/csaf/v3/csaf"

"github.com/anchore/grype/grype/pkg"
"github.com/anchore/grype/grype/version"
"github.com/anchore/packageurl-go"
)

// advisoryMatch captures the criteria that caused a vulnerability to match a CSAF advisory
Expand Down Expand Up @@ -127,3 +131,237 @@
}
return purls
}

// synthesisCandidate describes a (vulnerability, package) pair that should be
// added to grype's results based on a CSAF advisory, when no DB-backed match
// already exists.
type synthesisCandidate struct {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it looks like we now have a synthesisCandidate which is converted to an advisoryMatch which is converted to a match.Match... could we avoid the middlemen on these and just directly create Vulnerability, IgnoreRule/IgnoreFilter, and Match objects or similar? We could move the IgnoreFilter indexing to some shared location

// Vulnerability is the CSAF vulnerability entry whose product status listed
// the matching product.
Vulnerability *csaf.Vulnerability
// Status is the product-status bucket the product was listed under.
Status status
// ProductID is the CSAF product ID whose purl matched Package.
ProductID csaf.ProductID
// Package points at the catalog package that fell within the statement's scope.
Package *pkg.Package
}

// indexedPackage is a package whose purl has been parsed once and whose
// ecosystem version format has been resolved, so synthesis does not re-parse
// the same package on every statement comparison.
type indexedPackage struct {
// pkg points at the original catalog entry this index record was built from.
pkg *pkg.Package
// purl is the parsed form of the package's PURL string.
purl packageurl.PackageURL
// format is the version format resolved for the package via pkg.VersionFormat.
format version.Format
}

// purlIdentityKey returns the (type, namespace, name) identity of a purl.
// packageMatchesStatement requires these three to be equal, so a package can
// only ever match a statement that shares this key. Indexing packages by it
// lets synthesis compare each statement against the handful of packages with a
// matching identity instead of the whole catalog.
func purlIdentityKey(p packageurl.PackageURL) string {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably use comparable types instead of concat'd strings as map keys -- these can definitely add up to noticable time with the scale we have sometimes, e.g.:

type purlKey struct {
  typ, namespace, name string
}

(same comment everywhere we are making strings as map keys)

// Join type, namespace and name with a NUL byte as the separator.
// NOTE(review): presumably NUL cannot occur inside a purl component, which
// would keep keys unambiguous — confirm against the purl spec.
return p.Type + "\x00" + p.Namespace + "\x00" + p.Name
}

// buildPackageIndex parses every package purl once and buckets the packages by
// their (type, namespace, name) identity.
func buildPackageIndex(pkgs []pkg.Package) map[string][]indexedPackage {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like both of these implementations have similar buildPackageIndex functions that operate on the full set of packages. I think we should flip this to instead build indexes for VEX rules. It's hard to say which would be a smaller set (definitely VEX rules with no vex files), but we operate a single-package at a time in the matcher world and already have indexes for IgnoreRules and other IgnoreFilters; I see a lot of similarity to matchers here and I think a future refactoring is likely to introduce per-package streaming, which this would be incompatible with.

index := make(map[string][]indexedPackage)
// Iterate by index so the stored pointer refers to the element inside the
// caller's slice rather than to a loop-local copy.
for i := range pkgs {
// Packages without a purl can never match a statement purl; skip them.
if pkgs[i].PURL == "" {
continue
}
parsed, err := packageurl.FromString(pkgs[i].PURL)
if err != nil {
// Unparsable purls are silently left out of the index.
continue
}
key := purlIdentityKey(parsed)
index[key] = append(index[key], indexedPackage{
pkg: &pkgs[i],
purl: parsed,
format: pkg.VersionFormat(pkgs[i]),
})
}
return index
}

// statusProducts pairs a CSAF product-status slice with the synthesis status it
// maps to. Using a fixed slice avoids allocating a map per vulnerability.
type statusProducts struct {
// status is the synthesis status this bucket maps to.
status status
// products is the CSAF product list for that status; consumers check it
// for nil before dereferencing.
products *csaf.Products
}

// synthesisStatuses lists, in a fixed order, the product-status buckets of ps
// that may trigger synthesis, each tagged with its status. The fixed and
// known_not_affected buckets are deliberately left out. A nil ps yields nil.
func synthesisStatuses(ps *csaf.ProductStatus) []statusProducts {
	if ps == nil {
		return nil
	}
	buckets := []statusProducts{
		{status: firstAffected, products: ps.FirstAffected},
		{status: knownAffected, products: ps.KnownAffected},
		{status: lastAffected, products: ps.LastAffected},
		{status: recommended, products: ps.Recommended},
		{status: underInvestigation, products: ps.UnderInvestigation},
	}
	return buckets
}

// findSynthesisCandidates scans all advisories and returns one candidate for
// every (vulnerability, package) pairing in which a catalog package falls
// inside the scope of a product purl listed under an affected-like status.
// The version dimension depends on the status:
//   - last_affected: pkg.version <= stmt.version (ceiling)
//   - first_affected: pkg.version >= stmt.version (floor)
//   - known_affected, recommended, under_investigation: exact match
//     (or wildcard if the statement purl has no version)
//
// fixed and known_not_affected are not affected-like and never produce
// candidates.
//
// The catalog is parsed and bucketed by purl identity once up front, so a
// statement purl is only compared against packages sharing its
// (type, namespace, name). Product purls are memoised per advisory, so the
// product tree is consulted once per product ID.
//
//nolint:gocognit
func (advs advisories) findSynthesisCandidates(pkgs []pkg.Package) []synthesisCandidate {
	if len(pkgs) == 0 {
		return nil
	}

	byIdentity := buildPackageIndex(pkgs)
	if len(byIdentity) == 0 {
		return nil
	}

	var candidates []synthesisCandidate
	for _, adv := range advs {
		if adv == nil || adv.Vulnerabilities == nil {
			continue
		}

		// Memoise product purls for this advisory: collecting the
		// identification helpers walks the whole product tree.
		purlCache := make(map[csaf.ProductID][]string)

		for _, vuln := range adv.Vulnerabilities {
			if vuln == nil || vuln.CVE == nil {
				continue
			}

			for _, bucket := range synthesisStatuses(vuln.ProductStatus) {
				if bucket.products == nil {
					continue
				}
				for _, idPtr := range *bucket.products {
					if idPtr == nil {
						continue
					}
					id := *idPtr

					stmtPURLs, cached := purlCache[id]
					if !cached {
						stmtPURLs = purlsFromProductIdentificationHelpers(adv.ProductTree.CollectProductIdentificationHelpers(id))
						purlCache[id] = stmtPURLs
					}

					for _, raw := range stmtPURLs {
						stmt, err := packageurl.FromString(raw)
						if err != nil {
							continue
						}
						sameIdentity := byIdentity[purlIdentityKey(stmt)]
						for i := range sameIdentity {
							entry := &sameIdentity[i]
							if packageMatchesParsed(stmt, entry.purl, entry.format, bucket.status) {
								candidates = append(candidates, synthesisCandidate{
									Vulnerability: vuln,
									Status:        bucket.status,
									ProductID:     id,
									Package:       entry.pkg,
								})
							}
						}
					}
				}
			}
		}
	}

	return candidates
}

// packageMatchesStatement reports whether the given package's purl falls
// within the scope of a VEX statement that names stmtPURL with the given
// CSAF status. Type/namespace/name/qualifiers must always match; the version
// dimension is interpreted according to the status.
func packageMatchesStatement(stmtPURL string, p *pkg.Package, st status) bool {

Check failure on line 295 in grype/vex/csaf/csaf.go

View workflow job for this annotation

GitHub Actions / Static analysis

func packageMatchesStatement is unused (unused)
// A statement purl that does not parse cannot match anything.
stmt, err := packageurl.FromString(stmtPURL)
if err != nil {
return false
}
// Likewise, a package whose own purl does not parse is treated as a non-match.
pkgPURL, err := packageurl.FromString(p.PURL)
if err != nil {
return false
}
// Delegate the actual comparison to the parsed-purl variant, resolving the
// package's version format here.
return packageMatchesParsed(stmt, pkgPURL, pkg.VersionFormat(*p), st)
}

// packageMatchesParsed decides whether a package purl lies within the scope of
// a statement purl for the given status, working on purls that are already
// parsed and a version format that is already resolved so callers in a hot
// loop pay for parsing only once. Type, namespace and name must be equal and
// every statement qualifier must be present on the package with the same
// value; the version is then interpreted according to st.
func packageMatchesParsed(stmt, pkgPURL packageurl.PackageURL, format version.Format, st status) bool {
	sameIdentity := stmt.Type == pkgPURL.Type &&
		stmt.Namespace == pkgPURL.Namespace &&
		stmt.Name == pkgPURL.Name
	if !sameIdentity || !qualifierSubset(stmt.Qualifiers, pkgPURL.Qualifiers) {
		return false
	}

	switch {
	case stmt.Version == "":
		// A version-less statement is a wildcard over all package versions.
		return true
	case pkgPURL.Version == "":
		// The statement pins a version but the package purl carries none.
		return false
	case st == lastAffected:
		return compareVersions(pkgPURL.Version, stmt.Version, format, version.LTE)
	case st == firstAffected:
		return compareVersions(pkgPURL.Version, stmt.Version, format, version.GTE)
	default:
		// knownAffected, recommended, underInvestigation: exact match.
		return stmt.Version == pkgPURL.Version
	}
}

// compareVersions reports whether pkgVersion satisfies op relative to
// stmtVersion when both are interpreted in the given format. A comparison
// that returns an error is treated as "does not satisfy".
func compareVersions(pkgVersion, stmtVersion string, format version.Format, op version.Operator) bool {
	lhs := version.New(pkgVersion, format)
	rhs := version.New(stmtVersion, format)
	if holds, err := lhs.Is(op, rhs); err == nil {
		return holds
	}
	return false
}

// qualifierSubset reports whether every qualifier in stmtQ also appears in
// pkgQ with an identical value. Extra qualifiers on the package are allowed,
// and an empty stmtQ is trivially a subset.
func qualifierSubset(stmtQ, pkgQ packageurl.Qualifiers) bool {
	have := pkgQ.Map()
	for _, want := range stmtQ {
		got, present := have[want.Key]
		if present && got == want.Value {
			continue
		}
		return false
	}
	return true
}

// toAdvisoryMatch converts the candidate into a freshly allocated
// advisoryMatch carrying its vulnerability, status and product ID, which is
// the shape the rest of the CSAF code (matchingRule, statement(), etc.)
// operates on. The candidate's package is not part of the result.
func (c synthesisCandidate) toAdvisoryMatch() *advisoryMatch {
	am := new(advisoryMatch)
	am.Vulnerability = c.Vulnerability
	am.Status = c.Status
	am.ProductID = c.ProductID
	return am
}
86 changes: 84 additions & 2 deletions grype/vex/csaf/implementation.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/anchore/grype/grype/match"
"github.com/anchore/grype/grype/pkg"
vexStatus "github.com/anchore/grype/grype/vex/status"
"github.com/anchore/grype/grype/vulnerability"
)

// searchedBy captures the parameters used to search through the VEX data
Expand Down Expand Up @@ -121,9 +122,13 @@ func (*Processor) FilterMatches(

// AugmentMatches adds results to the match.Matches array when matching data
// about an affected VEX product is found on loaded VEX documents. Matches
// are moved from the ignore list back to active matches.
// are moved from the ignore list back to active matches, or synthesized from
// the package catalog when the vulnerability database has no record of the
// affected (vulnerability, package) pair. last_affected and first_affected
// statuses are interpreted as version range bounds; other affected-like
// statuses use exact version match.
func (*Processor) AugmentMatches(
docRaw any, ignoreRules []match.IgnoreRule, _ *pkg.Context, matches *match.Matches, ignoredMatches []match.IgnoredMatch,
docRaw any, ignoreRules []match.IgnoreRule, _ *pkg.Context, pkgs []pkg.Package, matches *match.Matches, ignoredMatches []match.IgnoredMatch,
) (*match.Matches, []match.IgnoredMatch, error) {
advisories, ok := docRaw.(advisories)
if !ok {
Expand Down Expand Up @@ -152,9 +157,86 @@ func (*Processor) AugmentMatches(
remainingIgnoredMatches = append(remainingIgnoredMatches, m)
}

synthesizeFromCatalog(advisories, ignoreRules, pkgs, matches, remainingIgnoredMatches)

return matches, remainingIgnoredMatches, nil
}

// synthesizeFromCatalog adds a new match to remainingMatches for each
// (vulnerability, package) pair that the loaded CSAF advisories name as
// affected (or under_investigation) and that is present in neither
// remainingMatches nor ignoredMatches. A pair is only synthesized when one of
// ignoreRules matches it under the augment status list; pairs are
// de-duplicated on (vulnerability ID, package purl).
func synthesizeFromCatalog(
	advs advisories,
	ignoreRules []match.IgnoreRule,
	pkgs []pkg.Package,
	remainingMatches *match.Matches,
	ignoredMatches []match.IgnoredMatch,
) {
	candidates := advs.findSynthesisCandidates(pkgs)
	if len(candidates) == 0 {
		return
	}

	alreadyKnown := existingVulnPackageKeys(remainingMatches, ignoredMatches)

	for _, cand := range candidates {
		found := cand.toAdvisoryMatch()
		cve := found.cve()
		if cve == "" {
			continue
		}

		dedupKey := vulnPackageKey(cve, cand.Package.PURL)
		if _, dup := alreadyKnown[dedupKey]; dup {
			continue
		}

		// The rule check runs against the match before Details is attached.
		newMatch := match.Match{
			Vulnerability: vulnerability.Vulnerability{
				Reference: vulnerability.Reference{
					ID:        cve,
					Namespace: "vex",
				},
			},
			Package: *cand.Package,
		}
		if matchingRule(ignoreRules, newMatch, found, vexStatus.AugmentList()) == nil {
			continue
		}

		detail := match.Detail{
			Type: match.ExactDirectMatch,
			SearchedBy: &searchedBy{
				Vulnerability: cve,
				Purl:          cand.Package.PURL,
			},
			Found:      found,
			Matcher:    match.CsafVexMatcher,
			Confidence: 1,
		}
		newMatch.Details = []match.Detail{detail}

		remainingMatches.Add(newMatch)
		alreadyKnown[dedupKey] = struct{}{}
	}
}

// existingVulnPackageKeys returns the set of (vulnerability ID, package purl)
// keys, as produced by vulnPackageKey, for every match in remainingMatches
// and every entry in ignoredMatches.
func existingVulnPackageKeys(remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch) map[string]struct{} {
	keys := make(map[string]struct{})
	record := func(vulnID, purl string) {
		keys[vulnPackageKey(vulnID, purl)] = struct{}{}
	}

	for active := range remainingMatches.Enumerate() {
		record(active.Vulnerability.ID, active.Package.PURL)
	}
	for i := range ignoredMatches {
		record(ignoredMatches[i].Vulnerability.ID, ignoredMatches[i].Package.PURL)
	}
	return keys
}

// vulnPackageKey builds the de-duplication key for a (vulnerability, package)
// pair by joining the vulnerability ID and the package purl with a NUL byte.
func vulnPackageKey(vulnID, purl string) string {
	const sep = "\x00"
	key := vulnID + sep + purl
	return key
}

// matchingRule cycles through a set of ignore rules and returns the first
// one that matches the statement and the match. Returns nil if none match.
func matchingRule(ignoreRules []match.IgnoreRule, m match.Match, advMatch *advisoryMatch, allowedStatuses []vexStatus.Status) *match.IgnoreRule {
Expand Down
Loading
Loading