From ffead616539e3a7ca05c51f3ceeb37949a453df1 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 27 May 2026 01:05:32 +0000 Subject: [PATCH 1/6] Differentiate between versions extracted from the CPE range and the CPE string. --- vulnfeeds/conversion/common.go | 19 ++++- .../nvd/__snapshots__/converter_test.snap | 84 +++++++++---------- vulnfeeds/conversion/versions.go | 7 +- vulnfeeds/models/metrics.go | 2 + 4 files changed, 67 insertions(+), 45 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index ed73618d886..4e41169a6d9 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -672,7 +672,24 @@ func ProcessRanges(ranges []models.RangeWithMetadata, repos []string, metrics *m } } - metrics.VersionSources = append(metrics.VersionSources, source) + // Dynamically record the precise sources from each processed range's metadata. + // This ensures that granular version sources (such as CPE_RANGE or CPE_STRING) are tracked in + // the final conversion metrics instead of a single generic fallback source. 
+ for _, rng := range ranges { + if rng.Metadata.Source == "" { + continue + } + found := false + for _, s := range metrics.VersionSources { + if s == rng.Metadata.Source { + found = true + break + } + } + if !found { + metrics.VersionSources = append(metrics.VersionSources, rng.Metadata.Source) + } + } return r, un, sR } diff --git a/vulnfeeds/conversion/nvd/__snapshots__/converter_test.snap b/vulnfeeds/conversion/nvd/__snapshots__/converter_test.snap index 2587dd4d1b7..88462702cb5 100755 --- a/vulnfeeds/conversion/nvd/__snapshots__/converter_test.snap +++ b/vulnfeeds/conversion/nvd/__snapshots__/converter_test.snap @@ -1767,7 +1767,7 @@ } ], "source": [ - "CPE_FIELD", + "CPE_RANGE", "REFERENCES" ] }, @@ -1853,7 +1853,7 @@ } ], "source": [ - "CPE_FIELD", + "CPE_RANGE", "REFERENCES" ] }, @@ -1997,7 +1997,7 @@ } ], "source": [ - "CPE_FIELD", + "CPE_RANGE", "REFERENCES" ] }, @@ -2066,7 +2066,7 @@ "fixed": "7.61.1" } ], - "source": "CPE_FIELD" + "source": "CPE_RANGE" }, "events": [ { @@ -2105,7 +2105,7 @@ "last_affected": "18.04" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "canonical:ubuntu_linux" }, { @@ -2117,7 +2117,7 @@ "last_affected": "9.0" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "debian:debian_linux" }, { @@ -2145,7 +2145,7 @@ "last_affected": "7.6" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "redhat:enterprise_linux" } ] @@ -2230,7 +2230,7 @@ "last_affected": "38" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "fedoraproject:fedora" }, { @@ -2254,7 +2254,7 @@ "last_affected": "12.1" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "redhat:directory_server" } ] @@ -2388,7 +2388,7 @@ } ], "source": [ - "CPE_FIELD", + "CPE_STRING", "REFERENCES" ] }, @@ -2424,7 +2424,7 @@ "last_affected": "36" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "fedoraproject:fedora" } ] @@ -2810,7 +2810,7 @@ 
"last_affected": "2.8.4" } ], - "source": "CPE_FIELD" + "source": "CPE_STRING" }, "events": [ { @@ -3077,7 +3077,7 @@ "last_affected": "12.04" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "canonical:ubuntu_linux" }, { @@ -3089,7 +3089,7 @@ "last_affected": "42.1" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "opensuse:leap" } ] @@ -3243,7 +3243,7 @@ "fixed": "0.9.2" } ], - "source": "CPE_FIELD" + "source": "CPE_RANGE" }, "events": [ { @@ -3270,7 +3270,7 @@ "last_affected": "40" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "fedoraproject:fedora" }, { @@ -3286,7 +3286,7 @@ "last_affected": "8.0" } ], - "source": "CPE_FIELD", + "source": "CPE_STRING", "vendor_product": "redhat:enterprise_linux" } ] @@ -3329,26 +3329,6 @@ ], "database_specific": { "unresolved_ranges": [ - { - "cpes": [ - "cpe:2.3:o:fedoraproject:fedora:38:*:*:*:*:*:*:*", - "cpe:2.3:o:fedoraproject:fedora:39:*:*:*:*:*:*:*", - "cpe:2.3:o:fedoraproject:fedora:40:*:*:*:*:*:*:*" - ], - "extracted_events": [ - { - "last_affected": "38" - }, - { - "last_affected": "39" - }, - { - "last_affected": "40" - } - ], - "source": "CPE_FIELD", - "vendor_product": "fedoraproject:fedora" - }, { "cpes": [ "cpe:2.3:a:filezilla-project:filezilla_client:*:*:*:*:*:*:*:*" @@ -3358,7 +3338,7 @@ "fixed": "3.67.0" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "filezilla-project:filezilla_client" }, { @@ -3373,7 +3353,7 @@ "fixed": "0.81" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "putty:putty" }, { @@ -3385,7 +3365,7 @@ "fixed": "1.14.6" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "tigris:tortoisesvn" }, { @@ -3397,7 +3377,7 @@ "fixed": "2.15.0.1" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": "tortoisegit:tortoisegit" }, { @@ -3409,9 +3389,29 @@ "fixed": "6.3.3" } ], - "source": "CPE_FIELD", + "source": "CPE_RANGE", "vendor_product": 
"winscp:winscp" }, + { + "cpes": [ + "cpe:2.3:o:fedoraproject:fedora:38:*:*:*:*:*:*:*", + "cpe:2.3:o:fedoraproject:fedora:39:*:*:*:*:*:*:*", + "cpe:2.3:o:fedoraproject:fedora:40:*:*:*:*:*:*:*" + ], + "extracted_events": [ + { + "last_affected": "38" + }, + { + "last_affected": "39" + }, + { + "last_affected": "40" + } + ], + "source": "CPE_STRING", + "vendor_product": "fedoraproject:fedora" + }, { "extracted_events": [ { diff --git a/vulnfeeds/conversion/versions.go b/vulnfeeds/conversion/versions.go index d827ea91b17..992d355b3d9 100644 --- a/vulnfeeds/conversion/versions.go +++ b/vulnfeeds/conversion/versions.go @@ -729,6 +729,8 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, vpRepoCa introduced := "" fixed := "" lastaffected := "" + source := models.VersionSourceCPERange + if match.VersionStartIncluding != nil { introduced = cleanVersion(*match.VersionStartIncluding) } else if match.VersionStartExcluding != nil { @@ -771,6 +773,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, vpRepoCa if CPE.Update != "ANY" { lastaffected += "-" + CPE.Update } + source = models.VersionSourceCPEString } if introduced == "" { @@ -809,7 +812,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, vpRepoCa Range: vr, Metadata: models.Metadata{ CPE: match.Criteria, - Source: models.VersionSourceCPE, + Source: source, }, }, ) @@ -821,7 +824,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, vpRepoCa Range: vr, Metadata: models.Metadata{ CPE: match.Criteria, - Source: models.VersionSourceCPE, + Source: source, }, }, ) diff --git a/vulnfeeds/models/metrics.go b/vulnfeeds/models/metrics.go index f0f56c8f9ca..d1334f20bcd 100644 --- a/vulnfeeds/models/metrics.go +++ b/vulnfeeds/models/metrics.go @@ -110,6 +110,8 @@ const ( VersionSourceAffected VersionSource = "AFFECTED_FIELD" VersionSourceGit VersionSource = "AFFECTED_FIELD_GIT" VersionSourceCPE VersionSource = "CPE_FIELD" + 
VersionSourceCPERange VersionSource = "CPE_RANGE" + VersionSourceCPEString VersionSource = "CPE_STRING" VersionSourceDescription VersionSource = "DESCRIPTION" VersionSourceText VersionSource = "TEXT_EXTRACTION" VersionSourceRefs VersionSource = "REFERENCES" From f9249a7bcecffcad5d0bb452262873268eb21af2 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 27 May 2026 01:08:47 +0000 Subject: [PATCH 2/6] remove unnecessary field --- vulnfeeds/conversion/common.go | 2 +- vulnfeeds/conversion/cve5/default_extractor.go | 2 +- vulnfeeds/conversion/nvd/converter.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 4e41169a6d9..23771858e40 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -654,7 +654,7 @@ func AddFieldToDatabaseSpecific(ds *structpb.Struct, field string, value any) er } // ProcessRanges attempts to resolve the given ranges to commits and updates the metrics accordingly. 
-func ProcessRanges(ranges []models.RangeWithMetadata, repos []string, metrics *models.ConversionMetrics, cache git.RepoTagsCache, source models.VersionSource) ([]models.RangeWithMetadata, []models.RangeWithMetadata, []string) { +func ProcessRanges(ranges []models.RangeWithMetadata, repos []string, metrics *models.ConversionMetrics, cache git.RepoTagsCache) ([]models.RangeWithMetadata, []models.RangeWithMetadata, []string) { if len(ranges) == 0 { return nil, nil, nil } diff --git a/vulnfeeds/conversion/cve5/default_extractor.go b/vulnfeeds/conversion/cve5/default_extractor.go index ef391c26731..c93a6afe0f6 100644 --- a/vulnfeeds/conversion/cve5/default_extractor.go +++ b/vulnfeeds/conversion/cve5/default_extractor.go @@ -42,7 +42,7 @@ func (d *DefaultVersionExtractor) ExtractVersions(cve models.CVE5, v *vulns.Vuln var unresolvedRanges []models.RangeWithMetadata processRanges := func(nr []models.RangeWithMetadata) bool { - r, un, sR := c.ProcessRanges(nr, repos, metrics, repoTagsCache, models.VersionSourceAffected) + r, un, sR := c.ProcessRanges(nr, repos, metrics, repoTagsCache) resolvedRanges = append(resolvedRanges, r...) unresolvedRanges = append(unresolvedRanges, un...) 
for _, s := range sR { diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 549e3bd1ad9..1e3047e6456 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -82,7 +82,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, vpRepoCache *c.VPRepoCache, cac } // If we have ranges, try to resolve them - r, un, sR := c.ProcessRanges(cpeRanges, repos, metrics, cache, models.VersionSourceCPE) + r, un, sR := c.ProcessRanges(cpeRanges, repos, metrics, cache) if metrics.Outcome == models.Error { return nil, metrics, models.Error } @@ -116,7 +116,7 @@ func CVEToOSV(cve models.NVDCVE, repos []string, vpRepoCache *c.VPRepoCache, cac if len(textRanges) > 0 { metrics.AddNote("Extracted versions from description: %v", textRanges) } - r, un, sR := c.ProcessRanges(textRanges, repos, metrics, cache, models.VersionSourceDescription) + r, un, sR := c.ProcessRanges(textRanges, repos, metrics, cache) if metrics.Outcome == models.Error { return nil, metrics, models.Error } From 3dd3e8f25da945d548c054154562b59e427a581e Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 27 May 2026 03:49:15 +0000 Subject: [PATCH 3/6] Update combine-to-osv logic to utilise updated metadata information --- vulnfeeds/cmd/combine-to-osv/DESIGN.md | 71 +++ vulnfeeds/cmd/combine-to-osv/main.go | 544 +++++++++++++++++++--- vulnfeeds/cmd/combine-to-osv/main_test.go | 370 ++++++++++++++- 3 files changed, 922 insertions(+), 63 deletions(-) create mode 100644 vulnfeeds/cmd/combine-to-osv/DESIGN.md diff --git a/vulnfeeds/cmd/combine-to-osv/DESIGN.md b/vulnfeeds/cmd/combine-to-osv/DESIGN.md new file mode 100644 index 00000000000..067a937ef27 --- /dev/null +++ b/vulnfeeds/cmd/combine-to-osv/DESIGN.md @@ -0,0 +1,71 @@ +# Combine-to-OSV Range Selection & Merging Design + +This document summarizes the design decisions and merging strategies implemented in the `combine-to-osv` tool to combine converted OSV records from NVD and CVE5 into a 
single enriched, schema-compliant OSV record. + +--- + +## 1. Structural Decisions + +### Unified Affected Package Grouping +Rather than outputting a separate `Affected` package object for each repository range, all repository-based Git ranges are grouped under a **single** `Affected` struct inside the final combined OSV record. Pure package-level entries (which contain only a package name without any ranges) are preserved as separate `Affected` objects. + +--- + +## 2. Range Selection & Merging Strategy (`pickBestRange`) + +When both NVD and CVE5 converted records contain Git ranges for the same repository, `pickBestRange` is used to determine the best combined range. + +```mermaid +graph TD + A[Start pickBestRange] --> B{One range is nil?} + B -- Yes --> C[Return non-nil range with last_affected cleanup] + B -- No --> D{One range is REFERENCES source only?} + D -- Yes --> E[Merge events & database_specifics] + D -- No --> F{Both simple ranges <= 2 events?} + F -- Yes --> G[Merge boundary versions & database_specifics] + F -- No --> H{Fixed event presence differs?} + H -- Yes --> I[Prefer range with Fixed event] + H -- No --> J{Introduced '0' presence differs?} + J -- Yes --> K[Prefer range with non-zero Introduced] + J -- No --> L{CPE_RANGE source presence differs?} + L -- Yes --> M[Prefer range with CPE_RANGE source] + L -- No --> N{extracted_events differ?} + N -- Yes --> O[Prioritize preferred source CVE5] + N -- No --> P[Fallback: Choose range with more events] + E --> Q[Remove last_affected if Fixed exists] + G --> Q + I --> Q + K --> Q + M --> Q + O --> Q + P --> Q + Q --> R[Return merged/chosen range] +``` + +### 1. References-Only Merging +If one range's metadata source is **only** `"REFERENCES"` (meaning its commits were directly parsed from fix references), its events are appended and merged into the other CVE range instead of choosing one range wholesale. This preserves precise fix commits extracted from advisory links. + +### 2. 
Boundary Version Merging +For simple version ranges (with two or fewer events), boundary versions are merged to combine the most complete and constrained information: +* We prefer more constrained introduced boundaries (e.g., a non-zero introduced version over a `"0"` version). +* We prefer defined fixed version boundaries over undefined ones. + +### 3. Preference Rules (Wholesale Fallbacks) +If ranges are not simple enough to merge boundaries, we select the best range using the following hierarchy: +1. **Fixed Priority**: A range with bounded `fixed` version or commit information is prioritized over a range with open-ended `last_affected` information. +2. **Constrained Range Priority**: We prefer ranges that define a specific non-zero `introduced` bound over those that start at `"0"`. +3. **CPE_RANGE Source Priority**: We prefer ranges whose metadata source is `"CPE_RANGE"` because they are extracted from explicit config nodes rather than inferred from text. +4. **Preferred Source**: If both ranges carry `extracted_events` and those events differ, we prefer the range from the default preferred source (`CVE5` CNA-provided data). +5. **Completeness**: Otherwise, choose the range that has the larger number of events, keeping the `CVE5` range on a tie. + +--- + +## 3. Metadata & Cleanup Rules + +### Database Specific Merging +Whenever ranges are merged (either via boundary version merging or references-only merging), their `database_specific` metadata fields are combined: +* String `source` tags are merged into a unified `ListValue` list (e.g., `"AFFECTED_FIELD"` and `"REFERENCES"` are merged into `["AFFECTED_FIELD", "REFERENCES"]`). +* Duplicate entries inside `extracted_events` are removed. + +### Last-Affected Cleanup +At the end of the selection or merging process, if the final range contains at least one explicit `fixed` commit or version event, any `last_affected` events are automatically removed from the range to maintain clean, bounded schema compliance. 
diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 06a6ada272d..7d8635937cb 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -18,10 +18,12 @@ import ( "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/conversion/writer" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/api/iterator" "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" ) const ( @@ -269,104 +271,528 @@ func combineTwoOSVRecords(cve5 *osvschema.Vulnerability, nvd *osvschema.Vulnerab // It returns a new slice and does not modify cve5Affected in place. func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []*osvschema.Affected) []*osvschema.Affected { if len(nvdAffected) == 0 { + for _, aff := range cve5Affected { + for _, r := range aff.GetRanges() { + cleanLastAffectedIfFixedExists(r) + } + } return cve5Affected } - // If NVD has more affected packages, prefer it entirely. 
- if len(cve5Affected) == 0 || len(nvdAffected) > len(cve5Affected) { + if len(cve5Affected) == 0 { + for _, aff := range nvdAffected { + for _, r := range aff.GetRanges() { + cleanLastAffectedIfFixedExists(r) + } + } return nvdAffected } - nvdRepoMap := make(map[string][]*osvschema.Range) - for _, affected := range nvdAffected { - for _, r := range affected.GetRanges() { + // Group all ranges by repository URL + cve5Ranges := make(map[string]*osvschema.Range) + for _, aff := range cve5Affected { + for _, r := range aff.GetRanges() { if r.GetRepo() != "" { repo := strings.ToLower(r.GetRepo()) - nvdRepoMap[repo] = append(nvdRepoMap[repo], r) + cve5Ranges[repo] = r } } } - cve5RepoMap := make(map[string][]*osvschema.Range) - for _, affected := range cve5Affected { - for _, r := range affected.GetRanges() { + nvdRanges := make(map[string]*osvschema.Range) + for _, aff := range nvdAffected { + for _, r := range aff.GetRanges() { if r.GetRepo() != "" { repo := strings.ToLower(r.GetRepo()) - cve5RepoMap[repo] = append(cve5RepoMap[repo], r) + nvdRanges[repo] = r } } } - newRepoAffectedMap := make(map[string]*osvschema.Affected) - - // Finds ranges with the same repo and merges them into one affected set. - for repo, cveRanges := range cve5RepoMap { - if nvdRanges, ok := nvdRepoMap[repo]; ok { - var newAffectedRanges []*osvschema.Range + // Collect all unique repo URLs + reposMap := make(map[string]bool) + for repo := range cve5Ranges { + reposMap[repo] = true + } + for repo := range nvdRanges { + reposMap[repo] = true + } - // Found a match. If NVD has more ranges, use its ranges. 
- if len(nvdRanges) > len(cveRanges) { - // just use the nvd ranges - newAffectedRanges = nvdRanges - } else if len(cveRanges) == 1 && len(nvdRanges) == 1 { - c5Intro, c5Fixed := getRangeBoundaryVersions(cveRanges[0].GetEvents()) - nvdIntro, nvdFixed := getRangeBoundaryVersions(nvdRanges[0].GetEvents()) + var finalRanges []*osvschema.Range + for repo := range reposMap { + bestRange := pickBestRange(cve5Ranges[repo], nvdRanges[repo]) + if bestRange != nil { + finalRanges = append(finalRanges, bestRange) + } + } - // Prefer cve5 data, but use nvd data if cve5 data is missing. - if c5Intro == "" { - c5Intro = nvdIntro + // Separate output Affected list: + // 1. Those that have ranges are grouped under a single Affected struct. + // 2. Those that do not have ranges are kept as separate Affected structs. + var combinedAffected []*osvschema.Affected + + if len(finalRanges) > 0 { + // Sort final ranges by repo for stability + slices.SortFunc(finalRanges, func(a, b *osvschema.Range) int { + return cmp.Compare(strings.ToLower(a.GetRepo()), strings.ToLower(b.GetRepo())) + }) + + // Find Package and EcosystemSpecific if any were present in the input ranges + var pkg *osvschema.Package + var ecosystemSpecific *structpb.Struct + for _, aff := range cve5Affected { + if len(aff.GetRanges()) > 0 { + if aff.GetPackage() != nil { + pkg = aff.GetPackage() } - if c5Fixed == "" { - c5Fixed = nvdFixed + if aff.GetEcosystemSpecific() != nil { + ecosystemSpecific = aff.GetEcosystemSpecific() } - - if c5Intro != "" || c5Fixed != "" { - newRange := conversion.BuildGitVersionRange(c5Intro, "", c5Fixed, repo) - newAffectedRanges = append(newAffectedRanges, newRange) - } else { - newAffectedRanges = cveRanges + } + } + if pkg == nil || ecosystemSpecific == nil { + for _, aff := range nvdAffected { + if len(aff.GetRanges()) > 0 { + if pkg == nil && aff.GetPackage() != nil { + pkg = aff.GetPackage() + } + if ecosystemSpecific == nil && aff.GetEcosystemSpecific() != nil { + ecosystemSpecific = 
aff.GetEcosystemSpecific() + } } - } else { - newAffectedRanges = cveRanges } + } - // Remove from map so we know which NVD packages are left. - delete(nvdRepoMap, repo) - newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: newAffectedRanges, + combinedAffected = append(combinedAffected, &osvschema.Affected{ + Ranges: finalRanges, + Package: pkg, + EcosystemSpecific: ecosystemSpecific, + }) + } + + // Copy over affected objects from cve5 and nvd that have NO ranges (e.g. pure package entries), + // deduplicating them by package name. + seenPackages := make(map[string]bool) + for _, aff := range cve5Affected { + if len(aff.GetRanges()) == 0 && aff.GetPackage() != nil { + pkgName := strings.ToLower(aff.GetPackage().GetName()) + if !seenPackages[pkgName] { + combinedAffected = append(combinedAffected, aff) + seenPackages[pkgName] = true } - } else { - newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: cveRanges, + } + } + for _, aff := range nvdAffected { + if len(aff.GetRanges()) == 0 && aff.GetPackage() != nil { + pkgName := strings.ToLower(aff.GetPackage().GetName()) + if !seenPackages[pkgName] { + combinedAffected = append(combinedAffected, aff) + seenPackages[pkgName] = true } } } - // Add remaining NVD packages that were not in cve5. - for repo, nvdRange := range nvdRepoMap { - newRepoAffectedMap[repo] = &osvschema.Affected{ - Ranges: nvdRange, + // Sort the combinedAffected array: first entries with ranges, then by package name if present. 
+ slices.SortFunc(combinedAffected, func(a, b *osvschema.Affected) int { + hasRangeA := len(a.GetRanges()) > 0 + hasRangeB := len(b.GetRanges()) > 0 + if hasRangeA != hasRangeB { + if hasRangeA { + return -1 + } + + return 1 } + var pkgA, pkgB string + if a.GetPackage() != nil { + pkgA = a.GetPackage().GetName() + } + if b.GetPackage() != nil { + pkgB = b.GetPackage().GetName() + } + + return cmp.Compare(strings.ToLower(pkgA), strings.ToLower(pkgB)) + }) + + return combinedAffected +} + +type ExtractedEvent struct { + Introduced string + Fixed string + LastAffected string + Limit string +} + +func getExtractedEvents(r *osvschema.Range) []*structpb.Value { + if r.GetDatabaseSpecific() == nil { + return nil } + fields := r.GetDatabaseSpecific().GetFields() + if fields == nil { + return nil + } + val, ok := fields["extracted_events"] + if !ok || val.GetListValue() == nil { + return nil + } + + return val.GetListValue().GetValues() +} - var combinedAffected []*osvschema.Affected //nolint:prealloc - for _, aff := range newRepoAffectedMap { - combinedAffected = append(combinedAffected, aff) +func parseExtractedEvent(v *structpb.Value) ExtractedEvent { + s := v.GetStructValue() + if s == nil { + return ExtractedEvent{} + } + fields := s.GetFields() + var ev ExtractedEvent + if intro, ok := fields["introduced"]; ok { + ev.Introduced = intro.GetStringValue() + } + if fixed, ok := fields["fixed"]; ok { + ev.Fixed = fixed.GetStringValue() + } + if la, ok := fields["last_affected"]; ok { + ev.LastAffected = la.GetStringValue() + } + if lim, ok := fields["limit"]; ok { + ev.Limit = lim.GetStringValue() } - // sort by repo - slices.SortFunc(combinedAffected, func(a, b *osvschema.Affected) int { - var repoA, repoB string - if len(a.GetRanges()) > 0 { - repoA = a.GetRanges()[0].GetRepo() + return ev +} + +func parseExtractedEvents(r *osvschema.Range) []ExtractedEvent { + rawValues := getExtractedEvents(r) + if len(rawValues) == 0 { + return nil + } + var events []ExtractedEvent + 
for _, val := range rawValues { + events = append(events, parseExtractedEvent(val)) + } + + return events +} + +func sameVersionRanges(evs1, evs2 []ExtractedEvent) bool { + if len(evs1) != len(evs2) { + return false + } + for i := range evs1 { + if evs1[i] != evs2[i] { + return false + } + } + + return true +} + +func hasFixedEvent(r *osvschema.Range) bool { + for _, e := range r.GetEvents() { + if e.GetFixed() != "" { + return true + } + } + return false +} + +func hasIntroducedZero(r *osvschema.Range) bool { + for _, e := range r.GetEvents() { + if e.GetIntroduced() == "0" { + return true + } + } + return false +} + +func isCPERange(r *osvschema.Range) bool { + if r.GetDatabaseSpecific() == nil { + return false + } + fields := r.GetDatabaseSpecific().GetFields() + if fields == nil { + return false + } + val, ok := fields["source"] + if !ok { + return false + } + if val.GetStringValue() == "CPE_RANGE" { + return true + } + if listVal := val.GetListValue(); listVal != nil { + for _, item := range listVal.GetValues() { + if item.GetStringValue() == "CPE_RANGE" { + return true + } + } + } + return false +} + +func cleanLastAffectedIfFixedExists(r *osvschema.Range) { + if r == nil { + return + } + hasFixed := false + for _, e := range r.GetEvents() { + if e.GetFixed() != "" { + hasFixed = true + break + } + } + if !hasFixed { + return + } + var cleanEvents []*osvschema.Event + for _, e := range r.GetEvents() { + if e.GetLastAffected() == "" { + cleanEvents = append(cleanEvents, e) + } + } + r.Events = cleanEvents +} + +func pickBestDatabaseSpecific(cve5Range, nvdRange *osvschema.Range) *structpb.Struct { + c5HasFixed := hasFixedEvent(cve5Range) + nvdHasFixed := hasFixedEvent(nvdRange) + if c5HasFixed != nvdHasFixed { + if c5HasFixed { + return cve5Range.GetDatabaseSpecific() + } + return nvdRange.GetDatabaseSpecific() + } + + c5HasIntroZero := hasIntroducedZero(cve5Range) + nvdHasIntroZero := hasIntroducedZero(nvdRange) + if c5HasIntroZero != nvdHasIntroZero { + if 
!c5HasIntroZero { + return cve5Range.GetDatabaseSpecific() + } + return nvdRange.GetDatabaseSpecific() + } + + c5IsCPERange := isCPERange(cve5Range) + nvdIsCPERange := isCPERange(nvdRange) + if c5IsCPERange != nvdIsCPERange { + if c5IsCPERange { + return cve5Range.GetDatabaseSpecific() + } + return nvdRange.GetDatabaseSpecific() + } + + if len(nvdRange.GetEvents()) > len(cve5Range.GetEvents()) { + return nvdRange.GetDatabaseSpecific() + } + return cve5Range.GetDatabaseSpecific() +} + +func isReferencesOnly(r *osvschema.Range) bool { + if r.GetDatabaseSpecific() == nil { + return false + } + fields := r.GetDatabaseSpecific().GetFields() + if fields == nil { + return false + } + val, ok := fields["source"] + if !ok { + return false + } + if val.GetStringValue() == "REFERENCES" { + return true + } + if listVal := val.GetListValue(); listVal != nil { + values := listVal.GetValues() + if len(values) == 1 && values[0].GetStringValue() == "REFERENCES" { + return true } - if len(b.GetRanges()) > 0 { - repoB = b.GetRanges()[0].GetRepo() + } + return false +} + +func mergeDatabaseSpecifics(ds1, ds2 *structpb.Struct) *structpb.Struct { + if ds1 == nil { + return ds2 + } + if ds2 == nil { + return ds1 + } + + mergedMap := make(map[string]any) + for k, v := range ds1.GetFields() { + mergedMap[k] = v.AsInterface() + } + + for k, v := range ds2.GetFields() { + val2 := v.AsInterface() + if existing, ok := mergedMap[k]; ok { + mergedVal, err := conversion.MergeDatabaseSpecificValues(existing, val2) + if err == nil { + mergedMap[k] = mergedVal + } + } else { + mergedMap[k] = val2 } + } + + if ds, err := utility.NewStructpbFromMap(mergedMap); err == nil { + return ds + } + return ds1 +} - return cmp.Compare(repoA, repoB) +func mergeRanges(base, other *osvschema.Range) *osvschema.Range { + merged := &osvschema.Range{ + Type: base.GetType(), + Repo: base.GetRepo(), + Events: append([]*osvschema.Event{}, base.GetEvents()...), + DatabaseSpecific: 
mergeDatabaseSpecifics(base.GetDatabaseSpecific(), other.GetDatabaseSpecific()), + } + for _, e := range other.GetEvents() { + found := false + for _, existing := range merged.GetEvents() { + if e.GetIntroduced() != "" && e.GetIntroduced() == existing.GetIntroduced() { + found = true + break + } + if e.GetFixed() != "" && e.GetFixed() == existing.GetFixed() { + found = true + break + } + if e.GetLastAffected() != "" && e.GetLastAffected() == existing.GetLastAffected() { + found = true + break + } + } + if !found { + if e.GetIntroduced() != "" { + merged.Events = append([]*osvschema.Event{e}, merged.Events...) + } else { + merged.Events = append(merged.Events, e) + } + } + } + slices.SortStableFunc(merged.Events, func(a, b *osvschema.Event) int { + if a.GetIntroduced() != "" && b.GetIntroduced() == "" { + return -1 + } + if a.GetIntroduced() == "" && b.GetIntroduced() != "" { + return 1 + } + return 0 }) + return merged +} - return combinedAffected +func pickBestRange(cve5Range *osvschema.Range, nvdRange *osvschema.Range) *osvschema.Range { + if cve5Range == nil { + cleanLastAffectedIfFixedExists(nvdRange) + return nvdRange + } + if nvdRange == nil { + cleanLastAffectedIfFixedExists(cve5Range) + return cve5Range + } + + // 1. If one of the ranges is references-only, merge them instead of choosing one + if isReferencesOnly(nvdRange) { + merged := mergeRanges(cve5Range, nvdRange) + cleanLastAffectedIfFixedExists(merged) + return merged + } + if isReferencesOnly(cve5Range) { + merged := mergeRanges(nvdRange, cve5Range) + cleanLastAffectedIfFixedExists(merged) + return merged + } + + // 2. Try to merge boundary versions first for simple 1-event/2-event ranges. 
+ var merged *osvschema.Range + if len(cve5Range.GetEvents()) <= 2 && len(nvdRange.GetEvents()) <= 2 { + c5Intro, c5Fixed := getRangeBoundaryVersions(cve5Range.GetEvents()) + nvdIntro, nvdFixed := getRangeBoundaryVersions(nvdRange.GetEvents()) + + // Prefer cve5 bounds, but use nvd if cve5 is missing them + if c5Intro == "" { + c5Intro = nvdIntro + } + if c5Fixed == "" { + c5Fixed = nvdFixed + } + + if c5Intro != "" || c5Fixed != "" { + merged = conversion.BuildGitVersionRange(c5Intro, "", c5Fixed, cve5Range.GetRepo()) + merged.DatabaseSpecific = mergeDatabaseSpecifics(cve5Range.GetDatabaseSpecific(), nvdRange.GetDatabaseSpecific()) + } + } + + if merged == nil { + // 2. Prioritize range with fixed information over last_affected / open-ended ranges + c5HasFixed := hasFixedEvent(cve5Range) + nvdHasFixed := hasFixedEvent(nvdRange) + + if c5HasFixed != nvdHasFixed { + if c5HasFixed { + merged = cve5Range + } else { + merged = nvdRange + } + } + } + + if merged == nil { + // 3. Prefer constrained ranges (no introduced "0") + c5HasIntroZero := hasIntroducedZero(cve5Range) + nvdHasIntroZero := hasIntroducedZero(nvdRange) + + if c5HasIntroZero != nvdHasIntroZero { + if !c5HasIntroZero { + merged = cve5Range + } else { + merged = nvdRange + } + } + } + + if merged == nil { + // 4. 
Prefer CPE_RANGE if it exists, otherwise fall back to preferred source (CVE5) + c5IsCPERange := isCPERange(cve5Range) + nvdIsCPERange := isCPERange(nvdRange) + + if c5IsCPERange != nvdIsCPERange { + if c5IsCPERange { + merged = cve5Range + } else { + merged = nvdRange + } + } + } + + if merged == nil { + cve5Evs := parseExtractedEvents(cve5Range) + nvdEvs := parseExtractedEvents(nvdRange) + + if !sameVersionRanges(cve5Evs, nvdEvs) && len(cve5Evs) > 0 && len(nvdEvs) > 0 { + // Different version ranges defined, prioritize preferred source (CVE5) + merged = cve5Range + } + } + + if merged == nil { + // Fallback: choose the one with more complete Git commits (more events) + if len(nvdRange.GetEvents()) > len(cve5Range.GetEvents()) { + merged = nvdRange + } else { + merged = cve5Range + } + } + + // 5. Remove last_affected events if a fixed commit exists + cleanLastAffectedIfFixedExists(merged) + + return merged } func hasRanges(affected []*osvschema.Affected) bool { diff --git a/vulnfeeds/cmd/combine-to-osv/main_test.go b/vulnfeeds/cmd/combine-to-osv/main_test.go index 51504d9b34b..be2c998f1e2 100644 --- a/vulnfeeds/cmd/combine-to-osv/main_test.go +++ b/vulnfeeds/cmd/combine-to-osv/main_test.go @@ -11,6 +11,7 @@ import ( "github.com/google/osv/vulnfeeds/models" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/structpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -177,9 +178,20 @@ func TestPickAffectedInformation(t *testing.T) { nvdAffected: append(append([]*osvschema.Affected(nil), nvdBase...), &osvschema.Affected{ Package: &osvschema.Package{Name: "another"}, }), - wantAffected: append(append([]*osvschema.Affected(nil), nvdBase...), &osvschema.Affected{ - Package: &osvschema.Package{Name: "another"}, - }), + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: 
cve5Base[0].GetRanges()[0].GetEvents(), + }, + }, + }, + { + Package: &osvschema.Package{Name: "another"}, + }, + }, }, { name: "Same repo, same number of ranges, cve5 data is preferred", @@ -316,9 +328,9 @@ func TestPickAffectedInformation(t *testing.T) { }, }, wantAffected: []*osvschema.Affected{ - cve5Base[0], // From cve5 { Ranges: []*osvschema.Range{ + cve5Base[0].GetRanges()[0], { Type: osvschema.Range_GIT, Repo: repoB, @@ -331,6 +343,356 @@ func TestPickAffectedInformation(t *testing.T) { }, }, }, + { + name: "Fixed overrides LastAffected (CVE5 has Fixed)", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {LastAffected: "1.0.2"}, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + }, + { + name: "Fixed overrides LastAffected (NVD has Fixed)", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {LastAffected: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.2"}, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.2"}, + }, + }, + }, + }, + }, + }, + { + name: "Prefer constrained range (non-zero introduced)", + cve5Affected: 
[]*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0.9.0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0.9.0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + }, + { + name: "Prefer CPE_RANGE source over CVE5", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewStringValue("CPE_RANGE"), + }, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewStringValue("CPE_RANGE"), + }, + }, + }, + }, + }, + }, + }, + { + name: "Prefer CPE_RANGE source over CVE5 when source is array", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: 
osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{ + structpb.NewStringValue("CPE_RANGE"), + structpb.NewStringValue("REFERENCES"), + }, + }), + }, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{ + structpb.NewStringValue("CPE_RANGE"), + structpb.NewStringValue("REFERENCES"), + }, + }), + }, + }, + }, + }, + }, + }, + }, + { + name: "Cleanup last_affected if fixed exists", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Fixed: "1.0.1"}, + {LastAffected: "1.0.0"}, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{}, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Fixed: "1.0.1"}, + }, + }, + }, + }, + }, + }, + { + name: "Merge references-only range with CVE range", + cve5Affected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "1.0.0"}, + {LastAffected: "1.0.1"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewStringValue("AFFECTED_FIELD"), + }, + }, + }, + }, + }, + }, + nvdAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + 
{Fixed: "2c1762b85acb"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewStringValue("REFERENCES"), + }, + }, + }, + }, + }, + }, + wantAffected: []*osvschema.Affected{ + { + Ranges: []*osvschema.Range{ + { + Type: osvschema.Range_GIT, + Repo: repoA, + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Introduced: "1.0.0"}, + {Fixed: "2c1762b85acb"}, + }, + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "source": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{ + structpb.NewStringValue("AFFECTED_FIELD"), + structpb.NewStringValue("REFERENCES"), + }, + }), + }, + }, + }, + }, + }, + }, + }, } // Sorter for comparing slices of Affected, ignoring order. From 5e78b06ccb487e55a572eb8d9a4f17bb82e4cc61 Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 27 May 2026 03:52:20 +0000 Subject: [PATCH 4/6] fix lint --- vulnfeeds/cmd/combine-to-osv/main.go | 51 ++++++++-------------------- 1 file changed, 14 insertions(+), 37 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 7d8635937cb..8e56cb8fbe6 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -276,6 +276,7 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* cleanLastAffectedIfFixedExists(r) } } + return cve5Affected } if len(cve5Affected) == 0 { @@ -284,6 +285,7 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* cleanLastAffectedIfFixedExists(r) } } + return nvdAffected } @@ -467,7 +469,7 @@ func parseExtractedEvents(r *osvschema.Range) []ExtractedEvent { if len(rawValues) == 0 { return nil } - var events []ExtractedEvent + events := make([]ExtractedEvent, 0, len(rawValues)) for _, val := range rawValues { events = append(events, parseExtractedEvent(val)) } @@ -494,6 +496,7 @@ func hasFixedEvent(r *osvschema.Range) bool { return true } } + return 
false } @@ -503,6 +506,7 @@ func hasIntroducedZero(r *osvschema.Range) bool { return true } } + return false } @@ -528,6 +532,7 @@ func isCPERange(r *osvschema.Range) bool { } } } + return false } @@ -554,40 +559,6 @@ func cleanLastAffectedIfFixedExists(r *osvschema.Range) { r.Events = cleanEvents } -func pickBestDatabaseSpecific(cve5Range, nvdRange *osvschema.Range) *structpb.Struct { - c5HasFixed := hasFixedEvent(cve5Range) - nvdHasFixed := hasFixedEvent(nvdRange) - if c5HasFixed != nvdHasFixed { - if c5HasFixed { - return cve5Range.GetDatabaseSpecific() - } - return nvdRange.GetDatabaseSpecific() - } - - c5HasIntroZero := hasIntroducedZero(cve5Range) - nvdHasIntroZero := hasIntroducedZero(nvdRange) - if c5HasIntroZero != nvdHasIntroZero { - if !c5HasIntroZero { - return cve5Range.GetDatabaseSpecific() - } - return nvdRange.GetDatabaseSpecific() - } - - c5IsCPERange := isCPERange(cve5Range) - nvdIsCPERange := isCPERange(nvdRange) - if c5IsCPERange != nvdIsCPERange { - if c5IsCPERange { - return cve5Range.GetDatabaseSpecific() - } - return nvdRange.GetDatabaseSpecific() - } - - if len(nvdRange.GetEvents()) > len(cve5Range.GetEvents()) { - return nvdRange.GetDatabaseSpecific() - } - return cve5Range.GetDatabaseSpecific() -} - func isReferencesOnly(r *osvschema.Range) bool { if r.GetDatabaseSpecific() == nil { return false @@ -609,6 +580,7 @@ func isReferencesOnly(r *osvschema.Range) bool { return true } } + return false } @@ -640,6 +612,7 @@ func mergeDatabaseSpecifics(ds1, ds2 *structpb.Struct) *structpb.Struct { if ds, err := utility.NewStructpbFromMap(mergedMap); err == nil { return ds } + return ds1 } @@ -668,21 +641,23 @@ func mergeRanges(base, other *osvschema.Range) *osvschema.Range { } if !found { if e.GetIntroduced() != "" { - merged.Events = append([]*osvschema.Event{e}, merged.Events...) + merged.Events = append([]*osvschema.Event{e}, merged.GetEvents()...) 
} else { merged.Events = append(merged.Events, e) } } } - slices.SortStableFunc(merged.Events, func(a, b *osvschema.Event) int { + slices.SortStableFunc(merged.GetEvents(), func(a, b *osvschema.Event) int { if a.GetIntroduced() != "" && b.GetIntroduced() == "" { return -1 } if a.GetIntroduced() == "" && b.GetIntroduced() != "" { return 1 } + return 0 }) + return merged } @@ -700,11 +675,13 @@ func pickBestRange(cve5Range *osvschema.Range, nvdRange *osvschema.Range) *osvsc if isReferencesOnly(nvdRange) { merged := mergeRanges(cve5Range, nvdRange) cleanLastAffectedIfFixedExists(merged) + return merged } if isReferencesOnly(cve5Range) { merged := mergeRanges(nvdRange, cve5Range) cleanLastAffectedIfFixedExists(merged) + return merged } From 710178ec01e9f488bb1ba84574dc89eef6be0b9c Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 27 May 2026 04:13:07 +0000 Subject: [PATCH 5/6] clean some funcs up --- vulnfeeds/conversion/common.go | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index 23771858e40..6c99603ede0 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -673,21 +673,14 @@ func ProcessRanges(ranges []models.RangeWithMetadata, repos []string, metrics *m } // Dynamically record the precise sources from each processed range's metadata. - // This ensures that granular version sources (such as CPE-RANGE or CPE-STRING) are tracked in + // This ensures that granular version sources (such as CPE_RANGE or CPE_STRING) are tracked in // the final conversion metrics instead of a single generic fallback source. 
- for _, rng := range ranges { - if rng.Metadata.Source == "" { + for _, ra := range ranges { + if ra.Metadata.Source == "" { continue } - found := false - for _, s := range metrics.VersionSources { - if s == rng.Metadata.Source { - found = true - break - } - } - if !found { - metrics.VersionSources = append(metrics.VersionSources, rng.Metadata.Source) + if !slices.Contains(metrics.VersionSources, ra.Metadata.Source) { + metrics.VersionSources = append(metrics.VersionSources, ra.Metadata.Source) } } From 1dd94ca64386a451b166a265910470b23c6353eb Mon Sep 17 00:00:00 2001 From: Jess Lowe Date: Wed, 27 May 2026 23:25:46 +0000 Subject: [PATCH 6/6] Add comments --- vulnfeeds/cmd/combine-to-osv/main.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/vulnfeeds/cmd/combine-to-osv/main.go b/vulnfeeds/cmd/combine-to-osv/main.go index 8e56cb8fbe6..67e236e5bd2 100644 --- a/vulnfeeds/cmd/combine-to-osv/main.go +++ b/vulnfeeds/cmd/combine-to-osv/main.go @@ -102,7 +102,7 @@ func main() { func extractCVEName(filename string, prefix string) string { cleaned := strings.TrimPrefix(filename, prefix) cleaned = strings.TrimSuffix(cleaned, ".json") - pre := strings.SplitAfter(cleaned, "-") + pre := strings.Split(cleaned, "-") if pre[0] != "CVE" { return "" } @@ -130,7 +130,7 @@ func listBucketObjects(bucketName string, prefix string) ([]string, error) { if err != nil { return nil, fmt.Errorf("bucket.Objects: %w", err) } - filenames = append(filenames, attrs.Name, prefix) + filenames = append(filenames, attrs.Name) } return filenames, nil @@ -279,6 +279,7 @@ func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []* return cve5Affected } + if len(cve5Affected) == 0 { for _, aff := range nvdAffected { for _, r := range aff.GetRanges() { @@ -477,6 +478,7 @@ func parseExtractedEvents(r *osvschema.Range) []ExtractedEvent { return events } +// sameVersionRanges checks if two ranges have the same extracted events. 
func sameVersionRanges(evs1, evs2 []ExtractedEvent) bool { if len(evs1) != len(evs2) { return false @@ -490,6 +492,7 @@ func sameVersionRanges(evs1, evs2 []ExtractedEvent) bool { return true } +// hasFixedEvent checks if any event in the range has a fixed field. func hasFixedEvent(r *osvschema.Range) bool { for _, e := range r.GetEvents() { if e.GetFixed() != "" { @@ -500,6 +503,7 @@ func hasFixedEvent(r *osvschema.Range) bool { return false } +// hasIntroducedZero checks if any event in the range has an introduced field with "0". func hasIntroducedZero(r *osvschema.Range) bool { for _, e := range r.GetEvents() { if e.GetIntroduced() == "0" { @@ -510,6 +514,7 @@ func hasIntroducedZero(r *osvschema.Range) bool { return false } +// isCPERange checks if the range is a CPE range. func isCPERange(r *osvschema.Range) bool { if r.GetDatabaseSpecific() == nil { return false @@ -536,6 +541,8 @@ func isCPERange(r *osvschema.Range) bool { return false } +// cleanLastAffectedIfFixedExists removes every last_affected event from the +// range if any event has a fixed field. This happens in place. func cleanLastAffectedIfFixedExists(r *osvschema.Range) { if r == nil { return @@ -559,6 +566,8 @@ func cleanLastAffectedIfFixedExists(r *osvschema.Range) { r.Events = cleanEvents } +// isReferencesOnly checks if the range 'source' field is only "REFERENCES" +// or ["REFERENCES"]. func isReferencesOnly(r *osvschema.Range) bool { if r.GetDatabaseSpecific() == nil { return false @@ -616,6 +625,8 @@ func mergeDatabaseSpecifics(ds1, ds2 *structpb.Struct) *structpb.Struct { return ds1 } +// mergeRanges merges two ranges into one. It prefers base over other if +// both ranges have the same type and repo. func mergeRanges(base, other *osvschema.Range) *osvschema.Range { merged := &osvschema.Range{ Type: base.GetType(), @@ -661,6 +672,10 @@ func mergeRanges(base, other *osvschema.Range) *osvschema.Range { return merged } +// pickBestRange picks the best range between two ranges.
+// It prefers cve5Range over nvdRange if both ranges have fixed information. +// If one range is references-only, it merges them instead of choosing one. +// More information can be found in the DESIGN.md file in this folder. func pickBestRange(cve5Range *osvschema.Range, nvdRange *osvschema.Range) *osvschema.Range { if cve5Range == nil { cleanLastAffectedIfFixedExists(nvdRange)