Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ func main() {

close(jobs)
wg.Wait()

timesBlocked := int64(0)
if gcsHelper != nil {
timesBlocked = gcsHelper.GetTimesBlocked()
Expand Down
4 changes: 2 additions & 2 deletions vulnfeeds/cmd/mirrors/cpe-repo-gen/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ func analyzeCPEDictionary(cpes []CPE) (productToRepo c.VendorProductToRepoMap, d
logger.Info("Disregarding derived repo", slog.String("repo", repo), slog.String("vendor", vp.Vendor), slog.String("product", vp.Product), slog.Any("err", err))
continue
}
if !git.ValidRepoAndHasUsableRefs(repo) {
if valid, _ := git.ValidRepoAndHasUsableRefs(repo); !valid {
logger.Info("Disregarding derived repo as unusable", slog.String("repo", repo), slog.String("vendor", vp.Vendor), slog.String("product", vp.Product))
continue
}
Expand All @@ -407,7 +407,7 @@ func validateRepos(prm c.VendorProductToRepoMap) (validated c.VendorProductToRep
entryCount++
// As a side-effect, this also omits any with no repos.
for _, r := range prm[vp] {
if !git.ValidRepoAndHasUsableRefs(r) {
if valid, _ := git.ValidRepoAndHasUsableRefs(r); !valid {
logger.Info("Invalid repo", slog.Int("count", entryCount), slog.Int("total", len(prm)), slog.String("repo", r), slog.String("vendor", vp.Vendor), slog.String("product", vp.Product))
continue
}
Expand Down
28 changes: 22 additions & 6 deletions vulnfeeds/conversion/versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"slices"
"strings"
"sync"
"time"

"github.com/google/osv/vulnfeeds/git"
"github.com/google/osv/vulnfeeds/models"
Expand Down Expand Up @@ -1058,7 +1059,7 @@ func (c *VPRepoCache) MaybeUpdate(vp *VendorProduct, repo string) {
return
}
// Avoid polluting the cache with existent-but-useless repos.
if git.ValidRepoAndHasUsableRefs(repo) {
if valid, _ := git.ValidRepoAndHasUsableRefs(repo); valid {
c.m[*vp] = append(c.m[*vp], repo)
}
}
Expand Down Expand Up @@ -1232,11 +1233,7 @@ func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Re
continue
}
// If it's any other repo-shaped URL, it's only useful if it has tags.
if (err == nil && !git.ValidRepo(repo)) || (err != nil && !git.ValidRepoAndHasUsableRefs(repo)) {
if repoTagsCache != nil {
repoTagsCache.SetInvalid(repo)
}

if isValid, _ := validateRepo(repo, err == nil, repoTagsCache); !isValid {
continue
}
repos = append(repos, repo)
Expand Down Expand Up @@ -1281,3 +1278,22 @@ func ReposFromReferencesCVEList(refs []models.Reference, tagDenyList []string, m

return repos
}

func validateRepo(repo string, isCommit bool, cache git.RepoTagsCache) (bool, error) {
var valid bool
var err error
if isCommit {
valid, err = git.ValidRepo(repo)
} else {
valid, err = git.ValidRepoAndHasUsableRefs(repo)
}
if !valid && cache != nil {
if err != nil && (errors.Is(err, git.ErrRateLimit) || strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "Too Many Requests")) {
cache.SetInvalidWithTTL(repo, 1*time.Hour)
} else {
cache.SetInvalid(repo)
}
}

return valid, err
}
64 changes: 33 additions & 31 deletions vulnfeeds/git/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ type RepoTagsCache interface {
Get(repo string) (RepoTagsMap, bool)
Set(repo string, tags RepoTagsMap)
SetInvalid(repo string)
SetInvalidWithTTL(repo string, ttl time.Duration)
IsInvalid(repo string) bool
SetCanonicalLink(repo string, canonicalLink string)
GetCanonicalLink(repo string) (string, bool)
Expand Down Expand Up @@ -124,6 +125,10 @@ func (c *InMemoryRepoTagsCache) SetInvalid(repo string) {
c.invalid[repo] = true
}

func (c *InMemoryRepoTagsCache) SetInvalidWithTTL(repo string, _ time.Duration) {
c.SetInvalid(repo)
}

func (c *InMemoryRepoTagsCache) IsInvalid(repo string) bool {
c.RLock()
defer c.RUnlock()
Expand Down Expand Up @@ -191,11 +196,14 @@ func (c *RedisRepoTagsCache) Set(repo string, tags RepoTagsMap) {
}

func (c *RedisRepoTagsCache) SetInvalid(repo string) {
c.SetInvalidWithTTL(repo, randomTTL())
}

func (c *RedisRepoTagsCache) SetInvalidWithTTL(repo string, ttl time.Duration) {
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
defer cancel()

key := "invalid_repo:" + repo
ttl := randomTTL()
c.redisClient.Set(ctx, key, "true", ttl)
}

Expand Down Expand Up @@ -246,7 +254,9 @@ func NewRepoTagsCache() RepoTagsCache {
Protocol: 2,
})

return &RedisRepoTagsCache{redisClient: client}
return &RedisRepoTagsCache{
redisClient: client,
}
}

return &InMemoryRepoTagsCache{
Expand Down Expand Up @@ -412,16 +422,14 @@ func RepoTags(repoURL string, repoTagsCache RepoTagsCache) (tags Tags, e error)
}
}
// Cache miss.
var refs []*plumbing.Reference
var err error
if os.Getenv("GITTER_HOST") != "" {
refs, err = gitterRepoRefs(repoURL)
} else {
refs, err = RemoteRepoRefsWithRetry(repoURL, 3)
}
refs, err := getRepoRefs(repoURL)
if err != nil {
if repoTagsCache != nil {
repoTagsCache.SetInvalid(repoURL)
if errors.Is(err, ErrRateLimit) || strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "Too Many Requests") {
repoTagsCache.SetInvalidWithTTL(repoURL, 1*time.Hour)
} else {
repoTagsCache.SetInvalid(repoURL)
}
}

return tags, err
Expand Down Expand Up @@ -547,34 +555,28 @@ func RefBranches(refs []*plumbing.Reference) (branches []*plumbing.Reference) {
return branches
}

// Validate the repo by attempting to query it's references.
// *** Does external calls to verify repos ***
func ValidRepo(repoURL string) bool {
func getRepoRefs(repoURL string) ([]*plumbing.Reference, error) {
if os.Getenv("GITTER_HOST") != "" {
_, err := gitterRepoRefs(repoURL)
return err == nil
return gitterRepoRefs(repoURL)
}
_, err := RemoteRepoRefsWithRetry(repoURL, 3)

return err == nil
return RemoteRepoRefsWithRetry(repoURL, 3)
}

// Otherwise functional repos that don't have any tags are not valid.
// Validate the repo by attempting to query it's references.
// *** Does external calls to verify repos ***
func ValidRepoAndHasUsableRefs(repoURL string) (valid bool) {
if os.Getenv("GITTER_HOST") != "" {
refs, err := gitterRepoRefs(repoURL)
if err != nil || len(refs) == 0 {
return false
}
func ValidRepo(repoURL string) (bool, error) {
_, err := getRepoRefs(repoURL)
return err == nil, err
}

return len(RefTags(refs)) > 0
}
refs, err := RemoteRepoRefsWithRetry(repoURL, 3)
// Return false if there's an error, or if the repo has no refs (e.g. is empty)
// Otherwise functional repos that don't have any tags are not valid.
// *** Does external calls to verify repos ***
func ValidRepoAndHasUsableRefs(repoURL string) (bool, error) {
refs, err := getRepoRefs(repoURL)
if err != nil || len(refs) == 0 {
return false
return false, err
}
// Repos with no tags aren't useful.
return len(RefTags(refs)) > 0

return len(RefTags(refs)) > 0, nil
}
33 changes: 29 additions & 4 deletions vulnfeeds/git/repository_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ func TestValidRepo(t *testing.T) {
if time.Now().Before(tc.disableExpiryDate) {
t.Skipf("test %q: TestValidRepo(%q) has been skipped due to known outage and will be reenabled on %s.", tc.description, tc.repoURL, tc.disableExpiryDate)
}
got := ValidRepoAndHasUsableRefs(tc.repoURL)
got, _ := ValidRepoAndHasUsableRefs(tc.repoURL)
if diff := cmp.Diff(got, tc.expectedResult); diff != "" {
t.Errorf("test %q: ValidRepo(%q) was incorrect: %s", tc.description, tc.repoURL, diff)
t.Logf("Confirm that %s is reachable with `git ls-remote %s`", tc.repoURL, tc.repoURL)
Expand All @@ -396,7 +396,7 @@ func TestInvalidRepos(t *testing.T) {
testutils.SetupGitVCR(t)
redundantRepos := []string{}
for _, repo := range models.InvalidRepos {
if !ValidRepoAndHasUsableRefs(repo) {
if valid, _ := ValidRepoAndHasUsableRefs(repo); !valid {
redundantRepos = append(redundantRepos, repo)
}
}
Expand Down Expand Up @@ -537,12 +537,37 @@ func TestValidRepoWithGitter(t *testing.T) {
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
responseStatus = tc.status
if got := ValidRepo(tc.repo); got != tc.wantValidRepo {
if got, _ := ValidRepo(tc.repo); got != tc.wantValidRepo {
t.Errorf("ValidRepo(%q) = %v, want %v", tc.repo, got, tc.wantValidRepo)
}
if got := ValidRepoAndHasUsableRefs(tc.repo); got != tc.wantValidRepoAndHasRefs {
if got, _ := ValidRepoAndHasUsableRefs(tc.repo); got != tc.wantValidRepoAndHasRefs {
t.Errorf("ValidRepoAndHasUsableRefs(%q) = %v, want %v", tc.repo, got, tc.wantValidRepoAndHasRefs)
}
})
}
}

func TestInMemoryRepoTagsCache(t *testing.T) {
cache := &InMemoryRepoTagsCache{}
repo := "https://github.com/example/test-repo"

// Test that it starts as valid
if cache.IsInvalid(repo) {
t.Errorf("Expected repo to be valid initially")
}

// Test SetInvalid
cache.SetInvalid(repo)
if !cache.IsInvalid(repo) {
t.Errorf("Expected repo to be invalid after SetInvalid")
}

// Reset the cache
cache = &InMemoryRepoTagsCache{}

// Test SetInvalidWithTTL (which behaves as SetInvalid)
cache.SetInvalidWithTTL(repo, 1*time.Hour)
if !cache.IsInvalid(repo) {
t.Errorf("Expected repo to be invalid after SetInvalidWithTTL")
}
}
Loading