-
Notifications
You must be signed in to change notification settings - Fork 17
refactor(hash): extract jsonMD5Raw helper and fix DeterministicUUID #308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,4 +20,4 @@ y | |
| cliff.toml | ||
| .todos/ | ||
| cmd/hx/fixtures/hx | ||
| .ginkgo | ||
| .ginkgo/ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,17 +64,22 @@ import ( | |
| // hash, err := JSONMD5Hash(config) | ||
| // // hash will be consistent for the same config values | ||
| func JSONMD5Hash(obj any) (string, error) { | ||
| data, err := json.Marshal(obj) | ||
| raw, err := jsonMD5Raw(obj) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
| return hex.EncodeToString(raw[:]), nil | ||
| } | ||
|
|
||
| hash := md5.Sum(data) | ||
| // jsonMD5Raw marshals the object into JSON and returns the raw 16-byte MD5 | ||
| // digest. Internal helper shared by JSONMD5Hash (which hex-encodes it) and | ||
| // DeterministicUUID (which uses the raw bytes as UUID bytes). | ||
| func jsonMD5Raw(obj any) ([16]byte, error) { | ||
| data, err := json.Marshal(obj) | ||
| if err != nil { | ||
| return "", err | ||
| return [16]byte{}, err | ||
| } | ||
|
|
||
| return hex.EncodeToString(hash[:]), nil | ||
| return md5.Sum(data), nil | ||
| } | ||
|
|
||
| // Sha256Hex computes the SHA256 hash of the input string and returns it | ||
|
|
@@ -113,15 +118,56 @@ func Sha256Hex(in string) string { | |
| // // Generate UUID from string | ||
| // id2, err := DeterministicUUID("unique-resource-name") | ||
| func DeterministicUUID(seed any) (uuid.UUID, error) { | ||
| byteHash, err := JSONMD5Hash(seed) | ||
| // If the seed is already a UUID (value, pointer, 16 bytes, or parseable | ||
| // string — uuid.Nil included), return it verbatim. Re-hashing a UUID would | ||
| // produce a different UUID, defeating the caller's intent. | ||
| if id, ok := asUUID(seed); ok { | ||
| return id, nil | ||
| } | ||
|
|
||
| raw, err := jsonMD5Raw(seed) | ||
| if err != nil { | ||
| return uuid.Nil, err | ||
| } | ||
|
|
||
| id, err := uuid.FromBytes([]byte(byteHash[0:16])) | ||
| // md5.Sum returns exactly 16 bytes, which is the size of a UUID. Use the | ||
| // raw digest directly — NOT the hex-encoded representation. | ||
| id, err := uuid.FromBytes(raw[:]) | ||
| if err != nil { | ||
| return uuid.Nil, err | ||
| } | ||
|
|
||
| return id, nil | ||
| } | ||
|
|
||
| // asUUID reports whether seed is already a UUID in one of its common forms: | ||
| // uuid.UUID, *uuid.UUID, [16]byte, []byte of length 16, or a string that | ||
| // parses as a UUID. uuid.Nil counts as a valid UUID. | ||
| // | ||
| // Composite seeds ([]string, pq.StringArray, structs, etc.) are not unwrapped | ||
| // — a single-element slice containing a UUID is still a composite and should | ||
| // be hashed. | ||
| func asUUID(seed any) (uuid.UUID, bool) { | ||
| switch v := seed.(type) { | ||
| case uuid.UUID: | ||
| return v, true | ||
| case *uuid.UUID: | ||
| if v == nil { | ||
| return uuid.Nil, false | ||
| } | ||
| return *v, true | ||
| case [16]byte: | ||
| return uuid.UUID(v), true | ||
| case []byte: | ||
| if len(v) == 16 { | ||
| var id uuid.UUID | ||
| copy(id[:], v) | ||
| return id, true | ||
| } | ||
| case string: | ||
| if id, err := uuid.Parse(v); err == nil { | ||
| return id, true | ||
| } | ||
|
Comment on lines
+167
to
+170
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
# Expect: existing tests do not cover 32-char compact hex string behavior yet.
rg -n -C3 --glob '**/*_test.go' \
'32|compact|[[:xdigit:]]{32}|NonUUIDStringStillHashes|DeterministicUUID' hashRepository: flanksource/commons Length of output: 6466 🏁 Script executed: cat -n hash/hash.go | sed -n '150,180p'Repository: flanksource/commons Length of output: 664 🏁 Script executed: rg -n 'uuid.Parse' hash/hash.go -A 2 -B 2Repository: flanksource/commons Length of output: 167 🏁 Script executed: rg -n 'compact\|32.*hex\|[a-f0-9]{32}' hash/hash_test.goRepository: flanksource/commons Length of output: 45 🏁 Script executed: cat > /tmp/test_uuid.go << 'EOF'
package main
import (
"fmt"
"github.com/google/uuid"
)
func main() {
// Test with canonical UUID format
canonical := "550e8400-e29b-41d4-a716-446655440000"
id1, err := uuid.Parse(canonical)
fmt.Printf("Canonical UUID parse: err=%v, String()=%s\n", err, id1.String())
fmt.Printf("Canonical == input: %v\n", id1.String() == canonical)
// Test with 32-char compact hex (no hyphens)
compact := "550e8400e29b41d4a716446655440000"
id2, err := uuid.Parse(compact)
fmt.Printf("\nCompact hex parse: err=%v, String()=%s\n", err, id2.String())
fmt.Printf("Compact == input: %v\n", id2.String() == compact)
// Test with a valid MD5-like hex string
md5Like := "5d41402abc4b2a76b9719d911017c592"
id3, err := uuid.Parse(md5Like)
fmt.Printf("\nMD5-like hex parse: err=%v, String()=%s\n", err, id3.String())
fmt.Printf("MD5-like == input: %v\n", id3.String() == md5Like)
// Show that both canonical and compact parse to same UUID
fmt.Printf("\nCanonical UUID == Compact UUID: %v\n", id1 == id2)
}
EOF
cd /tmp && go run test_uuid.goRepository: flanksource/commons Length of output: 230 🏁 Script executed: cat > /tmp/test_uuid.go << 'EOF'
package main
import (
"fmt"
"github.com/google/uuid"
)
func main() {
// Test with canonical UUID format
canonical := "550e8400-e29b-41d4-a716-446655440000"
id1, err := uuid.Parse(canonical)
fmt.Printf("Canonical UUID parse: err=%v, String()=%s\n", err, id1.String())
fmt.Printf("Canonical == input: %v\n", id1.String() == canonical)
// Test with 32-char compact hex (no hyphens)
compact := "550e8400e29b41d4a716446655440000"
id2, err := uuid.Parse(compact)
fmt.Printf("\nCompact hex parse: err=%v, String()=%s\n", err, id2.String())
fmt.Printf("Compact == input: %v\n", id2.String() == compact)
// Test with a valid MD5-like hex string
md5Like := "5d41402abc4b2a76b9719d911017c592"
id3, err := uuid.Parse(md5Like)
fmt.Printf("\nMD5-like hex parse: err=%v, String()=%s\n", err, id3.String())
fmt.Printf("MD5-like == input: %v\n", id3.String() == md5Like)
// Show that both canonical and compact parse to same UUID
fmt.Printf("\nCanonical UUID == Compact UUID: %v\n", id1 == id2)
fmt.Printf("Canonical String() == Compact String(): %v\n", id1.String() == id2.String())
}
EOF
cd hash && go run /tmp/test_uuid.goRepository: flanksource/commons Length of output: 516 🏁 Script executed: cat > /tmp/test_impact.go << 'EOF'
package main
import (
"fmt"
"github.com/google/uuid"
"strings"
)
func testAsUUID(seed any, withFix bool) (uuid.UUID, bool) {
switch v := seed.(type) {
case string:
if id, err := uuid.Parse(v); err == nil {
// Current code (no fix)
if !withFix {
return id, true
}
// Proposed fix
if strings.EqualFold(id.String(), v) {
return id, true
}
}
}
return uuid.Nil, false
}
func main() {
// Test with 32-char MD5-like hex (should NOT be treated as UUID)
md5Like := "5d41402abc4b2a76b9719d911017c592"
// Current behavior (without fix)
id, ok := testAsUUID(md5Like, false)
fmt.Printf("Current code with MD5-like hex: ok=%v, id=%s\n", ok, id)
// With proposed fix
id, ok = testAsUUID(md5Like, true)
fmt.Printf("Proposed fix with MD5-like hex: ok=%v, id=%s\n", ok, id)
// Test with canonical UUID (should still pass through)
canonical := "550e8400-e29b-41d4-a716-446655440000"
// Current behavior
id, ok = testAsUUID(canonical, false)
fmt.Printf("Current code with canonical UUID: ok=%v, id=%s\n", ok, id)
// With proposed fix
id, ok = testAsUUID(canonical, true)
fmt.Printf("Proposed fix with canonical UUID: ok=%v, id=%s\n", ok, id)
// Test with compact hex UUID (should NOT pass through without hyphens)
compact := "550e8400e29b41d4a716446655440000"
// Current behavior
id, ok = testAsUUID(compact, false)
fmt.Printf("Current code with compact hex: ok=%v, id=%s\n", ok, id)
// With proposed fix
id, ok = testAsUUID(compact, true)
fmt.Printf("Proposed fix with compact hex: ok=%v, id=%s\n", ok, id)
}
EOF
cd hash && go run /tmp/test_impact.goRepository: flanksource/commons Length of output: 552 🏁 Script executed: # Check if there are any tests that would fail if we reject compact hex
rg -n 'compact\|32.*hex\|without.*hyphen' hash/hash_test.goRepository: flanksource/commons Length of output: 45 🏁 Script executed: # Check if DeterministicUUID currently has a test with MD5-like hex string
rg -n '5d41402abc4b2a76b9719d911017c592|[a-f0-9]{32}' hash/hash_test.goRepository: flanksource/commons Length of output: 45 🏁 Script executed: # Let's see the full context of the TestDeterministicUUID_NonUUIDStringStillHashes test
sed -n '173,184p' hash/hash_test.goRepository: flanksource/commons Length of output: 412 Add EqualFold validation to prevent 32-character hex strings from bypassing hashing.
Restrict the string-to-UUID passthrough to canonical UUID format by validating that the parsed UUID's string representation matches the input: Proposed fix import (
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"encoding/json"
+ "strings"
"github.com/google/uuid"
) case string:
- if id, err := uuid.Parse(v); err == nil {
+ if id, err := uuid.Parse(v); err == nil && strings.EqualFold(id.String(), v) {
return id, true
}Add a test case for compact hex strings to prevent regression: func TestDeterministicUUID_CompactHexStringStillHashes(t *testing.T) {
// Ensure 32-char hex strings (e.g., MD5 hashes) without hyphens are hashed, not treated as UUIDs
compactHex := "5d41402abc4b2a76b9719d911017c592"
got, err := DeterministicUUID(compactHex)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
data, _ := json.Marshal(compactHex)
sum := md5.Sum(data)
if !bytes.Equal(got[:], sum[:]) {
t.Fatalf("compact hex strings must hash; want %x, got %x", sum, got[:])
}
}🤖 Prompt for AI Agents |
||
| } | ||
| return uuid.Nil, false | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,198 @@ | ||
| package hash | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "crypto/md5" | ||
| "encoding/hex" | ||
| "encoding/json" | ||
| "testing" | ||
|
|
||
| "github.com/google/uuid" | ||
| ) | ||
|
|
||
| func TestJSONMD5Hash_ReturnsHexEncoded(t *testing.T) { | ||
| // Hex encoding of an MD5 digest is always 32 characters. | ||
| h, err := JSONMD5Hash("hello") | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
| if len(h) != 32 { | ||
| t.Fatalf("expected 32-char hex hash, got %d chars: %q", len(h), h) | ||
| } | ||
| if _, err := hex.DecodeString(h); err != nil { | ||
| t.Fatalf("hash is not valid hex: %v", err) | ||
| } | ||
| } | ||
|
|
||
| func TestJSONMD5Hash_Deterministic(t *testing.T) { | ||
| a, _ := JSONMD5Hash(map[string]string{"k": "v"}) | ||
| b, _ := JSONMD5Hash(map[string]string{"k": "v"}) | ||
| if a != b { | ||
| t.Fatalf("expected deterministic hash, got %q vs %q", a, b) | ||
| } | ||
|
Comment on lines
+27
to
+32
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Check errors in determinism tests before comparing zero values. These tests can pass falsely if both calls return an error and the zero value. The other tests already fail fast on unexpected errors; do the same here. ✅ Proposed test hardening func TestJSONMD5Hash_Deterministic(t *testing.T) {
- a, _ := JSONMD5Hash(map[string]string{"k": "v"})
- b, _ := JSONMD5Hash(map[string]string{"k": "v"})
+ a, err := JSONMD5Hash(map[string]string{"k": "v"})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ b, err := JSONMD5Hash(map[string]string{"k": "v"})
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
if a != b {
t.Fatalf("expected deterministic hash, got %q vs %q", a, b)
}
} func TestDeterministicUUID_Deterministic(t *testing.T) {
- a, _ := DeterministicUUID("same-seed")
- b, _ := DeterministicUUID("same-seed")
+ a, err := DeterministicUUID("same-seed")
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ b, err := DeterministicUUID("same-seed")
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
if a != b {
t.Fatalf("expected deterministic UUID, got %q vs %q", a, b)
}
}
func TestDeterministicUUID_DifferentSeedsProduceDifferentUUIDs(t *testing.T) {
- a, _ := DeterministicUUID("seed-one")
- b, _ := DeterministicUUID("seed-two")
+ a, err := DeterministicUUID("seed-one")
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ b, err := DeterministicUUID("seed-two")
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
if a == b {
t.Fatalf("expected distinct UUIDs for distinct seeds, got %q twice", a)
}
}Also applies to: 67-80 🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| func TestDeterministicUUID_UsesRawBytes(t *testing.T) { | ||
| // Regression for a bug where DeterministicUUID fed the *hex-encoded* | ||
| // JSONMD5Hash string into uuid.FromBytes, producing UUIDs whose bytes | ||
| // were the ASCII codes of hex digits (e.g. 30663964-3638-3061-... where | ||
| // 0x30='0', 0x66='f', 0x39='9', 0x64='d'). The correct behavior is to | ||
| // use the raw 16-byte md5 digest as the UUID bytes. | ||
|
|
||
| seed := "test-seed" | ||
| got, err := DeterministicUUID(seed) | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
|
|
||
| data, _ := json.Marshal(seed) | ||
| sum := md5.Sum(data) | ||
| // The UUID bytes must equal the raw md5 bytes. | ||
| for i := 0; i < 16; i++ { | ||
| if got[i] != sum[i] { | ||
| t.Fatalf("UUID byte %d: want %#x, got %#x", i, sum[i], got[i]) | ||
| } | ||
| } | ||
|
|
||
| // Also assert the UUID is NOT the ASCII-hex-encoded variant of the hex | ||
| // representation of the md5, which is what the old buggy code produced. | ||
| hexStr := hex.EncodeToString(sum[:]) | ||
| var bogus [16]byte | ||
| copy(bogus[:], hexStr[:16]) | ||
| if got == bogus { | ||
| t.Fatal("DeterministicUUID regressed: still uses ASCII hex bytes as UUID bytes") | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_Deterministic(t *testing.T) { | ||
| a, _ := DeterministicUUID("same-seed") | ||
| b, _ := DeterministicUUID("same-seed") | ||
| if a != b { | ||
| t.Fatalf("expected deterministic UUID, got %q vs %q", a, b) | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_DifferentSeedsProduceDifferentUUIDs(t *testing.T) { | ||
| a, _ := DeterministicUUID("seed-one") | ||
| b, _ := DeterministicUUID("seed-two") | ||
| if a == b { | ||
| t.Fatalf("expected distinct UUIDs for distinct seeds, got %q twice", a) | ||
| } | ||
| } | ||
|
|
||
| const passthroughUUIDStr = "550e8400-e29b-41d4-a716-446655440000" | ||
|
|
||
| func TestDeterministicUUID_PassesThroughValidUUIDString(t *testing.T) { | ||
| got, err := DeterministicUUID(passthroughUUIDStr) | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
| if got.String() != passthroughUUIDStr { | ||
| t.Fatalf("expected passthrough %q, got %q", passthroughUUIDStr, got.String()) | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_PassesThroughUUIDValue(t *testing.T) { | ||
| in := uuid.MustParse(passthroughUUIDStr) | ||
| got, err := DeterministicUUID(in) | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
| if got != in { | ||
| t.Fatalf("expected passthrough %q, got %q", in, got) | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_PassesThroughUUIDPointer(t *testing.T) { | ||
| in := uuid.MustParse(passthroughUUIDStr) | ||
| got, err := DeterministicUUID(&in) | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
| if got != in { | ||
| t.Fatalf("expected passthrough %q, got %q", in, got) | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_PassesThroughUUIDBytes(t *testing.T) { | ||
| in := uuid.MustParse(passthroughUUIDStr) | ||
|
|
||
| gotArr, err := DeterministicUUID([16]byte(in)) | ||
| if err != nil { | ||
| t.Fatalf("unexpected error ([16]byte): %v", err) | ||
| } | ||
| if gotArr != in { | ||
| t.Fatalf("[16]byte passthrough: want %q, got %q", in, gotArr) | ||
| } | ||
|
|
||
| gotSlice, err := DeterministicUUID(in[:]) | ||
| if err != nil { | ||
| t.Fatalf("unexpected error ([]byte): %v", err) | ||
| } | ||
| if gotSlice != in { | ||
| t.Fatalf("[]byte passthrough: want %q, got %q", in, gotSlice) | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_PassesThroughNilUUID(t *testing.T) { | ||
| const nilStr = "00000000-0000-0000-0000-000000000000" | ||
|
|
||
| cases := []struct { | ||
| name string | ||
| in any | ||
| }{ | ||
| {"uuid.Nil value", uuid.Nil}, | ||
| {"nil string", nilStr}, | ||
| {"zero [16]byte", [16]byte{}}, | ||
| } | ||
| for _, tc := range cases { | ||
| got, err := DeterministicUUID(tc.in) | ||
| if err != nil { | ||
| t.Fatalf("%s: unexpected error: %v", tc.name, err) | ||
| } | ||
| if got != uuid.Nil { | ||
| t.Fatalf("%s: expected uuid.Nil passthrough, got %q", tc.name, got) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_SingleElementSliceIsHashed(t *testing.T) { | ||
| in := []string{passthroughUUIDStr} | ||
| got, err := DeterministicUUID(in) | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
| if got.String() == passthroughUUIDStr { | ||
| t.Fatal("single-element slice must be treated as a composite and hashed, not unwrapped") | ||
| } | ||
| if got == uuid.Nil { | ||
| t.Fatal("hashed composite should not be uuid.Nil") | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_NonUUIDStringStillHashes(t *testing.T) { | ||
| got, err := DeterministicUUID("test-seed") | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
|
|
||
| data, _ := json.Marshal("test-seed") | ||
| sum := md5.Sum(data) | ||
| if !bytes.Equal(got[:], sum[:]) { | ||
| t.Fatalf("non-UUID strings must still hash; want %x, got %x", sum, got[:]) | ||
| } | ||
| } | ||
|
|
||
| func TestDeterministicUUID_ShortByteSliceStillHashes(t *testing.T) { | ||
| in := []byte{1, 2, 3} | ||
| got, err := DeterministicUUID(in) | ||
| if err != nil { | ||
| t.Fatalf("unexpected error: %v", err) | ||
| } | ||
|
|
||
| data, _ := json.Marshal(in) | ||
| sum := md5.Sum(data) | ||
| if !bytes.Equal(got[:], sum[:]) { | ||
| t.Fatalf("len != 16 []byte must hash via JSON-MD5; want %x, got %x", sum, got[:]) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
❓ Verification inconclusive
Script executed:
Repository: flanksource/commons
Repository: flanksource/commons
Exit code: 2
stderr:
Script executed:
Repository: flanksource/commons
Repository: flanksource/commons
Exit code: 0
stdout:
Script executed:
Repository: flanksource/commons
Repository: flanksource/commons
Exit code: 0
stdout:
Script executed:
Repository: flanksource/commons
Repository: flanksource/commons
Exit code: 0
stdout:
Script executed:
Repository: flanksource/commons
Repository: flanksource/commons
Exit code: 0
stdout:
Pin the Gavel action and tool version before granting write permissions.
This job grants
pull-requests: write(line 18) but runsflanksource/gavel@main(line 32) withversion: latest(line 35). Pin both to immutable references to prevent supply-chain risk when using a write-scoped token.🔒 Proposed hardening
Other actions in this workflow are pinned to commit SHAs; apply the same hardening to Gavel.
🤖 Prompt for AI Agents