diff --git a/CHANGELOG.md b/CHANGELOG.md index ed1c4e4f1..dc6018c4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,11 @@ The following emojis are used to highlight certain changes: ### Added +- `routing/http`: ✨ added `generic` schema support per [IPIP-518](https://github.com/ipfs/specs/pull/518) + - new `GenericRecord` type with duck-typed `Addrs` (multiaddrs and URIs) and arbitrary string `ID` (not limited to PeerIDs) + - `contentrouter` converts GenericRecord to `peer.AddrInfo` for backward compatibility: HTTP(S) URLs become `/dns/host/tcp/port/http` or `/dns/host/tcp/port/https` multiaddrs; when the ID is not a native libp2p PeerID and the record advertises `transport-ipfs-gateway-http` with an HTTP(S) URL, the PeerID is derived from an ed25519 `did:key:` or, failing that, generated as a placeholder that is deterministic only within a single process (other records with non-PeerID identifiers are skipped) + - `filter-addrs` extended to match URI schemes (e.g. `?filter-addrs=https`) in addition to multiaddr protocol names + - generic records are capped at 10 KiB per IPIP-518 spec - `ipld/unixfs/io`: added `SizeEstimationMode` for configurable HAMT sharding threshold decisions. Supports legacy link-based estimation (`SizeEstimationLinks`), accurate block-based estimation (`SizeEstimationBlock`), or disabling size-based thresholds (`SizeEstimationDisabled`). [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) - `ipld/unixfs/io`: added `UnixFSProfile` with `UnixFS_v0_2015` and `UnixFS_v1_2025` presets for CID-deterministic file and directory DAG construction. [#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) - `files`: `NewSerialFileWithOptions` now supports controlling whether symlinks are preserved or dereferenced before being added to IPFS. See `SerialFileOptions.DereferenceSymlinks`. 
[#1088](https://github.com/ipfs/boxo/pull/1088), [IPIP-499](https://github.com/ipfs/specs/pull/499) diff --git a/go.mod b/go.mod index 4ebc97979..c0142ed4a 100644 --- a/go.mod +++ b/go.mod @@ -51,6 +51,7 @@ require ( github.com/multiformats/go-multicodec v0.10.0 github.com/multiformats/go-multihash v0.2.3 github.com/multiformats/go-multistream v0.6.1 + github.com/multiformats/go-varint v0.1.0 github.com/polydawn/refmt v0.89.0 github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_model v0.6.2 @@ -115,7 +116,6 @@ require ( github.com/minio/sha256-simd v1.0.1 // indirect github.com/multiformats/go-base36 v0.2.0 // indirect github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect - github.com/multiformats/go-varint v0.1.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect diff --git a/routing/http/client/client.go b/routing/http/client/client.go index ce9c989c0..dd34b814b 100644 --- a/routing/http/client/client.go +++ b/routing/http/client/client.go @@ -180,8 +180,9 @@ func WithUserAgent(ua string) Option { func WithProviderInfo(peerID peer.ID, addrs []multiaddr.Multiaddr) Option { return func(c *Client) error { c.peerID = peerID - for _, a := range addrs { - c.addrs = append(c.addrs, types.Multiaddr{Multiaddr: a}) + c.addrs = make([]types.Multiaddr, len(addrs)) + for i, a := range addrs { + c.addrs[i] = types.Multiaddr{Multiaddr: a} } return nil } diff --git a/routing/http/client/client_test.go b/routing/http/client/client_test.go index 6edd9adc0..8708c7dc2 100644 --- a/routing/http/client/client_test.go +++ b/routing/http/client/client_test.go @@ -153,18 +153,19 @@ func makeCID() cid.Cid { return c } -func drAddrsToAddrs(drmas []types.Multiaddr) (addrs []multiaddr.Multiaddr) { - for _, a := range drmas { +func drAddrsToAddrs(draddrs []types.Multiaddr) (addrs 
[]multiaddr.Multiaddr) { + for _, a := range draddrs { addrs = append(addrs, a.Multiaddr) } return } -func addrsToDRAddrs(addrs []multiaddr.Multiaddr) (drmas []types.Multiaddr) { - for _, a := range addrs { - drmas = append(drmas, types.Multiaddr{Multiaddr: a}) +func addrsToDRAddrs(addrs []multiaddr.Multiaddr) []types.Multiaddr { + draddrs := make([]types.Multiaddr, len(addrs)) + for i, a := range addrs { + draddrs[i] = types.Multiaddr{Multiaddr: a} } - return + return draddrs } func makePeerRecord(protocols []string) types.PeerRecord { @@ -410,6 +411,94 @@ func TestClient_FindProviders(t *testing.T) { } } +// TestClient_FindProvidersWithGenericRecord verifies that a client correctly +// receives and deserializes GenericRecord from a FindProviders response. +// This is an end-to-end test through the real server handler and HTTP client. +func TestClient_FindProvidersWithGenericRecord(t *testing.T) { + peerRecord := makePeerRecord([]string{"transport-bitswap"}) + genericRecord := types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "did:key:z6Mkm1example", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{ + mustAddr(t, "https://trustless-gateway.example.com"), + mustAddr(t, "/ip4/1.2.3.4/tcp/5000"), + }, + } + + routerResults := []iter.Result[types.Record]{ + {Val: &peerRecord}, + {Val: &genericRecord}, + } + + t.Run("streaming NDJSON response", func(t *testing.T) { + deps := makeTestDeps(t, []Option{WithProtocolFilter([]string{})}, nil) + client := deps.client + router := deps.router + + cid := makeCID() + router.On("FindProviders", mock.Anything, cid, 0). 
+ Return(iter.FromSlice(routerResults), nil) + + resultIter, err := client.FindProviders(context.Background(), cid) + require.NoError(t, err) + + results := iter.ReadAll[iter.Result[types.Record]](resultIter) + require.Len(t, results, 2) + + // First result: PeerRecord + require.NoError(t, results[0].Err) + _, ok := results[0].Val.(*types.PeerRecord) + require.True(t, ok, "first result should be PeerRecord") + + // Second result: GenericRecord + require.NoError(t, results[1].Err) + gr, ok := results[1].Val.(*types.GenericRecord) + require.True(t, ok, "second result should be GenericRecord") + assert.Equal(t, types.SchemaGeneric, gr.Schema) + assert.Equal(t, "did:key:z6Mkm1example", gr.ID) + assert.Equal(t, []string{"transport-ipfs-gateway-http"}, gr.Protocols) + require.Len(t, gr.Addrs, 2) + assert.Equal(t, "https://trustless-gateway.example.com", gr.Addrs[0].String()) + assert.Equal(t, "/ip4/1.2.3.4/tcp/5000", gr.Addrs[1].String()) + }) + + t.Run("non-streaming JSON response", func(t *testing.T) { + deps := makeTestDeps(t, + []Option{WithProtocolFilter([]string{})}, + []server.Option{server.WithStreamingResultsDisabled()}, + ) + client := deps.client + router := deps.router + + cid := makeCID() + router.On("FindProviders", mock.Anything, cid, 20). 
+ Return(iter.FromSlice(routerResults), nil) + + resultIter, err := client.FindProviders(context.Background(), cid) + require.NoError(t, err) + + results := iter.ReadAll[iter.Result[types.Record]](resultIter) + require.Len(t, results, 2) + + // Verify GenericRecord survived the JSON (non-streaming) path + require.NoError(t, results[1].Err) + gr, ok := results[1].Val.(*types.GenericRecord) + require.True(t, ok, "second result should be GenericRecord") + assert.Equal(t, "did:key:z6Mkm1example", gr.ID) + require.Len(t, gr.Addrs, 2) + assert.True(t, gr.Addrs[0].IsURL()) + assert.True(t, gr.Addrs[1].IsMultiaddr()) + }) +} + +func mustAddr(t *testing.T, s string) types.Address { + t.Helper() + a, err := types.NewAddress(s) + require.NoError(t, err) + return a +} + func TestClient_Provide(t *testing.T) { cases := []struct { name string diff --git a/routing/http/contentrouter/contentrouter.go b/routing/http/contentrouter/contentrouter.go index 07be56d0f..2bbe2cd11 100644 --- a/routing/http/contentrouter/contentrouter.go +++ b/routing/http/contentrouter/contentrouter.go @@ -3,6 +3,7 @@ package contentrouter import ( "context" "reflect" + "slices" "strings" "time" @@ -142,8 +143,21 @@ func (c *contentRouter) Ready() bool { return true } -// readProviderResponses reads peer records (and bitswap records for legacy -// compatibility) from the iterator into the given channel. +// readProviderResponses reads provider records from the iterator into the given +// channel. PeerRecord and BitswapRecord are converted directly. GenericRecord +// is converted on a best-effort basis: +// - If the ID is a valid libp2p PeerID, the record is always converted +// regardless of Protocols. This supports the legacy pattern where a +// PeerID + /https multiaddr was used as a hint to probe for a Trustless +// IPFS HTTP Gateway (even without explicit protocol declaration). 
+// - If the ID is not a PeerID but the record advertises +// transport-ipfs-gateway-http with HTTP(S) URLs, a PeerID is derived +// from did:key: or generated as a placeholder. +// - Other records with non-PeerID identifiers are skipped. +// +// Addresses are converted via [types.Address.ToMultiaddr]; HTTPS URLs +// become /dns/host/tcp/443/https multiaddrs. Non-convertible addresses +// are dropped. func readProviderResponses(ctx context.Context, iter iter.ResultIter[types.Record], ch chan<- peer.AddrInfo) { defer close(ch) defer iter.Close() @@ -175,6 +189,60 @@ func readProviderResponses(ctx context.Context, iter iter.ResultIter[types.Recor }: } + case types.SchemaGeneric: + result, ok := v.(*types.GenericRecord) + if !ok { + logger.Errorw( + "problem casting find providers result", + "Schema", v.GetSchema(), + "Type", reflect.TypeOf(v).String(), + ) + continue + } + + pid, err := peer.Decode(result.ID) + if err != nil { + // For HTTP gateway providers, try harder to derive a PeerID. + // Kubo and Rainbow need a PeerID to pass multiaddr addresses + // over legacy routing APIs even when the provider uses + // non-PeerID identifiers like did:key:. + if slices.Contains(result.Protocols, "transport-ipfs-gateway-http") && hasHTTPURL(result.Addrs) { + pid, err = peerIDFromDIDKey(result.ID) + if err != nil { + pid = peerIDPlaceholderFromArbitraryID(result.ID) + } + } else { + // Records with non-PeerID identifiers and no recognized + // protocol are skipped: without a protocol hint we cannot + // determine how to use the addresses in legacy routing APIs. + logger.Debugw("skipping generic record with non-PeerID identifier", "ID", result.ID) + continue + } + } + + // Convert addresses to multiaddrs. URLs are converted via + // ToMultiaddr (e.g. https://host -> /dns/host/tcp/443/https). + // Addresses that cannot be converted are dropped. 
+ var addrs []multiaddr.Multiaddr + for i := range result.Addrs { + if ma := result.Addrs[i].ToMultiaddr(); ma != nil { + addrs = append(addrs, ma) + } + } + if len(addrs) == 0 { + logger.Debugw("skipping generic record with no convertible addresses", "ID", result.ID) + continue + } + + select { + case <-ctx.Done(): + return + case ch <- peer.AddrInfo{ + ID: pid, + Addrs: addrs, + }: + } + //nolint:staticcheck //lint:ignore SA1019 // ignore staticcheck case types.SchemaBitswap: diff --git a/routing/http/contentrouter/contentrouter_test.go b/routing/http/contentrouter/contentrouter_test.go index 927ee0d44..7700ac349 100644 --- a/routing/http/contentrouter/contentrouter_test.go +++ b/routing/http/contentrouter/contentrouter_test.go @@ -15,7 +15,10 @@ import ( "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/routing" "github.com/multiformats/go-multiaddr" + "github.com/multiformats/go-multibase" + "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" + "github.com/multiformats/go-varint" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -111,6 +114,36 @@ func TestProvideMany(t *testing.T) { require.NoError(t, err) } +// mustAddr is a test helper that creates a types.Address or fails the test. +func mustAddr(t *testing.T, s string) types.Address { + t.Helper() + a, err := types.NewAddress(s) + require.NoError(t, err) + return a +} + +// collectProviders drains a FindProvidersAsync channel into a slice. +func collectProviders(ch <-chan peer.AddrInfo) []peer.AddrInfo { + var out []peer.AddrInfo + for ai := range ch { + out = append(out, ai) + } + return out +} + +// findProvidersWithRecords is a test helper that sets up a mock client with the +// given records and returns the peer.AddrInfo results from FindProvidersAsync. 
+func findProvidersWithRecords(t *testing.T, records []types.Record) []peer.AddrInfo { + t.Helper() + key := makeCID() + ctx := context.Background() + client := &mockClient{} + crc := NewContentRoutingClient(client) + aisIter := iter.ToResultIter[types.Record](iter.FromSlice(records)) + client.On("FindProviders", ctx, key).Return(aisIter, nil) + return collectProviders(crc.FindProvidersAsync(ctx, key, len(records))) +} + func makeCID() cid.Cid { buf := make([]byte, 63) _, err := rand.Read(buf) @@ -184,6 +217,196 @@ func TestFindProvidersAsync(t *testing.T) { require.Equal(t, expected, actualAIs) } +// TestFindProvidersAsyncConvertsGenericRecords verifies that GenericRecord +// entries with a valid PeerID and convertible addresses are converted to +// peer.AddrInfo. HTTPS URLs become /dns/host/tcp/443/https multiaddrs. +func TestFindProvidersAsyncConvertsGenericRecords(t *testing.T) { + p1 := peer.ID("peer1") + gatewayPID, err := peer.Decode("12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn") + require.NoError(t, err) + p2 := peer.ID("peer2") + + results := findProvidersWithRecords(t, []types.Record{ + &types.PeerRecord{ + Schema: types.SchemaPeer, + ID: &p1, + Protocols: []string{"transport-bitswap"}, + }, + &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{ + mustAddr(t, "https://trustless-gateway.example.com"), + mustAddr(t, "/ip4/1.2.3.4/tcp/5000"), + }, + }, + &types.PeerRecord{ + Schema: types.SchemaPeer, + ID: &p2, + Protocols: []string{"transport-bitswap"}, + }, + }) + + require.Len(t, results, 3) + assert.Equal(t, p1, results[0].ID) + + // GenericRecord converted: PeerID decoded, HTTPS URL becomes multiaddr + assert.Equal(t, gatewayPID, results[1].ID) + require.Len(t, results[1].Addrs, 2) + assert.Equal(t, "/dns/trustless-gateway.example.com/tcp/443/https", results[1].Addrs[0].String()) + assert.Equal(t, 
"/ip4/1.2.3.4/tcp/5000", results[1].Addrs[1].String()) + + assert.Equal(t, p2, results[2].ID) +} + +// TestFindProvidersAsyncSkipsNonConvertibleGenericRecords verifies that +// GenericRecord entries are skipped when they cannot produce a usable +// peer.AddrInfo. The PeerID heuristic only applies to records with +// transport-ipfs-gateway-http + HTTP(S) URL, so all other combinations +// with non-PeerID identifiers or no convertible addresses are dropped. +func TestFindProvidersAsyncSkipsNonConvertibleGenericRecords(t *testing.T) { + p1 := peer.ID("peer1") + + results := findProvidersWithRecords(t, []types.Record{ + &types.PeerRecord{ + Schema: types.SchemaPeer, + ID: &p1, + Protocols: []string{"transport-bitswap"}, + }, + // non-PeerID + non-HTTP-gateway protocol: no heuristic applies, skipped + &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "did:key:z6Mkm1example", + Protocols: []string{"transport-foo"}, + Addrs: types.Addresses{mustAddr(t, "https://gateway.example.com")}, + }, + // valid PeerID but only non-convertible addresses: skipped + &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + Protocols: []string{"transport-foo"}, + Addrs: types.Addresses{mustAddr(t, "foo://custom.example.com")}, + }, + // valid PeerID but empty address list: skipped + &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + Protocols: []string{"transport-ipfs-gateway-http"}, + }, + }) + + // Only the PeerRecord should come through; all GenericRecords are skipped + require.Len(t, results, 1) + assert.Equal(t, p1, results[0].ID) +} + +// encodeDIDKey encodes an ed25519 public key as a did:key: identifier. 
+func encodeDIDKey(t *testing.T, pubKey crypto.PubKey) string { + t.Helper() + raw, err := pubKey.Raw() + require.NoError(t, err) + + // multicodec prefix for ed25519-pub (0xed) as varint + raw key bytes + prefix := varint.ToUvarint(uint64(multicodec.Ed25519Pub)) + data := append(prefix, raw...) + + encoded, err := multibase.Encode(multibase.Base58BTC, data) + require.NoError(t, err) + return "did:key:" + encoded +} + +// TestFindProvidersAsyncDIDKeyConversion verifies the end-to-end path for +// HTTP gateway providers that use did:key: ed25519 identifiers instead of +// PeerIDs. The contentrouter should extract the ed25519 public key from the +// did:key, derive the corresponding PeerID, and return it in the AddrInfo +// so that Kubo/Rainbow can route to the provider over legacy APIs. +func TestFindProvidersAsyncDIDKeyConversion(t *testing.T) { + _, pubKey, err := crypto.GenerateEd25519Key(rand.Reader) + require.NoError(t, err) + expectedPID, err := peer.IDFromPublicKey(pubKey) + require.NoError(t, err) + + results := findProvidersWithRecords(t, []types.Record{ + &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: encodeDIDKey(t, pubKey), + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{mustAddr(t, "https://gateway.example.com")}, + }, + }) + + require.Len(t, results, 1) + assert.Equal(t, expectedPID, results[0].ID) + require.Len(t, results[0].Addrs, 1) + assert.Equal(t, "/dns/gateway.example.com/tcp/443/https", results[0].Addrs[0].String()) +} + +// TestFindProvidersAsyncPlaceholderPeerID verifies the end-to-end fallback +// path: when a GenericRecord has transport-ipfs-gateway-http + HTTPS URL but +// its ID is neither a PeerID nor a did:key, the contentrouter generates a +// placeholder PeerID so the record is not dropped by legacy routing APIs. 
+func TestFindProvidersAsyncPlaceholderPeerID(t *testing.T) { + results := findProvidersWithRecords(t, []types.Record{ + &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "custom-provider-123", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{mustAddr(t, "https://provider.example.com")}, + }, + }) + + require.Len(t, results, 1, "record with placeholder PeerID should not be skipped") + assert.NotEmpty(t, results[0].ID) + require.Len(t, results[0].Addrs, 1) + assert.Equal(t, "/dns/provider.example.com/tcp/443/https", results[0].Addrs[0].String()) +} + +// TestFindProvidersAsyncGenericRecordEmptyProtocols verifies that a +// GenericRecord with a valid PeerID and HTTPS URL but empty Protocols is +// still converted to a peer.AddrInfo. This supports the legacy pattern where +// a PeerID + /https multiaddr was used as a hint to probe for a Trustless +// IPFS HTTP Gateway, even without an explicit protocol declaration. +func TestFindProvidersAsyncGenericRecordEmptyProtocols(t *testing.T) { + gatewayPID, err := peer.Decode("12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn") + require.NoError(t, err) + + results := findProvidersWithRecords(t, []types.Record{ + &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + Addrs: types.Addresses{mustAddr(t, "https://dag.w3s.link")}, + // No Protocols field - still works via PeerID + URL conversion + }, + }) + + require.Len(t, results, 1) + assert.Equal(t, gatewayPID, results[0].ID) + require.Len(t, results[0].Addrs, 1) + assert.Equal(t, "/dns/dag.w3s.link/tcp/443/https", results[0].Addrs[0].String()) +} + +// TestFindProvidersAsyncGenericRecordHTTPURL verifies that plain http:// URLs +// (not https://) are also converted to multiaddrs with the correct port. 
+func TestFindProvidersAsyncGenericRecordHTTPURL(t *testing.T) { + gatewayPID, err := peer.Decode("12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn") + require.NoError(t, err) + + results := findProvidersWithRecords(t, []types.Record{ + &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{mustAddr(t, "http://gateway.example.com:8080")}, + }, + }) + + require.Len(t, results, 1) + assert.Equal(t, gatewayPID, results[0].ID) + require.Len(t, results[0].Addrs, 1) + assert.Equal(t, "/dns/gateway.example.com/tcp/8080/http", results[0].Addrs[0].String()) +} + func TestFindPeer(t *testing.T) { ctx := context.Background() client := &mockClient{} diff --git a/routing/http/contentrouter/interop.go b/routing/http/contentrouter/interop.go new file mode 100644 index 000000000..e6f5b6f0d --- /dev/null +++ b/routing/http/contentrouter/interop.go @@ -0,0 +1,90 @@ +package contentrouter + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "strings" + + "github.com/ipfs/boxo/routing/http/types" + "github.com/libp2p/go-libp2p/core/crypto" + "github.com/libp2p/go-libp2p/core/peer" + "github.com/multiformats/go-multibase" + "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-multihash" + "github.com/multiformats/go-varint" +) + +// peerIDFromDIDKey attempts to derive a libp2p PeerID from a did:key: identifier. +// Currently supports ed25519 public keys (multicodec 0xed). 
+func peerIDFromDIDKey(id string) (peer.ID, error) { + encoded, ok := strings.CutPrefix(id, "did:key:") + if !ok { + return "", fmt.Errorf("not a did:key identifier") + } + + // Decode the multibase-encoded part (normally z = base58btc; the detected encoding is not checked) + _, data, err := multibase.Decode(encoded) + if err != nil { + return "", fmt.Errorf("multibase decode: %w", err) + } + + // Read the multicodec varint; n is the number of bytes it occupies + codec, n, err := varint.FromUvarint(data) + if err != nil { + return "", fmt.Errorf("varint decode: %w", err) + } + + // Only handle ed25519-pub (0xed) for now + if multicodec.Code(codec) != multicodec.Ed25519Pub { + return "", fmt.Errorf("unsupported key type: 0x%x", codec) + } + + keyBytes := data[n:] // raw public key bytes follow the multicodec prefix + pubKey, err := crypto.UnmarshalEd25519PublicKey(keyBytes) + if err != nil { + return "", fmt.Errorf("unmarshal ed25519: %w", err) + } + + return peer.IDFromPublicKey(pubKey) +} + +// peerIDPlaceholderNonce is a random value generated once per process, +// mixed into placeholder PeerID hashes to ensure different processes +// produce different placeholders for the same provider ID. +// TODO: make this configurable or find a better way to ensure uniqueness +// across processes while keeping determinism within one process. +var peerIDPlaceholderNonce = func() string { + var buf [16]byte + _, _ = rand.Read(buf[:]) // best effort: on failure the nonce is merely less unique; placeholders are not real identities + return hex.EncodeToString(buf[:]) +}() + +// peerIDPlaceholderFromArbitraryID generates a placeholder PeerID by hashing +// the given identifier with a per-process nonce and domain-specific salt. +// +// The resulting PeerID is deterministic within a single process (same input +// always produces the same PeerID) but differs across processes. This is a +// compatibility stub for providers that use identifiers which are not +// parseable as PeerIDs or did:key: values. The resulting PeerID is only +// used to pass addresses through legacy libp2p routing APIs (like Kubo and +// Rainbow) that require a PeerID.
It does not represent a real libp2p +// identity: no valid private key exists for this PeerID because it is a +// SHA-256 multihash of arbitrary data, not a key-derived identity. +func peerIDPlaceholderFromArbitraryID(id string) peer.ID { + mh, _ := multihash.Sum([]byte("contentrouter/peerIDPlaceholder:"+peerIDPlaceholderNonce+":"+id), multihash.SHA2_256, -1) + return peer.ID(mh) +} + +// hasHTTPURL returns true if any address is an http:// or https:// URL. +func hasHTTPURL(addrs types.Addresses) bool { + for i := range addrs { + if u := addrs[i].URL(); u != nil { + s := strings.ToLower(u.Scheme) + if s == "http" || s == "https" { + return true + } + } + } + return false +} diff --git a/routing/http/contentrouter/interop_test.go b/routing/http/contentrouter/interop_test.go new file mode 100644 index 000000000..f9e50cfdb --- /dev/null +++ b/routing/http/contentrouter/interop_test.go @@ -0,0 +1,136 @@ +package contentrouter + +import ( + "crypto/rand" + "testing" + + "github.com/ipfs/boxo/routing/http/types" + "github.com/libp2p/go-libp2p/core/crypto" + "github.com/libp2p/go-libp2p/core/peer" + "github.com/multiformats/go-multibase" + "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-multihash" + "github.com/multiformats/go-varint" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHasHTTPURL(t *testing.T) { + tests := []struct { + name string + addrs []string + expect bool + }{ + { + name: "http URL", + addrs: []string{"http://example.com"}, + expect: true, + }, + { + name: "https URL", + addrs: []string{"https://example.com"}, + expect: true, + }, + { + name: "non-HTTP URL", + addrs: []string{"foo://example.com"}, + expect: false, + }, + { + name: "multiaddr only", + addrs: []string{"/ip4/127.0.0.1/tcp/4001"}, + expect: false, + }, + { + name: "mixed with one https", + addrs: []string{"foo://example.com", "https://example.com"}, + expect: true, + }, + { + name: "empty slice", + addrs: nil, + expect: 
false, + }, + { + name: "single invalid address", + addrs: []string{"not-a-valid-address"}, + expect: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var addrs types.Addresses + for _, s := range tt.addrs { + a, err := types.NewAddress(s) + if err != nil { + // invalid addresses are stored with nil url/multiaddr + addrs = append(addrs, types.Address{}) + continue + } + addrs = append(addrs, a) + } + assert.Equal(t, tt.expect, hasHTTPURL(addrs)) + }) + } +} + +// TestPeerIDPlaceholderFromArbitraryID tests the determinism and uniqueness +// properties of peerIDPlaceholderFromArbitraryID directly. Within a single +// process, the same input must always produce the same PeerID, and different +// inputs must produce different PeerIDs. +func TestPeerIDPlaceholderFromArbitraryID(t *testing.T) { + t.Run("deterministic within process", func(t *testing.T) { + a := peerIDPlaceholderFromArbitraryID("provider-A") + b := peerIDPlaceholderFromArbitraryID("provider-A") + assert.Equal(t, a, b) + }) + + t.Run("different inputs produce different PeerIDs", func(t *testing.T) { + a := peerIDPlaceholderFromArbitraryID("provider-A") + b := peerIDPlaceholderFromArbitraryID("provider-B") + assert.NotEqual(t, a, b) + }) + + t.Run("result is a valid multihash", func(t *testing.T) { + pid := peerIDPlaceholderFromArbitraryID("provider-X") + _, err := multihash.Decode([]byte(pid)) + require.NoError(t, err, "placeholder PeerID should be a valid multihash") + }) +} + +// TestPeerIDFromDIDKey tests did:key parsing and PeerID derivation in +// isolation from the FindProviders flow. This covers edge cases (wrong prefix, +// unsupported key types) that are hard to exercise through the integration test. 
+func TestPeerIDFromDIDKey(t *testing.T) { + t.Run("valid ed25519 did:key", func(t *testing.T) { + _, pubKey, err := crypto.GenerateEd25519Key(rand.Reader) + require.NoError(t, err) + expectedPID, err := peer.IDFromPublicKey(pubKey) + require.NoError(t, err) + + didKey := encodeDIDKey(t, pubKey) + pid, err := peerIDFromDIDKey(didKey) + require.NoError(t, err) + assert.Equal(t, expectedPID, pid) + }) + + t.Run("not a did:key", func(t *testing.T) { + _, err := peerIDFromDIDKey("12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn") + require.Error(t, err) + assert.Contains(t, err.Error(), "not a did:key") + }) + + t.Run("unsupported key type", func(t *testing.T) { + // Encode a fake key with a different multicodec (e.g. secp256k1-pub 0xe7) + prefix := varint.ToUvarint(uint64(multicodec.Secp256k1Pub)) + fakeKey := make([]byte, 33) // secp256k1 pubkey is 33 bytes + data := append(prefix, fakeKey...) + encoded, err := multibase.Encode(multibase.Base58BTC, data) + require.NoError(t, err) + + _, err = peerIDFromDIDKey("did:key:" + encoded) + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported key type") + }) +} diff --git a/routing/http/filters/filters.go b/routing/http/filters/filters.go index 0aa93d4e2..ea5261b7c 100644 --- a/routing/http/filters/filters.go +++ b/routing/http/filters/filters.go @@ -9,7 +9,6 @@ import ( "github.com/ipfs/boxo/routing/http/types" "github.com/ipfs/boxo/routing/http/types/iter" logging "github.com/ipfs/go-log/v2" - "github.com/multiformats/go-multiaddr" ) var logger = logging.Logger("routing/http/filters") @@ -69,7 +68,6 @@ func ApplyFiltersToIter(recordsIter iter.ResultIter[types.Record], filterAddrs, record, ok := v.Val.(*types.PeerRecord) if !ok { logger.Errorw("problem casting find providers record", "Schema", v.Val.GetSchema(), "Type", reflect.TypeFor[iter.Result[types.Record]]().String()) - // drop failed type assertion return iter.Result[types.Record]{} } @@ -79,6 +77,19 @@ func ApplyFiltersToIter(recordsIter 
iter.ResultIter[types.Record], filterAddrs, } v.Val = record + case types.SchemaGeneric: + record, ok := v.Val.(*types.GenericRecord) + if !ok { + logger.Errorw("problem casting find providers record", "Schema", v.Val.GetSchema(), "Type", reflect.TypeOf(v.Val).String()) + return iter.Result[types.Record]{} + } + + record = applyGenericFilters(record, filterAddrs, filterProtocols) + if record == nil { + return iter.Result[types.Record]{} + } + v.Val = record + //nolint:staticcheck //lint:ignore SA1019 // ignore staticcheck case types.SchemaBitswap: @@ -86,7 +97,6 @@ func ApplyFiltersToIter(recordsIter iter.ResultIter[types.Record], filterAddrs, record, ok := v.Val.(*types.BitswapRecord) if !ok { logger.Errorw("problem casting find providers record", "Schema", v.Val.GetSchema(), "Type", reflect.TypeFor[iter.Result[types.Record]]().String()) - // drop failed type assertion return iter.Result[types.Record]{} } peerRecord := types.FromBitswapRecord(record) @@ -120,7 +130,7 @@ func ApplyFiltersToPeerRecordIter(peerRecordIter iter.ResultIter[*types.PeerReco filteredIter := ApplyFiltersToIter(mappedIter, filterAddrs, filterProtocols) - // Convert Record back to PeerRecord 🙃 + // Convert Record back to PeerRecord return iter.Map(filteredIter, func(v iter.Result[types.Record]) iter.Result[*types.PeerRecord] { if v.Err != nil || v.Val == nil { return iter.Result[*types.PeerRecord]{Err: v.Err} @@ -131,15 +141,15 @@ func ApplyFiltersToPeerRecordIter(peerRecordIter iter.ResultIter[*types.PeerReco }) } -// Applies the filters. Returns nil if the provider does not pass the protocols filter -// The address filter is more complicated because it potentially modifies the Addrs slice. +// applyFilters applies the filters to a PeerRecord. Returns nil if the provider +// does not pass the protocols filter. The address filter potentially modifies +// the Addrs slice.
func applyFilters(provider *types.PeerRecord, filterAddrs, filterProtocols []string) *types.PeerRecord { if len(filterAddrs) == 0 && len(filterProtocols) == 0 { return provider } if !protocolsAllowed(provider.Protocols, filterProtocols) { - // If the provider doesn't match any of the passed protocols, the provider is omitted from the response. return nil } @@ -148,87 +158,124 @@ func applyFilters(provider *types.PeerRecord, filterAddrs, filterProtocols []str return provider } - filteredAddrs := applyAddrFilter(provider.Addrs, filterAddrs) + // Convert []Multiaddr to Addresses to reuse applyAddrFilter which + // handles both multiaddr and URL protocol matching per IPIP-518. + addrs := make(types.Addresses, len(provider.Addrs)) + for i, ma := range provider.Addrs { + addrs[i] = types.NewAddressFromMultiaddr(ma.Multiaddr) + } - // If filtering resulted in no addrs, omit the provider - if len(filteredAddrs) == 0 { + filtered := applyAddrFilter(addrs, filterAddrs) + if len(filtered) == 0 { return nil } - provider.Addrs = filteredAddrs + // convert back to []Multiaddr + provider.Addrs = make([]types.Multiaddr, len(filtered)) + for i, a := range filtered { + provider.Addrs[i] = types.Multiaddr{Multiaddr: a.Multiaddr()} + } + return provider } -// applyAddrFilter filters a list of multiaddresses based on the provided filter query. +// applyGenericFilters applies the filters to a GenericRecord. Returns nil if +// the provider does not pass the protocols filter. The address filter +// potentially modifies the Addrs slice. 
+func applyGenericFilters(provider *types.GenericRecord, filterAddrs, filterProtocols []string) *types.GenericRecord { + if len(filterAddrs) == 0 && len(filterProtocols) == 0 { + return provider + } + + if !protocolsAllowed(provider.Protocols, filterProtocols) { + return nil + } + + if len(filterAddrs) == 0 || (len(provider.Addrs) == 0 && slices.Contains(filterAddrs, "unknown")) { + return provider + } + + filtered := applyAddrFilter(provider.Addrs, filterAddrs) + if len(filtered) == 0 { + return nil + } + + provider.Addrs = filtered + return provider +} + +// applyAddrFilter filters a list of addresses based on the provided filter query. // // Parameters: -// - addrs: A slice of types.Multiaddr to be filtered. +// - addrs: A slice of types.Address to be filtered. // - filterAddrsQuery: A slice of strings representing the filter criteria. // // The function supports both positive and negative filters: -// - Positive filters (e.g., "tcp", "udp") include addresses that match the specified protocols. +// - Positive filters (e.g., "tcp", "udp", "http") include addresses that match the specified protocols. // - Negative filters (e.g., "!tcp", "!udp") exclude addresses that match the specified protocols. +// - "unknown" can be passed to include providers whose addresses are unknown or cannot be parsed. // // If no filters are provided, the original list of addresses is returned unchanged. // If only negative filters are provided, addresses not matching any negative filter are included. // If positive filters are provided, only addresses matching at least one positive filter (and no negative filters) are included. // If both positive and negative filters are provided, the address must match at least one positive filter and no negative filters to be included. -// -// Returns: -// A new slice of types.Multiaddr containing only the addresses that pass the filter criteria. 
-func applyAddrFilter(addrs []types.Multiaddr, filterAddrsQuery []string) []types.Multiaddr { +func applyAddrFilter(addrs types.Addresses, filterAddrsQuery []string) types.Addresses { if len(filterAddrsQuery) == 0 { return addrs } - var filteredAddrs []types.Multiaddr - var positiveFilters, negativeFilters []multiaddr.Protocol + var filteredAddrs types.Addresses + var positiveFilters, negativeFilters []string + var includeUnknown bool // Separate positive and negative filters for _, filter := range filterAddrsQuery { - if strings.HasPrefix(filter, "!") { - negativeFilters = append(negativeFilters, multiaddr.ProtocolWithName(filter[1:])) + if filter == "unknown" { + includeUnknown = true + } else if strings.HasPrefix(filter, "!") { + negativeFilters = append(negativeFilters, filter[1:]) } else { - positiveFilters = append(positiveFilters, multiaddr.ProtocolWithName(filter)) + positiveFilters = append(positiveFilters, filter) } } for _, addr := range addrs { - protocols := addr.Protocols() + // Handle unknown (unparseable) addresses + if !addr.IsValid() { + if includeUnknown { + filteredAddrs = append(filteredAddrs, addr) + } + continue + } // Check negative filters - if containsAny(protocols, negativeFilters) { + shouldExclude := false + for _, filter := range negativeFilters { + if addr.HasProtocol(filter) { + shouldExclude = true + break + } + } + if shouldExclude { continue } // If no positive filters or matches a positive filter, include the address - if len(positiveFilters) == 0 || containsAny(protocols, positiveFilters) { + if len(positiveFilters) == 0 { filteredAddrs = append(filteredAddrs, addr) + } else { + for _, filter := range positiveFilters { + if addr.HasProtocol(filter) { + filteredAddrs = append(filteredAddrs, addr) + break + } + } } } return filteredAddrs } -// Helper function to check if protocols contain any of the filters -func containsAny(protocols []multiaddr.Protocol, filters []multiaddr.Protocol) bool { - for _, filter := range filters { - 
if containsProtocol(protocols, filter) { - return true - } - } - return false -} - -func containsProtocol(protos []multiaddr.Protocol, proto multiaddr.Protocol) bool { - for _, p := range protos { - if p.Code == proto.Code { - return true - } - } - return false -} - // protocolsAllowed returns true if the peerProtocols are allowed by the filter protocols. func protocolsAllowed(peerProtocols []string, filterProtocols []string) bool { if len(filterProtocols) == 0 { diff --git a/routing/http/filters/filters_test.go b/routing/http/filters/filters_test.go index d86316045..0fc4420cd 100644 --- a/routing/http/filters/filters_test.go +++ b/routing/http/filters/filters_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/ipfs/boxo/routing/http/types" + "github.com/ipfs/boxo/routing/http/types/iter" "github.com/libp2p/go-libp2p/core/peer" "github.com/multiformats/go-multiaddr" "github.com/stretchr/testify/assert" @@ -74,21 +75,21 @@ func TestApplyAddrFilter(t *testing.T) { addr7, _ := multiaddr.NewMultiaddr("/dns4/ny5.bootstrap.libp2p.io/tcp/443/wss/p2p/QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt") addr8, _ := multiaddr.NewMultiaddr("/ip4/127.0.0.1/udp/4001/quic-v1/webtransport/certhash/uEiAMrMcVWFNiqtSeRXZTwHTac4p9WcGh5hg8kVBzTC1JTA/certhash/uEiA4dfvbbbnBIYalhp1OpW1Bk-nuWIKSy21ol6vPea67Cw/p2p/QmcZf59bWwK5XFi76CZX8cbJ4BhTzzA3gU1ZjYZcYW3dwt") - addrs := []types.Multiaddr{ - {Multiaddr: addr1}, - {Multiaddr: addr2}, - {Multiaddr: addr3}, - {Multiaddr: addr4}, - {Multiaddr: addr5}, - {Multiaddr: addr6}, - {Multiaddr: addr7}, - {Multiaddr: addr8}, + addrs := types.Addresses{ + types.NewAddressFromMultiaddr(addr1), + types.NewAddressFromMultiaddr(addr2), + types.NewAddressFromMultiaddr(addr3), + types.NewAddressFromMultiaddr(addr4), + types.NewAddressFromMultiaddr(addr5), + types.NewAddressFromMultiaddr(addr6), + types.NewAddressFromMultiaddr(addr7), + types.NewAddressFromMultiaddr(addr8), } testCases := []struct { name string filterAddrs []string - expectedAddrs 
[]types.Multiaddr + expectedAddrs types.Addresses }{ { name: "No filter", @@ -98,52 +99,52 @@ func TestApplyAddrFilter(t *testing.T) { { name: "Filter TCP", filterAddrs: []string{"tcp"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr1}, {Multiaddr: addr3}, {Multiaddr: addr4}, {Multiaddr: addr7}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr1), types.NewAddressFromMultiaddr(addr3), types.NewAddressFromMultiaddr(addr4), types.NewAddressFromMultiaddr(addr7)}, }, { name: "Filter UDP", filterAddrs: []string{"udp"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr2}, {Multiaddr: addr5}, {Multiaddr: addr6}, {Multiaddr: addr8}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr2), types.NewAddressFromMultiaddr(addr5), types.NewAddressFromMultiaddr(addr6), types.NewAddressFromMultiaddr(addr8)}, }, { name: "Filter WebSocket", filterAddrs: []string{"ws"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr3}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr3)}, }, { name: "Exclude TCP", filterAddrs: []string{"!tcp"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr2}, {Multiaddr: addr5}, {Multiaddr: addr6}, {Multiaddr: addr8}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr2), types.NewAddressFromMultiaddr(addr5), types.NewAddressFromMultiaddr(addr6), types.NewAddressFromMultiaddr(addr8)}, }, { name: "Filter TCP addresses that don't have WebSocket and p2p-circuit", filterAddrs: []string{"tcp", "!ws", "!wss", "!p2p-circuit"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr1}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr1)}, }, { name: "Include WebTransport and exclude p2p-circuit", filterAddrs: []string{"webtransport", "!p2p-circuit"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr8}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr8)}, }, { - name: "empty for unknown protocol nae", + name: "empty for unknown protocol name", 
filterAddrs: []string{"fakeproto"}, - expectedAddrs: []types.Multiaddr{}, + expectedAddrs: types.Addresses{}, }, { name: "Include WebTransport but ignore unknown protocol name", filterAddrs: []string{"webtransport", "fakeproto"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr6}, {Multiaddr: addr8}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr6), types.NewAddressFromMultiaddr(addr8)}, }, { name: "Multiple filters", filterAddrs: []string{"tcp", "ws"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr1}, {Multiaddr: addr3}, {Multiaddr: addr4}, {Multiaddr: addr7}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr1), types.NewAddressFromMultiaddr(addr3), types.NewAddressFromMultiaddr(addr4), types.NewAddressFromMultiaddr(addr7)}, }, { name: "Multiple negative filters", filterAddrs: []string{"!tcp", "!ws"}, - expectedAddrs: []types.Multiaddr{{Multiaddr: addr2}, {Multiaddr: addr5}, {Multiaddr: addr6}, {Multiaddr: addr8}}, + expectedAddrs: types.Addresses{types.NewAddressFromMultiaddr(addr2), types.NewAddressFromMultiaddr(addr5), types.NewAddressFromMultiaddr(addr6), types.NewAddressFromMultiaddr(addr8)}, }, } @@ -156,24 +157,24 @@ func TestApplyAddrFilter(t *testing.T) { for _, expectedAddr := range tc.expectedAddrs { found := false for _, resultAddr := range result { - if expectedAddr.Multiaddr.Equal(resultAddr.Multiaddr) { + if expectedAddr.IsMultiaddr() && resultAddr.IsMultiaddr() && expectedAddr.Multiaddr().Equal(resultAddr.Multiaddr()) { found = true break } } - assert.True(t, found, "Expected address not found in test %s result: %s", tc.name, expectedAddr.Multiaddr) + assert.True(t, found, "Expected address not found in test %s result: %s", tc.name, expectedAddr.String()) } // Check that each result address is in the expected list for _, resultAddr := range result { found := false for _, expectedAddr := range tc.expectedAddrs { - if resultAddr.Multiaddr.Equal(expectedAddr.Multiaddr) { + if resultAddr.IsMultiaddr() && 
expectedAddr.IsMultiaddr() && resultAddr.Multiaddr().Equal(expectedAddr.Multiaddr()) { found = true break } } - assert.True(t, found, "Unexpected address found in test %s result: %s", tc.name, resultAddr.Multiaddr) + assert.True(t, found, "Unexpected address found in test %s result: %s", tc.name, resultAddr.String()) } }) } @@ -370,10 +371,334 @@ func TestApplyFilters(t *testing.T) { } } -func mustMultiaddr(t *testing.T, s string) types.Multiaddr { - addr, err := multiaddr.NewMultiaddr(s) - if err != nil { - t.Fatalf("Failed to create multiaddr: %v", err) +// TestApplyAddrFilterWithURLs verifies that applyAddrFilter handles the +// duck-typed Addresses used by GenericRecord, where the same list can contain +// multiaddrs, HTTP(S) URLs, and non-standard URI schemes. +// This is the core IPIP-518 filtering scenario that differs from PeerRecord +// (which only has multiaddrs). +func TestApplyAddrFilterWithURLs(t *testing.T) { + maAddr := types.NewAddressFromMultiaddr(mustRawMultiaddr(t, "/ip4/127.0.0.1/tcp/4001")) + httpsURL := mustAddr(t, "https://gateway.example.com") + httpURL := mustAddr(t, "http://gateway.example.com:8080") + fooURL := mustAddr(t, "foo://custom.example.com") + tcpURL := mustAddr(t, "tcp://192.168.1.1:4001") + tlsHTTPma := types.NewAddressFromMultiaddr(mustRawMultiaddr(t, "/dns4/example.com/tcp/443/tls/http")) + httpsMA := types.NewAddressFromMultiaddr(mustRawMultiaddr(t, "/dns/example.com/tcp/443/https")) + + allAddrs := types.Addresses{maAddr, httpsURL, httpURL, fooURL, tcpURL, tlsHTTPma, httpsMA} + + tests := []struct { + name string + filter []string + expected types.Addresses + }{ + { + name: "no filter returns all", + filter: []string{}, + expected: allAddrs, + }, + { + // "http" filter matches: + // - http:// and https:// URLs (HTTPS is HTTP over TLS) + // - multiaddrs with /http (including /tls/http) + // This is the expected behavior for clients looking for + // HTTP-capable providers regardless of TLS. 
+ name: "http matches http and https URLs and /http multiaddrs", + filter: []string{"http"}, + expected: types.Addresses{ + httpsURL, // https:// URL matches "http" + httpURL, // http:// URL matches "http" + tlsHTTPma, // /tls/http multiaddr matches "http" + }, + }, + { + // "https" filter is stricter than "http": + // - matches https:// URLs + // - matches /https and /tls/http multiaddrs + // - does NOT match plain http:// URLs + name: "https matches https URLs and /tls/http and /https multiaddrs", + filter: []string{"https"}, + expected: types.Addresses{ + httpsURL, // https:// URL + tlsHTTPma, // /tls/http multiaddr + httpsMA, // /https multiaddr + }, + }, + { + // Non-standard URI schemes are matched by exact scheme name. + name: "foo matches foo:// URLs only", + filter: []string{"foo"}, + expected: types.Addresses{fooURL}, + }, + { + // tcp:// as a URI scheme matches the "tcp" filter via scheme matching. + // Multiaddrs with tcp in the protocol stack also match. + name: "tcp matches tcp:// URLs and /tcp multiaddrs", + filter: []string{"tcp"}, + expected: types.Addresses{ + maAddr, // /ip4/.../tcp/4001 + tcpURL, // tcp://... + tlsHTTPma, // /dns4/.../tcp/443/tls/http + httpsMA, // /dns/.../tcp/443/https + }, + }, + { + // Negative filters exclude addresses that match the protocol. + name: "exclude http removes http and https URLs and /http multiaddrs", + filter: []string{"!http"}, + expected: types.Addresses{ + maAddr, // /ip4/.../tcp/4001 (no /http protocol) + fooURL, + tcpURL, + httpsMA, // /https is not /http + }, + }, + { + // Combining positive and negative filters. + name: "tcp without http", + filter: []string{"tcp", "!http"}, + expected: types.Addresses{ + maAddr, // /ip4/.../tcp/4001 + tcpURL, // tcp://... 
(has tcp scheme, no http) + httpsMA, // /dns/.../tcp/443/https (has tcp, /https is not /http) + }, + }, } - return types.Multiaddr{Multiaddr: addr} + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := applyAddrFilter(allAddrs, tt.filter) + + require.Len(t, result, len(tt.expected), "wrong number of addresses") + for i, want := range tt.expected { + assert.Equal(t, want.String(), result[i].String(), "address %d mismatch", i) + } + }) + } +} + +// TestApplyGenericFilters verifies filtering of GenericRecord, which is the +// primary use case introduced by IPIP-518. GenericRecord differs from +// PeerRecord in that its Addrs field accepts both multiaddrs and URIs. +func TestApplyGenericFilters(t *testing.T) { + t.Run("no filters returns record unchanged", func(t *testing.T) { + rec := &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "peer1", + Addrs: types.Addresses{mustAddr(t, "https://example.com")}, + Protocols: []string{"transport-ipfs-gateway-http"}, + } + + result := applyGenericFilters(rec, nil, nil) + require.NotNil(t, result) + assert.Len(t, result.Addrs, 1) + }) + + t.Run("protocol filter removes non-matching record", func(t *testing.T) { + // Record advertises gateway-http but filter asks for bitswap. + // The entire record should be removed (nil return). 
+ rec := &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "peer1", + Addrs: types.Addresses{mustAddr(t, "https://example.com")}, + Protocols: []string{"transport-ipfs-gateway-http"}, + } + + result := applyGenericFilters(rec, nil, []string{"transport-bitswap"}) + assert.Nil(t, result, "record should be filtered out when protocol does not match") + }) + + t.Run("protocol filter keeps matching record", func(t *testing.T) { + rec := &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "peer1", + Addrs: types.Addresses{mustAddr(t, "https://example.com")}, + Protocols: []string{"transport-ipfs-gateway-http"}, + } + + result := applyGenericFilters(rec, nil, []string{"transport-ipfs-gateway-http"}) + require.NotNil(t, result) + assert.Equal(t, "peer1", result.ID) + }) + + t.Run("addr filter on mixed multiaddr and URL addresses", func(t *testing.T) { + // A typical GenericRecord from a provider that supports both + // bitswap (multiaddr) and HTTP gateway (URL). + // Filtering for "http" should keep the URL and remove the multiaddr. + rec := &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "peer1", + Addrs: types.Addresses{ + types.NewAddressFromMultiaddr(mustRawMultiaddr(t, "/ip4/127.0.0.1/udp/4001/quic-v1")), + mustAddr(t, "https://gateway.example.com"), + mustAddr(t, "foo://custom.example.com"), + }, + Protocols: []string{"transport-bitswap", "transport-ipfs-gateway-http"}, + } + + result := applyGenericFilters(rec, []string{"http"}, nil) + require.NotNil(t, result) + require.Len(t, result.Addrs, 1) + assert.Equal(t, "https://gateway.example.com", result.Addrs[0].String()) + }) + + t.Run("addr filter removes record when no addresses match", func(t *testing.T) { + // When filtering removes all addresses, the record itself should be + // removed (nil return) since it has no usable connectivity info. 
+ rec := &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "peer1", + Addrs: types.Addresses{ + mustAddr(t, "foo://custom.example.com"), + }, + Protocols: []string{"transport-foo"}, + } + + result := applyGenericFilters(rec, []string{"http"}, nil) + assert.Nil(t, result, "record should be removed when no addresses match the filter") + }) + + t.Run("unknown addr filter keeps record with no addresses", func(t *testing.T) { + // "unknown" in the addr filter means "include providers whose + // addresses are unknown or cannot be parsed". + // A record with no addresses should be kept. + rec := &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "peer1", + Protocols: []string{"transport-ipfs-gateway-http"}, + } + + result := applyGenericFilters(rec, []string{"unknown"}, nil) + require.NotNil(t, result, "record with no addrs should be kept when filter contains 'unknown'") + }) +} + +// TestApplyFiltersToIterMixedSchemas verifies that the iterator-level filter +// correctly processes a stream containing both PeerRecord and GenericRecord. +// This is the real-world scenario: a routing server returns a mix of schema +// types and the client needs to filter all of them consistently. 
+func TestApplyFiltersToIterMixedSchemas(t *testing.T) { + pid, err := peer.Decode("12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn") + require.NoError(t, err) + + records := []iter.Result[types.Record]{ + // PeerRecord with bitswap transport (multiaddr only) + {Val: &types.PeerRecord{ + Schema: types.SchemaPeer, + ID: &pid, + Protocols: []string{"transport-bitswap"}, + Addrs: []types.Multiaddr{ + mustMultiaddr(t, "/ip4/127.0.0.1/tcp/4001"), + mustMultiaddr(t, "/ip4/127.0.0.1/udp/4001/quic-v1"), + }, + }}, + // GenericRecord with gateway transport (URL + multiaddr) + {Val: &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "gateway-provider", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{ + mustAddr(t, "https://gateway.example.com"), + types.NewAddressFromMultiaddr(mustRawMultiaddr(t, "/dns4/gateway.example.com/tcp/443/tls/http")), + mustAddr(t, "foo://unrelated.example.com"), + }, + }}, + // GenericRecord that should be filtered out by protocol + {Val: &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "other-provider", + Protocols: []string{"transport-graphsync-filecoinv1"}, + Addrs: types.Addresses{mustAddr(t, "https://filecoin.example.com")}, + }}, + } + + t.Run("filter by protocol keeps matching schemas", func(t *testing.T) { + input := iter.FromSlice(records) + filtered := ApplyFiltersToIter(input, nil, []string{"transport-ipfs-gateway-http"}) + defer filtered.Close() + + var results []types.Record + for filtered.Next() { + v := filtered.Val() + require.NoError(t, v.Err) + results = append(results, v.Val) + } + + // Only the GenericRecord with gateway-http protocol should survive + require.Len(t, results, 1) + gr, ok := results[0].(*types.GenericRecord) + require.True(t, ok, "expected GenericRecord") + assert.Equal(t, "gateway-provider", gr.ID) + }) + + t.Run("filter by addr with http keeps URLs and /http multiaddrs", func(t *testing.T) { + input := iter.FromSlice(records) + filtered := 
ApplyFiltersToIter(input, []string{"http"}, nil) + defer filtered.Close() + + var results []types.Record + for filtered.Next() { + v := filtered.Val() + require.NoError(t, v.Err) + results = append(results, v.Val) + } + + // PeerRecord has no /http addrs, so it's filtered out. + // GenericRecord "gateway-provider" has https URL and /tls/http multiaddr. + // GenericRecord "other-provider" has https URL. + require.Len(t, results, 2) + + gr1, ok := results[0].(*types.GenericRecord) + require.True(t, ok) + assert.Equal(t, "gateway-provider", gr1.ID) + // foo:// address should be filtered out, only http-capable ones remain + assert.Len(t, gr1.Addrs, 2) + + gr2, ok := results[1].(*types.GenericRecord) + require.True(t, ok) + assert.Equal(t, "other-provider", gr2.ID) + }) + + t.Run("combined protocol and addr filter", func(t *testing.T) { + input := iter.FromSlice(records) + filtered := ApplyFiltersToIter(input, []string{"http"}, []string{"transport-ipfs-gateway-http"}) + defer filtered.Close() + + var results []types.Record + for filtered.Next() { + v := filtered.Val() + require.NoError(t, v.Err) + results = append(results, v.Val) + } + + // Only gateway-provider matches both protocol and addr filters + require.Len(t, results, 1) + gr, ok := results[0].(*types.GenericRecord) + require.True(t, ok) + assert.Equal(t, "gateway-provider", gr.ID) + assert.Len(t, gr.Addrs, 2, "https URL and /tls/http multiaddr should survive") + }) +} + +func mustMultiaddr(t *testing.T, s string) types.Multiaddr { + t.Helper() + ma, err := multiaddr.NewMultiaddr(s) + require.NoError(t, err) + return types.Multiaddr{Multiaddr: ma} +} + +// mustRawMultiaddr returns a raw multiaddr.Multiaddr (not types.Multiaddr). +func mustRawMultiaddr(t *testing.T, s string) multiaddr.Multiaddr { + t.Helper() + ma, err := multiaddr.NewMultiaddr(s) + require.NoError(t, err) + return ma +} + +// mustAddr creates a types.Address from a string or fails the test. 
+func mustAddr(t *testing.T, s string) types.Address { + t.Helper() + a, err := types.NewAddress(s) + require.NoError(t, err) + return a } diff --git a/routing/http/server/server_test.go b/routing/http/server/server_test.go index cc17cb5bf..b92b43c25 100644 --- a/routing/http/server/server_test.go +++ b/routing/http/server/server_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "crypto/rand" + "encoding/json" "fmt" "io" "net/http" @@ -232,10 +233,6 @@ func TestProviders(t *testing.T) { runTest(t, mediaTypeNDJSON, "webtransport,!p2p-circuit,unknown", "", false, true, `{"Addrs":["/ip4/8.8.8.8/udp/4001/quic-v1/webtransport"],"ID":"12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn","Protocols":["transport-bitswap"],"Schema":"peer"}`+"\n"+`{"Addrs":[],"ID":"12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vz","Protocols":["transport-ipfs-gateway-http"],"Schema":"peer"}`+"\n") }) - t.Run("NDJSON Response with addr filtering", func(t *testing.T) { - runTest(t, mediaTypeNDJSON, "webtransport,!p2p-circuit,unknown", "", false, true, `{"Addrs":["/ip4/8.8.8.8/udp/4001/quic-v1/webtransport"],"ID":"12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn","Protocols":["transport-bitswap"],"Schema":"peer"}`+"\n"+`{"Addrs":[],"ID":"12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vz","Protocols":["transport-ipfs-gateway-http"],"Schema":"peer"}`+"\n") - }) - t.Run("Empty NDJSON Response", func(t *testing.T) { runTest(t, mediaTypeNDJSON, "", "", true, true, "") }) @@ -263,6 +260,268 @@ func TestProviders(t *testing.T) { }) } +// TestProvidersMixedSchemas verifies that the server correctly serializes a mix +// of PeerRecord and GenericRecord in FindProviders responses. This is the +// primary IPIP-518 server-side scenario: a routing backend returns both +// traditional peer records (multiaddrs only) and generic records (with URLs +// and non-standard URI schemes) in the same response stream. 
+func TestProvidersMixedSchemas(t *testing.T) { + cidStr := "bafkreifjjcie6lypi6ny7amxnfftagclbuxndqonfipmb64f2km2devei4" + cb, err := cid.Decode(cidStr) + require.NoError(t, err) + + pid, err := peer.Decode("12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn") + require.NoError(t, err) + + addr1, _ := multiaddr.NewMultiaddr("/ip4/127.0.0.1/tcp/4001") + + results := iter.FromSlice([]iter.Result[types.Record]{ + // Traditional PeerRecord with multiaddrs + {Val: &types.PeerRecord{ + Schema: types.SchemaPeer, + ID: &pid, + Protocols: []string{"transport-bitswap"}, + Addrs: []types.Multiaddr{{Multiaddr: addr1}}, + }}, + // GenericRecord with URLs (IPIP-518) + {Val: &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "gateway-provider-1", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{ + mustAddr(t, "https://gateway.example.com"), + mustAddr(t, "foo://custom.example.com"), + }, + }}, + }) + + router := &mockContentRouter{} + server := httptest.NewServer(Handler(router)) + t.Cleanup(server.Close) + serverAddr := "http://" + server.Listener.Addr().String() + + router.On("FindProviders", mock.Anything, cb, DefaultRecordsLimit).Return(results, nil) + + resp, err := http.Get(serverAddr + "/routing/v1/providers/" + cidStr) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + // Verify both records are present in the JSON response. + // PeerRecord uses typed Addrs (multiaddrs), GenericRecord uses string Addrs (URLs). 
+ bodyStr := string(body) + require.Contains(t, bodyStr, `"Schema":"peer"`) + require.Contains(t, bodyStr, `"Schema":"generic"`) + require.Contains(t, bodyStr, `"/ip4/127.0.0.1/tcp/4001"`) + require.Contains(t, bodyStr, `"https://gateway.example.com"`) + require.Contains(t, bodyStr, `"foo://custom.example.com"`) + require.Contains(t, bodyStr, `"transport-ipfs-gateway-http"`) +} + +// TestProvidersMixedSchemasWithFiltering verifies that filter-addrs and +// filter-protocols work correctly when the response contains both PeerRecord +// and GenericRecord. The server must apply filters to all schema types. +func TestProvidersMixedSchemasWithFiltering(t *testing.T) { + cidStr := "bafkreifjjcie6lypi6ny7amxnfftagclbuxndqonfipmb64f2km2devei4" + cb, err := cid.Decode(cidStr) + require.NoError(t, err) + + pid, err := peer.Decode("12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn") + require.NoError(t, err) + + addr1, _ := multiaddr.NewMultiaddr("/ip4/127.0.0.1/tcp/4001") + + results := iter.FromSlice([]iter.Result[types.Record]{ + // PeerRecord with bitswap (no /http in addrs) + {Val: &types.PeerRecord{ + Schema: types.SchemaPeer, + ID: &pid, + Protocols: []string{"transport-bitswap"}, + Addrs: []types.Multiaddr{{Multiaddr: addr1}}, + }}, + // GenericRecord with gateway URL + {Val: &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "gateway-provider-1", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{ + mustAddr(t, "https://gateway.example.com"), + }, + }}, + }) + + router := &mockContentRouter{} + server := httptest.NewServer(Handler(router)) + t.Cleanup(server.Close) + serverAddr := "http://" + server.Listener.Addr().String() + + router.On("FindProviders", mock.Anything, cb, DefaultRecordsLimit).Return(results, nil) + + // Filter for transport-ipfs-gateway-http protocol only + urlStr := fmt.Sprintf("%s/routing/v1/providers/%s", serverAddr, cidStr) + urlStr = filters.AddFiltersToURL(urlStr, []string{"transport-ipfs-gateway-http"}, 
nil) + + resp, err := http.Get(urlStr) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + bodyStr := string(body) + // PeerRecord with bitswap should be filtered out + require.NotContains(t, bodyStr, `"transport-bitswap"`) + // GenericRecord with gateway should remain + require.Contains(t, bodyStr, `"Schema":"generic"`) + require.Contains(t, bodyStr, `"https://gateway.example.com"`) +} + +// TestProvidersMixedSchemasNDJSON verifies that the server correctly serializes +// a mix of PeerRecord and GenericRecord in NDJSON streaming responses. +// This complements TestProvidersMixedSchemas which only tests the JSON path. +func TestProvidersMixedSchemasNDJSON(t *testing.T) { + cidStr := "bafkreifjjcie6lypi6ny7amxnfftagclbuxndqonfipmb64f2km2devei4" + cb, err := cid.Decode(cidStr) + require.NoError(t, err) + + pid, err := peer.Decode("12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn") + require.NoError(t, err) + + addr1, _ := multiaddr.NewMultiaddr("/ip4/127.0.0.1/tcp/4001") + + results := iter.FromSlice([]iter.Result[types.Record]{ + {Val: &types.PeerRecord{ + Schema: types.SchemaPeer, + ID: &pid, + Protocols: []string{"transport-bitswap"}, + Addrs: []types.Multiaddr{{Multiaddr: addr1}}, + }}, + {Val: &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "did:key:z6Mkm1example", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{ + mustAddr(t, "https://gateway.example.com"), + mustAddr(t, "foo://custom.example.com"), + }, + }}, + }) + + router := &mockContentRouter{} + server := httptest.NewServer(Handler(router)) + t.Cleanup(server.Close) + serverAddr := "http://" + server.Listener.Addr().String() + + router.On("FindProviders", mock.Anything, cb, DefaultStreamingRecordsLimit).Return(results, nil) + + req, err := http.NewRequest(http.MethodGet, serverAddr+"/routing/v1/providers/"+cidStr, nil) + require.NoError(t, err) + 
req.Header.Set("Accept", mediaTypeNDJSON) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Equal(t, mediaTypeNDJSON, resp.Header.Get("Content-Type")) + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + // Each NDJSON line is a separate record. + lines := strings.Split(strings.TrimSpace(string(body)), "\n") + require.Len(t, lines, 2, "expected two NDJSON lines (peer + generic)") + + // Line 0: PeerRecord + require.Contains(t, lines[0], `"Schema":"peer"`) + require.Contains(t, lines[0], `"/ip4/127.0.0.1/tcp/4001"`) + + // Line 1: GenericRecord with URLs + require.Contains(t, lines[1], `"Schema":"generic"`) + require.Contains(t, lines[1], `"did:key:z6Mkm1example"`) + require.Contains(t, lines[1], `"https://gateway.example.com"`) + require.Contains(t, lines[1], `"foo://custom.example.com"`) + require.Contains(t, lines[1], `"transport-ipfs-gateway-http"`) +} + +// TestProvidersMixedSchemasExtraFields verifies that protocol-specific metadata +// (Extra fields) on GenericRecord survive server serialization in both JSON +// and NDJSON paths. The spec allows extra fields like transport-ipfs-gateway-http +// to carry protocol-specific metadata (e.g. "foobar" capability bitfield). 
+func TestProvidersMixedSchemasExtraFields(t *testing.T) { + cidStr := "bafkreifjjcie6lypi6ny7amxnfftagclbuxndqonfipmb64f2km2devei4" + cb, err := cid.Decode(cidStr) + require.NoError(t, err) + + gr := &types.GenericRecord{ + Schema: types.SchemaGeneric, + ID: "QmUA9D3H7HeCYsirB3KmPSvZh3dNXMZas6Lwgr4fv1HTTp", + Protocols: []string{"transport-ipfs-gateway-http"}, + Addrs: types.Addresses{mustAddr(t, "https://dag.w3s.link")}, + Extra: map[string]json.RawMessage{ + "transport-ipfs-gateway-http": json.RawMessage(`"foobar"`), + }, + } + + t.Run("JSON", func(t *testing.T) { + results := iter.FromSlice([]iter.Result[types.Record]{ + {Val: gr}, + }) + + router := &mockContentRouter{} + server := httptest.NewServer(Handler(router)) + t.Cleanup(server.Close) + serverAddr := "http://" + server.Listener.Addr().String() + + router.On("FindProviders", mock.Anything, cb, DefaultRecordsLimit).Return(results, nil) + + resp, err := http.Get(serverAddr + "/routing/v1/providers/" + cidStr) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + bodyStr := string(body) + require.Contains(t, bodyStr, `"transport-ipfs-gateway-http"`) + // Extra field must survive serialization + require.Contains(t, bodyStr, `"foobar"`) + }) + + t.Run("NDJSON", func(t *testing.T) { + results := iter.FromSlice([]iter.Result[types.Record]{ + {Val: gr}, + }) + + router := &mockContentRouter{} + server := httptest.NewServer(Handler(router)) + t.Cleanup(server.Close) + serverAddr := "http://" + server.Listener.Addr().String() + + router.On("FindProviders", mock.Anything, cb, DefaultStreamingRecordsLimit).Return(results, nil) + + req, err := http.NewRequest(http.MethodGet, serverAddr+"/routing/v1/providers/"+cidStr, nil) + require.NoError(t, err) + req.Header.Set("Accept", mediaTypeNDJSON) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + + body, err := 
io.ReadAll(resp.Body) + require.NoError(t, err) + bodyStr := string(body) + require.Contains(t, bodyStr, `"transport-ipfs-gateway-http"`) + require.Contains(t, bodyStr, `"foobar"`) + }) +} + +func mustAddr(t *testing.T, s string) types.Address { + t.Helper() + addr, err := types.NewAddress(s) + require.NoError(t, err) + return addr +} + func TestPeers(t *testing.T) { makeRequest := func(t *testing.T, router *mockContentRouter, contentType, arg, filterAddrs, filterProtocols string) *http.Response { server := httptest.NewServer(Handler(router)) diff --git a/routing/http/types/address.go b/routing/http/types/address.go new file mode 100644 index 000000000..0bd1a06f7 --- /dev/null +++ b/routing/http/types/address.go @@ -0,0 +1,309 @@ +package types + +import ( + "encoding/json" + "fmt" + "net" + "net/url" + "strings" + + "github.com/ipfs/boxo/routing/http/internal/drjson" + "github.com/multiformats/go-multiaddr" +) + +// Address represents an address that can be either a multiaddr or a URI. +// It implements the parsing logic from IPIP-518: strings starting with '/' +// are parsed as multiaddrs, others are parsed as URIs. +// This type is schema-agnostic and will accept any valid URI scheme. +type Address struct { + raw string + multiaddr multiaddr.Multiaddr + url *url.URL +} + +// NewAddress creates a new Address from a string. +// It accepts any valid multiaddr or URI, following IPIP-518 parsing rules. 
+func NewAddress(s string) (Address, error) { + addr := Address{raw: s} + + // IPIP-518 parsing logic + if strings.HasPrefix(s, "/") { + // Parse as multiaddr + ma, err := multiaddr.NewMultiaddr(s) + if err != nil { + return Address{}, fmt.Errorf("invalid multiaddr: %w", err) + } + addr.multiaddr = ma + } else { + // Parse as URI - accept any valid URI scheme + u, err := url.Parse(s) + if err != nil { + return Address{}, fmt.Errorf("invalid uri: %w", err) + } + // Must be absolute URL + if !u.IsAbs() { + return Address{}, fmt.Errorf("uri must be absolute") + } + addr.url = u + } + + return addr, nil +} + +// NewAddressFromMultiaddr creates a new Address from a multiaddr. +func NewAddressFromMultiaddr(ma multiaddr.Multiaddr) Address { + return Address{ + raw: ma.String(), + multiaddr: ma, + } +} + +// String returns the original string representation of the address. +func (a *Address) String() string { + return a.raw +} + +// Multiaddr returns the multiaddr if this is a multiaddr, nil otherwise. +func (a *Address) Multiaddr() multiaddr.Multiaddr { + return a.multiaddr +} + +// URL returns the URL if this is a URL, nil otherwise. +func (a *Address) URL() *url.URL { + return a.url +} + +// IsMultiaddr returns true if this address is a multiaddr. +func (a *Address) IsMultiaddr() bool { + return a.multiaddr != nil +} + +// IsURL returns true if this address is a URL. +func (a *Address) IsURL() bool { + return a.url != nil +} + +// IsValid returns true if the address was successfully parsed as either +// a multiaddr or a URI. Returns false for unparseable addresses. +func (a *Address) IsValid() bool { + return a.multiaddr != nil || a.url != nil +} + +// MarshalJSON implements json.Marshaler. +func (a *Address) MarshalJSON() ([]byte, error) { + return drjson.MarshalJSONBytes(a.raw) +} + +// UnmarshalJSON implements json.Unmarshaler. 
+func (a *Address) UnmarshalJSON(b []byte) error { + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + + addr, err := NewAddress(s) + if err != nil { + // Per IPIP-518: implementations MUST skip addresses they cannot parse + // We'll store the raw string but mark it as invalid + a.raw = s + a.multiaddr = nil + a.url = nil + return nil // Don't return error, just skip + } + + *a = addr + return nil +} + +// Protocols returns the protocols in this address. +// For multiaddrs, it returns the multiaddr protocols. +// For URLs, it returns the scheme. +func (a *Address) Protocols() []string { + if a.url != nil { + return []string{a.url.Scheme} + } + if a.multiaddr != nil { + protos := a.multiaddr.Protocols() + result := make([]string, len(protos)) + for i, p := range protos { + result[i] = p.Name + } + return result + } + return nil +} + +// HasProtocol checks if the address contains the given protocol. +// For URLs, it checks the scheme. For multiaddrs, it checks the protocols. 
+// Special handling for http/https as per IPIP-518: +// - "http" matches http://, https:// URLs and /http, /tls/http multiaddrs +// - "https" matches https:// URLs, /tls/http, /https multiaddrs +func (a *Address) HasProtocol(proto string) bool { + proto = strings.ToLower(proto) + + if a.url != nil { + scheme := strings.ToLower(a.url.Scheme) + + switch proto { + case "http": + // "http" matches both http and https URLs + return scheme == "http" || scheme == "https" + case "https": + return scheme == "https" + default: + return scheme == proto + } + } + + if a.multiaddr != nil { + protocols := a.Protocols() + + switch proto { + case "http": + // "http" matches /http, including /tls/http + for _, p := range protocols { + if p == "http" { + return true + } + } + case "https": + // "https" matches /https or the combination /tls/http + hasTLS := false + hasHTTP := false + for _, p := range protocols { + switch p { + case "https": + return true + case "tls": + hasTLS = true + case "http": + hasHTTP = true + } + } + return hasTLS && hasHTTP + default: + for _, p := range protocols { + if p == proto { + return true + } + } + } + } + + return false +} + +// ToMultiaddr attempts to convert an HTTP(S) URL to a multiaddr for backward compatibility. +// Returns nil if the address cannot be converted (e.g., non-HTTP schemes or invalid addresses). +// This is a temporary compatibility layer for the transition period while existing software +// expects multiaddrs with /http protocol to signal HTTP retrieval support. 
+func (a *Address) ToMultiaddr() multiaddr.Multiaddr { + // If already a multiaddr, return it as-is + if a.IsMultiaddr() { + return a.multiaddr + } + + // If not a URL, cannot convert + if !a.IsURL() || a.url == nil { + return nil + } + + // Only convert http/https URLs + scheme := strings.ToLower(a.url.Scheme) + if scheme != "http" && scheme != "https" { + return nil + } + + // Parse hostname and port + host := a.url.Hostname() + port := a.url.Port() + + // Set default ports if not specified + if port == "" { + if scheme == "https" { + port = "443" + } else { + port = "80" + } + } + + // Determine address type + var addrProto string + if ip := net.ParseIP(host); ip != nil { + // Use IP-specific protocols for IP addresses + if ip.To4() != nil { + addrProto = "ip4" + } else { + addrProto = "ip6" + } + } else { + // Use generic /dns for domain names (resolves to both IPv4 and IPv6) + addrProto = "dns" + } + + // Build multiaddr string using scheme directly (http or https) + maStr := fmt.Sprintf("/%s/%s/tcp/%s/%s", addrProto, host, port, scheme) + + // Preserve URL path as http-path component. + // Leading "/" is stripped per examples in the http-path spec: + // https://github.com/multiformats/multiaddr/blob/master/protocols/http-path.md + if p := strings.TrimPrefix(a.url.Path, "/"); p != "" { + maStr += "/http-path/" + url.PathEscape(p) + } + + // Create and return multiaddr + ma, err := multiaddr.NewMultiaddr(maStr) + if err != nil { + // Log the error for debugging but return nil + // This can happen with invalid hostnames or other edge cases + return nil + } + return ma +} + +// Addresses is a slice of Address that can be marshaled/unmarshaled from/to JSON. +type Addresses []Address + +// String returns a string representation of the addresses for printing. 
+func (addrs Addresses) String() string { + if len(addrs) == 0 { + return "[]" + } + + strs := make([]string, len(addrs)) + for i, addr := range addrs { + strs[i] = addr.String() + } + return fmt.Sprintf("[%s]", strings.Join(strs, " ")) +} + +// MarshalJSON implements json.Marshaler for Addresses. +func (addrs Addresses) MarshalJSON() ([]byte, error) { + strs := make([]string, len(addrs)) + for i, addr := range addrs { + strs[i] = addr.String() + } + return json.Marshal(strs) +} + +// UnmarshalJSON implements json.Unmarshaler for Addresses. +// Per IPIP-518, it MUST skip addresses that cannot be parsed. +func (addrs *Addresses) UnmarshalJSON(b []byte) error { + var strs []string + if err := json.Unmarshal(b, &strs); err != nil { + return err + } + + result := make(Addresses, 0, len(strs)) + for _, s := range strs { + addr := Address{raw: s} + if a, err := NewAddress(s); err == nil { + addr = a + } + // Always add the address, even if invalid (will be skipped during filtering) + result = append(result, addr) + } + + *addrs = result + return nil +} diff --git a/routing/http/types/address_test.go b/routing/http/types/address_test.go new file mode 100644 index 000000000..f8271f8d0 --- /dev/null +++ b/routing/http/types/address_test.go @@ -0,0 +1,436 @@ +package types + +import ( + "encoding/json" + "testing" + + "github.com/multiformats/go-multiaddr" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewAddress(t *testing.T) { + tests := []struct { + name string + input string + wantErr bool + isURL bool + isMultiaddr bool + protocols []string + }{ + { + name: "valid multiaddr", + input: "/ip4/127.0.0.1/tcp/4001", + isMultiaddr: true, + protocols: []string{"ip4", "tcp"}, + }, + { + name: "valid https URL", + input: "https://example.com", + isURL: true, + protocols: []string{"https"}, + }, + { + name: "valid http URL", + input: "http://example.com:8080", + isURL: true, + protocols: []string{"http"}, + }, + { + name: "valid http 
URL with path", + input: "http://example.com:8080/path", + isURL: true, + protocols: []string{"http"}, + }, + { + name: "other URI scheme foo", + input: "foo://example.com/path", + isURL: true, + protocols: []string{"foo"}, + }, + { + name: "other URI scheme bar", + input: "bar://something", + isURL: true, + protocols: []string{"bar"}, + }, + { + name: "relative URL", + input: "example.com", + wantErr: true, + }, + { + name: "invalid multiaddr", + input: "/invalid", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + addr, err := NewAddress(tt.input) + if tt.wantErr { + assert.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.input, addr.String()) + assert.Equal(t, tt.isURL, addr.IsURL()) + assert.Equal(t, tt.isMultiaddr, addr.IsMultiaddr()) + assert.Equal(t, tt.protocols, addr.Protocols()) + }) + } +} + +func TestAddressHasProtocol(t *testing.T) { + tests := []struct { + name string + address string + protocol string + expected bool + }{ + // Multiaddr tests + { + name: "multiaddr has tcp", + address: "/ip4/127.0.0.1/tcp/4001", + protocol: "tcp", + expected: true, + }, + { + name: "multiaddr doesn't have udp", + address: "/ip4/127.0.0.1/tcp/4001", + protocol: "udp", + expected: false, + }, + { + name: "multiaddr with /http", + address: "/dns4/example.com/tcp/80/http", + protocol: "http", + expected: true, + }, + { + name: "multiaddr with /tls/http matches https", + address: "/dns4/example.com/tcp/443/tls/http", + protocol: "https", + expected: true, + }, + { + name: "multiaddr with /tls/http matches http", + address: "/dns4/example.com/tcp/443/tls/http", + protocol: "http", + expected: true, + }, + { + name: "multiaddr with /tls/http matches tls", + address: "/dns4/example.com/tcp/443/tls/http", + protocol: "tls", + expected: true, + }, + // URL tests + { + name: "https URL matches https", + address: "https://example.com", + protocol: "https", + expected: true, + }, + { + name: "https URL matches 
http", + address: "https://example.com", + protocol: "http", + expected: true, + }, + { + name: "http URL matches http", + address: "http://example.com", + protocol: "http", + expected: true, + }, + { + name: "http URL doesn't match https", + address: "http://example.com", + protocol: "https", + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + addr, err := NewAddress(tt.address) + require.NoError(t, err) + assert.Equal(t, tt.expected, addr.HasProtocol(tt.protocol)) + }) + } +} + +func TestAddressJSON(t *testing.T) { + tests := []struct { + name string + address string + }{ + { + name: "multiaddr", + address: "/ip4/127.0.0.1/tcp/4001", + }, + { + name: "https URL", + address: "https://example.com", + }, + { + name: "http URL with port", + address: "http://example.com:8080", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + addr, err := NewAddress(tt.address) + require.NoError(t, err) + + // Marshal to JSON + data, err := json.Marshal(&addr) + require.NoError(t, err) + + // Should be a JSON string + var str string + err = json.Unmarshal(data, &str) + require.NoError(t, err) + assert.Equal(t, tt.address, str) + + // Unmarshal back + var addr2 Address + err = json.Unmarshal(data, &addr2) + require.NoError(t, err) + assert.Equal(t, addr.String(), addr2.String()) + assert.Equal(t, addr.IsURL(), addr2.IsURL()) + assert.Equal(t, addr.IsMultiaddr(), addr2.IsMultiaddr()) + }) + } +} + +func TestAddressesJSON(t *testing.T) { + input := []string{ + "/ip4/127.0.0.1/tcp/4001", + "https://example.com", + "http://localhost:8080", + "/invalid/addr", // This should be included but marked as invalid + } + + // Create Addresses from strings + var addrs Addresses + data, err := json.Marshal(input) + require.NoError(t, err) + + err = json.Unmarshal(data, &addrs) + require.NoError(t, err) + + // Should have all 4 addresses + assert.Len(t, addrs, 4) + + // First three should be valid + assert.True(t, 
addrs[0].IsValid()) + assert.True(t, addrs[0].IsMultiaddr()) + + assert.True(t, addrs[1].IsValid()) + assert.True(t, addrs[1].IsURL()) + + assert.True(t, addrs[2].IsValid()) + assert.True(t, addrs[2].IsURL()) + + // Last one should be invalid but present + assert.False(t, addrs[3].IsValid()) + assert.Equal(t, "/invalid/addr", addrs[3].String()) + + // Marshal back should give the same strings + data2, err := json.Marshal(addrs) + require.NoError(t, err) + + var output []string + err = json.Unmarshal(data2, &output) + require.NoError(t, err) + + assert.Equal(t, input, output) +} + +func TestNewAddressFromMultiaddr(t *testing.T) { + ma, err := multiaddr.NewMultiaddr("/ip4/127.0.0.1/tcp/4001") + require.NoError(t, err) + + addr := NewAddressFromMultiaddr(ma) + assert.True(t, addr.IsMultiaddr()) + assert.False(t, addr.IsURL()) + assert.Equal(t, ma.String(), addr.String()) + assert.Equal(t, ma, addr.Multiaddr()) + assert.Nil(t, addr.URL()) +} + +func TestPeerRecordMultiaddrsOnly(t *testing.T) { + // PeerRecord Addrs field is []Multiaddr (multiaddrs only, no URLs) + jsonData := `{ + "Schema": "peer", + "ID": "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + "Addrs": [ + "/ip4/192.168.1.1/tcp/4001", + "/dns4/libp2p.example.com/tcp/443/wss" + ], + "Protocols": ["transport-bitswap"] + }` + + var pr PeerRecord + err := json.Unmarshal([]byte(jsonData), &pr) + require.NoError(t, err) + + assert.Equal(t, "peer", pr.Schema) + assert.Len(t, pr.Addrs, 2) + assert.Equal(t, "/ip4/192.168.1.1/tcp/4001", pr.Addrs[0].String()) + assert.Equal(t, "/dns4/libp2p.example.com/tcp/443/wss", pr.Addrs[1].String()) + + // Marshal back + data, err := json.Marshal(pr) + require.NoError(t, err) + + var check map[string]interface{} + err = json.Unmarshal(data, &check) + require.NoError(t, err) + + addrs, ok := check["Addrs"].([]interface{}) + require.True(t, ok) + assert.Len(t, addrs, 2) +} + +func TestPeerRecordURLsInAddrs(t *testing.T) { + // PeerRecord.Addrs is []Multiaddr. 
The Multiaddr UnmarshalJSON returns + // an error for non-multiaddr strings, so a PeerRecord with a URL in + // Addrs fails to deserialize entirely. This documents why URLs belong + // in GenericRecord (which uses Addresses), not PeerRecord. + jsonData := `{ + "Schema": "peer", + "ID": "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + "Addrs": [ + "/ip4/192.168.1.1/tcp/4001", + "https://example.com", + "/dns4/libp2p.example.com/tcp/443/wss" + ], + "Protocols": ["transport-bitswap"] + }` + + var pr PeerRecord + err := json.Unmarshal([]byte(jsonData), &pr) + require.Error(t, err, "PeerRecord should fail to unmarshal when Addrs contains a URL") + assert.Contains(t, err.Error(), "must begin with /") +} + +func TestAddressToMultiaddr(t *testing.T) { + tests := []struct { + name string + input string + expected string // Expected multiaddr string, empty if conversion not possible + }{ + { + name: "https URL with default port", + input: "https://example.com", + expected: "/dns/example.com/tcp/443/https", + }, + { + name: "http URL with default port", + input: "http://example.com", + expected: "/dns/example.com/tcp/80/http", + }, + { + name: "http URL with custom port", + input: "http://example.com:8080", + expected: "/dns/example.com/tcp/8080/http", + }, + { + name: "https URL with custom port", + input: "https://example.com:8443", + expected: "/dns/example.com/tcp/8443/https", + }, + { + name: "http URL with IPv4", + input: "http://192.168.1.1:8080", + expected: "/ip4/192.168.1.1/tcp/8080/http", + }, + { + name: "https URL with IPv4", + input: "https://192.168.1.1", + expected: "/ip4/192.168.1.1/tcp/443/https", + }, + { + name: "http URL with IPv6", + input: "http://[::1]:8080", + expected: "/ip6/::1/tcp/8080/http", + }, + { + name: "https URL with IPv6", + input: "https://[::1]", + expected: "/ip6/::1/tcp/443/https", + }, + { + name: "https URL with IPv6 and custom port", + input: "https://[::1]:8443", + expected: "/ip6/::1/tcp/8443/https", + }, + { + name: 
"http URL with path preserved", + input: "http://example.com/path/to/resource", + expected: "/dns/example.com/tcp/80/http/http-path/path%2Fto%2Fresource", + }, + { + name: "https URL with query - path preserved, query dropped", + input: "https://example.com:8443/path?query=value", + expected: "/dns/example.com/tcp/8443/https/http-path/path", + }, + { + name: "non-HTTP scheme not converted", + input: "ftp://example.com", + expected: "", + }, + { + name: "websocket scheme not converted", + input: "ws://example.com", + expected: "", + }, + { + name: "existing multiaddr returned as-is", + input: "/ip4/127.0.0.1/tcp/4001", + expected: "/ip4/127.0.0.1/tcp/4001", + }, + { + name: "existing http multiaddr returned as-is", + input: "/dns/example.com/tcp/443/https", + expected: "/dns/example.com/tcp/443/https", + }, + { + name: "localhost http", + input: "http://localhost:8080", + expected: "/dns/localhost/tcp/8080/http", + }, + { + name: "localhost https", + input: "https://localhost", + expected: "/dns/localhost/tcp/443/https", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + addr, err := NewAddress(tt.input) + require.NoError(t, err) + + ma := addr.ToMultiaddr() + if tt.expected == "" { + assert.Nil(t, ma, "Expected nil multiaddr for %s", tt.input) + } else { + require.NotNil(t, ma, "Expected non-nil multiaddr for %s", tt.input) + assert.Equal(t, tt.expected, ma.String()) + } + }) + } + + // Test with invalid address + t.Run("invalid address returns nil", func(t *testing.T) { + addr := &Address{raw: "invalid"} + ma := addr.ToMultiaddr() + assert.Nil(t, ma) + }) +} diff --git a/routing/http/types/json/responses.go b/routing/http/types/json/responses.go index 47c65a8b9..5708f47d5 100644 --- a/routing/http/types/json/responses.go +++ b/routing/http/types/json/responses.go @@ -2,6 +2,7 @@ package json import ( "encoding/json" + "fmt" "github.com/ipfs/boxo/routing/http/types" ) @@ -49,6 +50,16 @@ func (r *RecordsArray) UnmarshalJSON(b []byte) 
error { return err } *r = append(*r, &prov) + case types.SchemaGeneric: + if len(provBytes) > types.MaxGenericRecordSize { + return fmt.Errorf("generic record too large: %d bytes (max %d)", len(provBytes), types.MaxGenericRecordSize) + } + var prov types.GenericRecord + err := json.Unmarshal(provBytes, &prov) + if err != nil { + return err + } + *r = append(*r, &prov) //nolint:staticcheck //lint:ignore SA1019 // ignore staticcheck case types.SchemaBitswap: diff --git a/routing/http/types/ndjson/records.go b/routing/http/types/ndjson/records.go index 4363e82ab..06711ebf4 100644 --- a/routing/http/types/ndjson/records.go +++ b/routing/http/types/ndjson/records.go @@ -2,6 +2,7 @@ package ndjson import ( "encoding/json" + "fmt" "io" "github.com/ipfs/boxo/routing/http/types" @@ -26,6 +27,18 @@ func NewRecordsIter(r io.Reader) iter.Iter[iter.Result[types.Record]] { return result } result.Val = &prov + case types.SchemaGeneric: + if len(upr.Val.Bytes) > types.MaxGenericRecordSize { + result.Err = fmt.Errorf("generic record too large: %d bytes (max %d)", len(upr.Val.Bytes), types.MaxGenericRecordSize) + return result + } + var prov types.GenericRecord + err := json.Unmarshal(upr.Val.Bytes, &prov) + if err != nil { + result.Err = err + return result + } + result.Val = &prov //nolint:staticcheck //lint:ignore SA1019 // ignore staticcheck case types.SchemaBitswap: diff --git a/routing/http/types/ndjson/records_test.go b/routing/http/types/ndjson/records_test.go new file mode 100644 index 000000000..85d76f909 --- /dev/null +++ b/routing/http/types/ndjson/records_test.go @@ -0,0 +1,244 @@ +package ndjson + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/ipfs/boxo/routing/http/types" + "github.com/ipfs/boxo/routing/http/types/iter" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewRecordsIter_GenericRecordSizeLimit(t *testing.T) { + t.Run("rejects generic record exceeding max size", func(t *testing.T) { + // 
Build a generic record that exceeds MaxGenericRecordSize by padding + // the ID field with enough data to push over 10 KiB. + largeID := strings.Repeat("x", types.MaxGenericRecordSize) + rec := map[string]any{ + "Schema": types.SchemaGeneric, + "ID": largeID, + } + data, err := json.Marshal(rec) + require.NoError(t, err) + require.Greater(t, len(data), types.MaxGenericRecordSize) + + r := strings.NewReader(string(data) + "\n") + ri := NewRecordsIter(r) + + results := iter.ReadAll[iter.Result[types.Record]](ri) + require.Len(t, results, 1) + assert.Error(t, results[0].Err) + assert.Contains(t, results[0].Err.Error(), "generic record too large") + }) + + t.Run("accepts generic record within max size", func(t *testing.T) { + rec := map[string]any{ + "Schema": types.SchemaGeneric, + "ID": "peer1", + "Addrs": []string{"https://example.com"}, + "Protocols": []string{"transport-ipfs-gateway-http"}, + } + data, err := json.Marshal(rec) + require.NoError(t, err) + require.LessOrEqual(t, len(data), types.MaxGenericRecordSize) + + r := strings.NewReader(string(data) + "\n") + ri := NewRecordsIter(r) + + results := iter.ReadAll[iter.Result[types.Record]](ri) + require.Len(t, results, 1) + require.NoError(t, results[0].Err) + + gr, ok := results[0].Val.(*types.GenericRecord) + require.True(t, ok) + assert.Equal(t, "peer1", gr.ID) + }) +} + +// TestNewRecordsIter_GenericRecordRoundTrip verifies that all GenericRecord +// fields (Addrs, Protocols, Extra) survive the NDJSON deserialization path. 
+func TestNewRecordsIter_GenericRecordRoundTrip(t *testing.T) { + rec := map[string]any{ + "Schema": types.SchemaGeneric, + "ID": "did:key:z6Mkm1example", + "Addrs": []string{"https://trustless-gateway.example.com", "/ip4/1.2.3.4/tcp/5000"}, + "Protocols": []string{"transport-ipfs-gateway-http"}, + } + data, err := json.Marshal(rec) + require.NoError(t, err) + + r := strings.NewReader(string(data) + "\n") + ri := NewRecordsIter(r) + + results := iter.ReadAll[iter.Result[types.Record]](ri) + require.Len(t, results, 1) + require.NoError(t, results[0].Err) + + gr, ok := results[0].Val.(*types.GenericRecord) + require.True(t, ok, "expected *types.GenericRecord") + + assert.Equal(t, types.SchemaGeneric, gr.Schema) + assert.Equal(t, types.SchemaGeneric, gr.GetSchema()) + assert.Equal(t, "did:key:z6Mkm1example", gr.ID) + require.Len(t, gr.Addrs, 2) + assert.True(t, gr.Addrs[0].IsURL(), "first addr should be URL") + assert.Equal(t, "https://trustless-gateway.example.com", gr.Addrs[0].String()) + assert.True(t, gr.Addrs[1].IsMultiaddr(), "second addr should be multiaddr") + assert.Equal(t, "/ip4/1.2.3.4/tcp/5000", gr.Addrs[1].String()) + assert.Equal(t, []string{"transport-ipfs-gateway-http"}, gr.Protocols) +} + +// TestNewRecordsIter_GenericRecordExtraFields verifies that protocol-specific +// extra fields survive the NDJSON deserialization path. This matches the spec +// test fixture showing a record with custom metadata. 
+func TestNewRecordsIter_GenericRecordExtraFields(t *testing.T) { + rec := map[string]any{ + "Schema": types.SchemaGeneric, + "ID": "did:key:z6Mkm1example", + "Addrs": []string{"https://provider.example.com"}, + "Protocols": []string{"example-future-protocol"}, + "example-future-protocol": map[string]any{"version": 2, "features": []string{"foo"}}, + } + data, err := json.Marshal(rec) + require.NoError(t, err) + + r := strings.NewReader(string(data) + "\n") + ri := NewRecordsIter(r) + + results := iter.ReadAll[iter.Result[types.Record]](ri) + require.Len(t, results, 1) + require.NoError(t, results[0].Err) + + gr, ok := results[0].Val.(*types.GenericRecord) + require.True(t, ok) + + // Extra field must be preserved and known fields must not leak into it. + require.Contains(t, gr.Extra, "example-future-protocol") + assert.NotContains(t, gr.Extra, "Schema") + assert.NotContains(t, gr.Extra, "ID") + assert.NotContains(t, gr.Extra, "Addrs") + assert.NotContains(t, gr.Extra, "Protocols") + + // Verify the extra field value round-trips correctly. + var meta map[string]any + err = json.Unmarshal(gr.Extra["example-future-protocol"], &meta) + require.NoError(t, err) + assert.Equal(t, float64(2), meta["version"]) +} + +// TestNewRecordsIter_MixedSchemaStream verifies that NewRecordsIter correctly +// deserializes a mixed NDJSON stream containing peer, generic, and bitswap +// records. Each record must come through as the correct Go type. 
+func TestNewRecordsIter_MixedSchemaStream(t *testing.T) { + peerRec := map[string]any{ + "Schema": types.SchemaPeer, + "ID": "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + "Addrs": []string{"/ip4/127.0.0.1/tcp/4001"}, + "Protocols": []string{"transport-bitswap"}, + } + genericRec := map[string]any{ + "Schema": types.SchemaGeneric, + "ID": "gateway-provider", + "Addrs": []string{"https://gateway.example.com"}, + "Protocols": []string{"transport-ipfs-gateway-http"}, + } + //nolint:staticcheck + bitswapRec := map[string]any{ + //lint:ignore SA1019 // ignore staticcheck + "Schema": types.SchemaBitswap, + "ID": "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + "Protocol": "transport-bitswap", + "Addrs": []string{"/ip4/127.0.0.1/udp/4001/quic-v1"}, + } + unknownRec := map[string]any{ + "Schema": "future-schema", + "ID": "something", + } + + var lines []string + for _, rec := range []any{peerRec, genericRec, bitswapRec, unknownRec} { + data, err := json.Marshal(rec) + require.NoError(t, err) + lines = append(lines, string(data)) + } + ndjson := strings.Join(lines, "\n") + "\n" + + ri := NewRecordsIter(strings.NewReader(ndjson)) + results := iter.ReadAll[iter.Result[types.Record]](ri) + require.Len(t, results, 4) + + // Record 0: PeerRecord + require.NoError(t, results[0].Err) + _, ok := results[0].Val.(*types.PeerRecord) + assert.True(t, ok, "first record should be *types.PeerRecord") + + // Record 1: GenericRecord + require.NoError(t, results[1].Err) + gr, ok := results[1].Val.(*types.GenericRecord) + assert.True(t, ok, "second record should be *types.GenericRecord") + assert.Equal(t, "gateway-provider", gr.ID) + require.Len(t, gr.Addrs, 1) + assert.Equal(t, "https://gateway.example.com", gr.Addrs[0].String()) + + // Record 2: BitswapRecord + require.NoError(t, results[2].Err) + //nolint:staticcheck + //lint:ignore SA1019 // ignore staticcheck + _, ok = results[2].Val.(*types.BitswapRecord) + assert.True(t, ok, "third record should be 
*types.BitswapRecord") + + // Record 3: unknown schema falls through as UnknownRecord + require.NoError(t, results[3].Err) + _, ok = results[3].Val.(*types.UnknownRecord) + assert.True(t, ok, "fourth record should be *types.UnknownRecord") +} + +// TestNewPeerRecordsIter_SkipsGenericRecord verifies that NewPeerRecordsIter +// silently skips GenericRecord entries and only returns PeerRecords. +func TestNewPeerRecordsIter_SkipsGenericRecord(t *testing.T) { + peerRec := map[string]any{ + "Schema": types.SchemaPeer, + "ID": "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + "Addrs": []string{"/ip4/127.0.0.1/tcp/4001"}, + "Protocols": []string{"transport-bitswap"}, + } + genericRec := map[string]any{ + "Schema": types.SchemaGeneric, + "ID": "gateway-provider", + "Addrs": []string{"https://gateway.example.com"}, + "Protocols": []string{"transport-ipfs-gateway-http"}, + } + peerRec2 := map[string]any{ + "Schema": types.SchemaPeer, + "ID": "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vz", + "Addrs": []string{"/ip4/127.0.0.2/tcp/4001"}, + "Protocols": []string{"transport-ipfs-gateway-http"}, + } + + var lines []string + for _, rec := range []any{peerRec, genericRec, peerRec2} { + data, err := json.Marshal(rec) + require.NoError(t, err) + lines = append(lines, string(data)) + } + ndjson := strings.Join(lines, "\n") + "\n" + + ri := NewPeerRecordsIter(strings.NewReader(ndjson)) + results := iter.ReadAll[iter.Result[*types.PeerRecord]](ri) + + // GenericRecord produces a result with nil Val (skipped), so we collect + // only non-nil PeerRecords. 
+	var peerResults []*types.PeerRecord
+	for _, r := range results {
+		require.NoError(t, r.Err)
+		if r.Val != nil {
+			peerResults = append(peerResults, r.Val)
+		}
+	}
+
+	require.Len(t, peerResults, 2, "only PeerRecords should come through")
+	assert.Equal(t, types.SchemaPeer, peerResults[0].Schema)
+	assert.Equal(t, types.SchemaPeer, peerResults[1].Schema)
+}
diff --git a/routing/http/types/record_generic.go b/routing/http/types/record_generic.go
new file mode 100644
index 000000000..a4752ed17
--- /dev/null
+++ b/routing/http/types/record_generic.go
@@ -0,0 +1,106 @@
+package types
+
+import (
+	"encoding/json"
+
+	"github.com/ipfs/boxo/routing/http/internal/drjson"
+)
+
+// SchemaGeneric is the Schema value that identifies a GenericRecord.
+const SchemaGeneric = "generic"
+
+// MaxGenericRecordSize is the maximum serialized size of a single generic
+// record, as specified by IPIP-518.
+const MaxGenericRecordSize = 10 << 10 // 10 KiB
+
+var _ Record = (*GenericRecord)(nil)
+
+// GenericRecord is a duck-typed record that can hold both multiaddrs and URIs
+// in its Addrs field. It is introduced by IPIP-518 as the successor to
+// PeerRecord for providers that expose non-libp2p transports.
+type GenericRecord struct {
+	Schema    string
+	ID        string
+	Addrs     Addresses
+	Protocols []string
+
+	// Extra contains extra fields that were included in the original JSON raw
+	// message, except for the known ones represented by the remaining fields.
+	Extra map[string]json.RawMessage
+}
+
+// GetSchema returns the record's Schema field.
+func (gr *GenericRecord) GetSchema() string {
+	return gr.Schema
+}
+
+// UnmarshalJSON decodes b into gr. Schema, ID, Addrs and Protocols are decoded
+// into their typed fields; every other top-level member is kept verbatim in
+// Extra so that it survives a later MarshalJSON.
+//
+// All fields of gr, including Extra, are overwritten on success, so a record
+// may be reused across decodes without entries from an earlier message leaking
+// into the next one. On error gr is left untouched.
+func (gr *GenericRecord) UnmarshalJSON(b []byte) error {
+	// Decode the known fields into a scratch struct first, so that gr is only
+	// modified once the whole message has been decoded successfully.
+	var known struct {
+		Schema    string
+		ID        string
+		Addrs     Addresses
+		Protocols []string
+	}
+	if err := json.Unmarshal(b, &known); err != nil {
+		return err
+	}
+
+	// Decode every member into a fresh map rather than into gr.Extra:
+	// json.Unmarshal reuses a non-nil map and keeps its existing entries,
+	// which would carry stale extras over when gr is reused.
+	var extra map[string]json.RawMessage
+	if err := json.Unmarshal(b, &extra); err != nil {
+		return err
+	}
+	// Drop the known fields so that they are not represented twice.
+	delete(extra, "Schema")
+	delete(extra, "ID")
+	delete(extra, "Addrs")
+	delete(extra, "Protocols")
+
+	gr.Schema = known.Schema
+	gr.ID = known.ID
+	gr.Addrs = known.Addrs
+	gr.Protocols = known.Protocols
+	gr.Extra = extra
+
+	return nil
+}
+
+// MarshalJSON encodes gr as a single JSON object. Members of Extra are copied
+// in first; Schema and ID are then always written from the typed fields, and
+// Addrs and Protocols are written only when non-nil (an empty, non-nil value
+// is still included).
+//
+// The receiver is a value so that both GenericRecord and *GenericRecord
+// implement json.Marshaler.
+func (gr GenericRecord) MarshalJSON() ([]byte, error) {
+	// Room for every extra member plus the four known fields.
+	m := make(map[string]any, len(gr.Extra)+4)
+	for key, val := range gr.Extra {
+		m[key] = val
+	}
+
+	// Schema and ID must always be set.
+	m["Schema"] = gr.Schema
+	m["ID"] = gr.ID
+
+	if gr.Addrs != nil {
+		m["Addrs"] = gr.Addrs
+	}
+
+	if gr.Protocols != nil {
+		m["Protocols"] = gr.Protocols
+	}
+
+	return drjson.MarshalJSONBytes(m)
+}
diff --git a/routing/http/types/record_generic_test.go b/routing/http/types/record_generic_test.go
new file mode 100644
index 000000000..c24e541eb
--- /dev/null
+++ b/routing/http/types/record_generic_test.go
@@ -0,0 +1,250 @@
+package types
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestGenericRecord_GetSchema(t *testing.T) {
+	gr := GenericRecord{Schema: SchemaGeneric}
+	assert.Equal(t, SchemaGeneric, gr.GetSchema())
+}
+
+// TestGenericRecord_UnmarshalResetsExtra verifies that decoding into a reused
+// record does not retain Extra entries left over from an earlier message.
+func TestGenericRecord_UnmarshalResetsExtra(t *testing.T) {
+	var gr GenericRecord
+	require.NoError(t, json.Unmarshal([]byte(`{"Schema":"generic","ID":"a","Old":1}`), &gr))
+	require.Contains(t, gr.Extra, "Old")
+
+	require.NoError(t, json.Unmarshal([]byte(`{"Schema":"generic","ID":"b","New":2}`), &gr))
+	assert.Contains(t, gr.Extra, "New")
+	assert.NotContains(t, gr.Extra, "Old")
+}
+
+func TestGenericRecord_JSON(t *testing.T) {
+	t.Run("round-trip with mixed addresses", func(t *testing.T) {
+		// GenericRecord uses duck-typed Addrs that accept both multiaddrs and URIs.
+		// This verifies the core IPIP-518 use case: a provider advertising both
+		// libp2p multiaddrs and HTTP gateway URLs in a single record.
+ jsonData := `{ + "Schema": "generic", + "ID": "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", + "Addrs": [ + "/ip4/192.168.1.1/tcp/4001", + "https://trustless-gateway.example.com", + "http://example.org:8080", + "/dns4/libp2p.example.com/tcp/443/wss" + ], + "Protocols": ["transport-bitswap", "transport-ipfs-gateway-http"] + }` + + var gr GenericRecord + err := json.Unmarshal([]byte(jsonData), &gr) + require.NoError(t, err) + + assert.Equal(t, SchemaGeneric, gr.Schema) + assert.Equal(t, "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", gr.ID) + assert.Len(t, gr.Addrs, 4) + assert.Equal(t, []string{"transport-bitswap", "transport-ipfs-gateway-http"}, gr.Protocols) + + // Verify address types were correctly parsed + assert.True(t, gr.Addrs[0].IsMultiaddr(), "first addr should be multiaddr") + assert.True(t, gr.Addrs[1].IsURL(), "second addr should be URL") + assert.True(t, gr.Addrs[2].IsURL(), "third addr should be URL") + assert.True(t, gr.Addrs[3].IsMultiaddr(), "fourth addr should be multiaddr") + + // Marshal back and verify fields are present + data, err := json.Marshal(gr) + require.NoError(t, err) + + var check map[string]any + err = json.Unmarshal(data, &check) + require.NoError(t, err) + + assert.Equal(t, "generic", check["Schema"]) + assert.Equal(t, "12D3KooWM8sovaEGU1bmiWGWAzvs47DEcXKZZTuJnpQyVTkRs2Vn", check["ID"]) + + addrs, ok := check["Addrs"].([]any) + require.True(t, ok) + assert.Len(t, addrs, 4) + assert.Equal(t, "/ip4/192.168.1.1/tcp/4001", addrs[0]) + assert.Equal(t, "https://trustless-gateway.example.com", addrs[1]) + }) + + t.Run("extra fields are preserved", func(t *testing.T) { + // Unknown fields must be preserved in Extra during deserialization and + // included again when marshaling. This allows forward compatibility with + // future spec extensions without data loss. 
+ jsonData := `{ + "Schema": "generic", + "ID": "peer1", + "Addrs": ["https://example.com"], + "Protocols": ["transport-ipfs-gateway-http"], + "CustomField": "custom-value", + "AnotherField": 42 + }` + + var gr GenericRecord + err := json.Unmarshal([]byte(jsonData), &gr) + require.NoError(t, err) + + assert.Len(t, gr.Extra, 2) + assert.Contains(t, gr.Extra, "CustomField") + assert.Contains(t, gr.Extra, "AnotherField") + + // Known fields must not leak into Extra + assert.NotContains(t, gr.Extra, "Schema") + assert.NotContains(t, gr.Extra, "ID") + assert.NotContains(t, gr.Extra, "Addrs") + assert.NotContains(t, gr.Extra, "Protocols") + + // Marshal and verify extra fields survive the round-trip + data, err := json.Marshal(gr) + require.NoError(t, err) + + var check map[string]any + err = json.Unmarshal(data, &check) + require.NoError(t, err) + assert.Equal(t, "custom-value", check["CustomField"]) + assert.Equal(t, float64(42), check["AnotherField"]) + }) + + t.Run("nil Addrs and Protocols are omitted", func(t *testing.T) { + // When Addrs or Protocols are nil (not just empty), they should be + // omitted from the JSON output. This matches the behavior of PeerRecord + // and avoids sending unnecessary null fields. 
+ gr := GenericRecord{ + Schema: SchemaGeneric, + ID: "peer1", + } + + data, err := json.Marshal(gr) + require.NoError(t, err) + + var check map[string]any + err = json.Unmarshal(data, &check) + require.NoError(t, err) + + assert.Equal(t, "generic", check["Schema"]) + assert.Equal(t, "peer1", check["ID"]) + _, hasAddrs := check["Addrs"] + assert.False(t, hasAddrs, "nil Addrs should be omitted from JSON") + _, hasProtos := check["Protocols"] + assert.False(t, hasProtos, "nil Protocols should be omitted from JSON") + }) + + t.Run("empty Addrs are included", func(t *testing.T) { + // An explicitly empty Addrs slice should be included in JSON output + // (as opposed to nil which is omitted), because it signals "no addresses" + // rather than "field not provided". + gr := GenericRecord{ + Schema: SchemaGeneric, + ID: "peer1", + Addrs: Addresses{}, + Protocols: []string{}, + } + + data, err := json.Marshal(gr) + require.NoError(t, err) + + var check map[string]any + err = json.Unmarshal(data, &check) + require.NoError(t, err) + + _, hasAddrs := check["Addrs"] + assert.True(t, hasAddrs, "empty Addrs should be included in JSON") + _, hasProtos := check["Protocols"] + assert.True(t, hasProtos, "empty Protocols should be included in JSON") + }) + + t.Run("non-standard URI schemes are valid addresses", func(t *testing.T) { + // IPIP-518 is schema-agnostic: any absolute URI is a valid address. + // This includes non-standard schemes like foo:// or tcp:// that may be + // used by future transports. They must parse as valid URL addresses. 
+ jsonData := `{ + "Schema": "generic", + "ID": "peer1", + "Addrs": [ + "https://example.com", + "foo://custom-transport.example.com/path", + "tcp://192.168.1.1:4001", + "/ip4/127.0.0.1/tcp/4001" + ], + "Protocols": ["transport-foo"] + }` + + var gr GenericRecord + err := json.Unmarshal([]byte(jsonData), &gr) + require.NoError(t, err) + + assert.Len(t, gr.Addrs, 4) + + assert.True(t, gr.Addrs[0].IsURL(), "https should be a valid URL") + assert.True(t, gr.Addrs[1].IsURL(), "foo:// should be a valid URL") + assert.True(t, gr.Addrs[2].IsURL(), "tcp:// should be a valid URL") + assert.True(t, gr.Addrs[3].IsMultiaddr(), "multiaddr should be a valid multiaddr") + + // Verify protocol detection works for non-standard schemes + assert.Equal(t, []string{"https"}, gr.Addrs[0].Protocols()) + assert.Equal(t, []string{"foo"}, gr.Addrs[1].Protocols()) + assert.Equal(t, []string{"tcp"}, gr.Addrs[2].Protocols()) + + // Marshal round-trip preserves all addresses + data, err := json.Marshal(gr) + require.NoError(t, err) + + var check map[string]any + err = json.Unmarshal(data, &check) + require.NoError(t, err) + + addrs := check["Addrs"].([]any) + assert.Len(t, addrs, 4) + assert.Equal(t, "foo://custom-transport.example.com/path", addrs[1]) + assert.Equal(t, "tcp://192.168.1.1:4001", addrs[2]) + }) + + t.Run("unparseable addresses are preserved", func(t *testing.T) { + // Per IPIP-518, implementations MUST NOT fail on addresses they cannot + // parse. Invalid addresses are stored as raw strings and survive the + // round-trip, allowing forward compatibility with future address formats. + // This is distinct from non-standard URI schemes (which ARE valid). 
+ jsonData := `{ + "Schema": "generic", + "ID": "peer1", + "Addrs": [ + "https://example.com", + "foo://valid-but-unknown-scheme.example.com", + "/invalid/multiaddr", + "not-a-valid-address" + ] + }` + + var gr GenericRecord + err := json.Unmarshal([]byte(jsonData), &gr) + require.NoError(t, err) + + assert.Len(t, gr.Addrs, 4) + assert.True(t, gr.Addrs[0].IsValid(), "https URL should be valid") + assert.True(t, gr.Addrs[1].IsValid(), "foo:// URI should be valid") + assert.False(t, gr.Addrs[2].IsValid(), "invalid multiaddr should be marked invalid") + assert.False(t, gr.Addrs[3].IsValid(), "bare string should be marked invalid") + + // All addresses survive marshal round-trip, even invalid ones + data, err := json.Marshal(gr) + require.NoError(t, err) + + var check map[string]any + err = json.Unmarshal(data, &check) + require.NoError(t, err) + + addrs := check["Addrs"].([]any) + assert.Equal(t, "https://example.com", addrs[0]) + assert.Equal(t, "foo://valid-but-unknown-scheme.example.com", addrs[1]) + assert.Equal(t, "/invalid/multiaddr", addrs[2]) + assert.Equal(t, "not-a-valid-address", addrs[3]) + }) +}