diff --git a/CHANGELOG.md b/CHANGELOG.md index f629f4bd3..a744c9633 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,9 @@ The following emojis are used to highlight certain changes: ### Added +- `gateway`: `Config.MaxDeserializedResponseSize` allows setting a maximum file/directory size for deserialized gateway responses. Content exceeding this limit returns `410 Gone`, directing users to run their own IPFS node. Trustless response formats (`application/vnd.ipld.raw`, `application/vnd.ipld.car`) and TAR responses are not affected by this limit; use `Config.MaxUnixFSDAGResponseSize` to cap those. The size is read from the UnixFS root block, so no extra block fetches are needed for the check. [#1138](https://github.com/ipfs/boxo/pull/1138) +- `gateway`: `Config.MaxUnixFSDAGResponseSize` allows setting a maximum content size applied to all response formats (deserialized, raw blocks, CAR, TAR). Content exceeding this limit returns `410 Gone`. For most handlers the check reuses size information already available in the request path; for CAR responses a lightweight `Head` call is made only when the limit is configured. [#1138](https://github.com/ipfs/boxo/pull/1138) + ### Changed ### Removed diff --git a/gateway/gateway.go b/gateway/gateway.go index a22e3e4e9..9909aee52 100644 --- a/gateway/gateway.go +++ b/gateway/gateway.go @@ -150,6 +150,31 @@ type Config struct { // (e.g., Cloudflare's 5GB limit). A value of 0 disables this limit. MaxRangeRequestFileSize int64 + // MaxDeserializedResponseSize is the maximum file or directory DAG size + // in bytes for deserialized responses. When set to a value greater than 0, + // requests for UnixFS content larger than this limit will return + // 410 Gone, directing users to run their own IPFS node for large content. + // This applies to both regular and range requests: if the underlying file + // exceeds the limit, even a small range is rejected. + // No additional block fetches are needed; size is already available from + // the request's normal processing of the UnixFS root block. 
+ // A value of 0 disables this limit. Only affects default deserialized + // UnixFS responses; trustless formats (application/vnd.ipld.raw, + // application/vnd.ipld.car) and TAR responses are not affected. + MaxDeserializedResponseSize int64 + + // MaxUnixFSDAGResponseSize is the maximum UnixFS file or directory DAG + // size in bytes, applied to all response formats: deserialized, raw + // blocks, CAR, and TAR. When set to a value greater than 0, any request + // whose resolved content exceeds this limit will return 410 Gone, + // regardless of response format. This allows gateway operators to cap + // bandwidth across all response types. + // Most handlers reuse the size already available from normal request + // processing; the CAR handler performs a lightweight Head call (root + // block is then cached for the subsequent CAR traversal). + // A value of 0 disables this limit. + MaxUnixFSDAGResponseSize int64 + // MaxRequestDuration is the maximum total time a request can take. // Unlike RetrievalTimeout (which resets on each data write and catches // stalled transfers), this is an absolute deadline for the entire request. 
diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index e91bbf4ff..38eca65d9 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -1640,6 +1640,292 @@ func TestMaxRangeRequestFileSize(t *testing.T) { }) } +func TestMaxDeserializedResponseSize(t *testing.T) { + backend, root := newMockBackend(t, "fixtures.car") + + // "fnord" file is 5 bytes, lives at subdir/fnord + p, err := path.Join(path.FromCid(root), "subdir", "fnord") + require.NoError(t, err) + + ctx := t.Context() + + k, err := backend.resolvePathNoRootsReturned(ctx, p) + require.NoError(t, err) + + t.Run("GET exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 4, // smaller than "fnord" (5 bytes) + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + require.Equal(t, cacheControlSizeLimit, res.Header.Get("Cache-Control")) + + body, err := io.ReadAll(res.Body) + require.NoError(t, err) + require.Contains(t, string(body), "not supported for content larger than 4 bytes") + require.Contains(t, string(body), "https://docs.ipfs.tech/install/") + }) + + t.Run("range request for file exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 4, // smaller than "fnord" (5 bytes) + }) + + // Even though range is only 2 bytes, the file itself is 5 bytes + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + req.Header.Set("Range", "bytes=0-1") + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + + body, err := io.ReadAll(res.Body) + require.NoError(t, err) + require.Contains(t, string(body), "not supported for content larger than 4 bytes") + }) + + 
t.Run("HEAD exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 4, + }) + + req, err := http.NewRequest(http.MethodHead, ts.URL+k.String(), nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + }) + + t.Run("GET within limit works", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 1000, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusOK, res.StatusCode) + + body, err := io.ReadAll(res.Body) + require.NoError(t, err) + require.Equal(t, "fnord", string(body)) + }) + + t.Run("disabled when set to 0", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 0, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusOK, res.StatusCode) + + body, err := io.ReadAll(res.Body) + require.NoError(t, err) + require.Equal(t, "fnord", string(body)) + }) + + t.Run("raw format query param bypasses limit", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 1, // 1 byte, way below any content + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String()+"?format=raw", nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusOK, res.StatusCode) + }) + + t.Run("raw Accept header bypasses limit", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 1, + }) + + req, err := 
http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + req.Header.Set("Accept", "application/vnd.ipld.raw") + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusOK, res.StatusCode) + }) + + t.Run("car format query param bypasses limit", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 1, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String()+"?format=car", nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusOK, res.StatusCode) + }) + + t.Run("car Accept header bypasses limit", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxDeserializedResponseSize: 1, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + req.Header.Set("Accept", "application/vnd.ipld.car") + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusOK, res.StatusCode) + }) +} + +func TestMaxUnixFSDAGResponseSize(t *testing.T) { + backend, root := newMockBackend(t, "fixtures.car") + + // "fnord" file is 5 bytes, lives at subdir/fnord + p, err := path.Join(path.FromCid(root), "subdir", "fnord") + require.NoError(t, err) + + ctx := t.Context() + + k, err := backend.resolvePathNoRootsReturned(ctx, p) + require.NoError(t, err) + + t.Run("deserialized GET exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxUnixFSDAGResponseSize: 4, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + require.Equal(t, cacheControlSizeLimit, res.Header.Get("Cache-Control")) + + body, err := io.ReadAll(res.Body) + require.NoError(t, err) + require.Contains(t, string(body), "not 
supported for content larger than 4 bytes") + require.Contains(t, string(body), "https://docs.ipfs.tech/install/") + }) + + t.Run("deserialized range request for file exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxUnixFSDAGResponseSize: 4, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + req.Header.Set("Range", "bytes=0-1") + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + }) + + t.Run("raw format exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxUnixFSDAGResponseSize: 4, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String()+"?format=raw", nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + }) + + t.Run("raw Accept header exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxUnixFSDAGResponseSize: 4, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + req.Header.Set("Accept", "application/vnd.ipld.raw") + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + }) + + t.Run("car format exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxUnixFSDAGResponseSize: 4, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String()+"?format=car", nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + }) + + t.Run("car Accept header exceeding limit returns 410", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + 
MaxUnixFSDAGResponseSize: 4, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + req.Header.Set("Accept", "application/vnd.ipld.car") + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusGone, res.StatusCode) + }) + + t.Run("GET within limit works", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxUnixFSDAGResponseSize: 1000, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusOK, res.StatusCode) + + body, err := io.ReadAll(res.Body) + require.NoError(t, err) + require.Equal(t, "fnord", string(body)) + }) + + t.Run("disabled when set to 0", func(t *testing.T) { + ts := newTestServerWithConfig(t, backend, Config{ + DeserializedResponses: true, + MaxUnixFSDAGResponseSize: 0, + }) + + req, err := http.NewRequest(http.MethodGet, ts.URL+k.String(), nil) + require.NoError(t, err) + + res := mustDoWithoutRedirect(t, req) + require.Equal(t, http.StatusOK, res.StatusCode) + + body, err := io.ReadAll(res.Body) + require.NoError(t, err) + require.Equal(t, "fnord", string(body)) + }) +} + func TestValidateConfig_MaxRequestDuration(t *testing.T) { t.Parallel() diff --git a/gateway/handler.go b/gateway/handler.go index bc150cd88..07b51af3c 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -1125,3 +1125,38 @@ func (i *handler) getTemplateGlobalData(r *http.Request, contentPath path.Path) func (i *handler) webError(w http.ResponseWriter, r *http.Request, err error, defaultCode int) { webError(w, r, i.config, err, defaultCode) } + +// cacheControlSizeLimit is the Cache-Control header for responses rejected by +// content size limits (MaxDeserializedResponseSize, MaxUnixFSDAGResponseSize). +// Fresh for 1 week, serve stale for up to 31 days while revalidating in the +// background. 
Uses 410 Gone which is heuristically cacheable per RFC 9110 and +// cached by CDNs (Cloudflare, Fastly) by default. +const cacheControlSizeLimit = "public, max-age=604800, stale-while-revalidate=2678400" + +// exceedsMaxUnixFSDAGResponseSize checks whether sz exceeds the configured +// MaxUnixFSDAGResponseSize. If it does, it writes a cacheable 410 Gone +// response and returns true. Returns false (no-op) when the limit is disabled +// or not exceeded. +func (i *handler) exceedsMaxUnixFSDAGResponseSize(w http.ResponseWriter, r *http.Request, sz int64) bool { + if i.config.MaxUnixFSDAGResponseSize > 0 && sz > i.config.MaxUnixFSDAGResponseSize { + err := fmt.Errorf("responses are not supported for content larger than %d bytes: for large content, run your own IPFS node (https://docs.ipfs.tech/install/)", i.config.MaxUnixFSDAGResponseSize) + w.Header().Set("Cache-Control", cacheControlSizeLimit) + i.webError(w, r, err, http.StatusGone) + return true + } + return false +} + +// exceedsMaxDeserializedResponseSize checks whether sz exceeds the configured +// MaxDeserializedResponseSize. If it does, it writes a cacheable 410 Gone +// response and returns true. Returns false (no-op) when the limit is disabled +// or not exceeded. 
+func (i *handler) exceedsMaxDeserializedResponseSize(w http.ResponseWriter, r *http.Request, sz int64) bool { + if i.config.MaxDeserializedResponseSize > 0 && sz > i.config.MaxDeserializedResponseSize { + err := fmt.Errorf("deserialized responses are not supported for content larger than %d bytes: for large content, run your own IPFS node (https://docs.ipfs.tech/install/)", i.config.MaxDeserializedResponseSize) + w.Header().Set("Cache-Control", cacheControlSizeLimit) + i.webError(w, r, err, http.StatusGone) + return true + } + return false +} diff --git a/gateway/handler_block.go b/gateway/handler_block.go index 27507272e..354897df8 100644 --- a/gateway/handler_block.go +++ b/gateway/handler_block.go @@ -44,6 +44,10 @@ func (i *handler) serveRawBlock(ctx context.Context, w http.ResponseWriter, r *h return false } + if i.exceedsMaxUnixFSDAGResponseSize(w, r, sz) { + return false + } + if !i.seekToStartOfFirstRange(w, r, data, sz) { return false } diff --git a/gateway/handler_car.go b/gateway/handler_car.go index f6e34984c..480af938b 100644 --- a/gateway/handler_car.go +++ b/gateway/handler_car.go @@ -73,6 +73,21 @@ func (i *handler) serveCAR(ctx context.Context, w http.ResponseWriter, r *http.R return false } + // Check DAG size limit before streaming the CAR. This requires a + // lightweight Head call; the root block is cached in the blockstore + // so GetCAR will not re-fetch it from the network. + if i.config.MaxUnixFSDAGResponseSize > 0 { + _, headResp, headErr := i.backend.Head(ctx, rq.immutablePath) + if headErr == nil { + sz := headResp.bytesSize + headResp.Close() + if i.exceedsMaxUnixFSDAGResponseSize(w, r, sz) { + return false + } + } + // If Head fails, let GetCAR surface the error with proper handling. 
+ } + md, carFile, err := i.backend.GetCAR(ctx, rq.immutablePath, params) if !i.handleRequestErrors(w, r, rq.contentPath, err) { return false diff --git a/gateway/handler_codec.go b/gateway/handler_codec.go index 414c89900..c9228343b 100644 --- a/gateway/handler_codec.go +++ b/gateway/handler_codec.go @@ -78,6 +78,10 @@ func (i *handler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http return false } + if i.exceedsMaxUnixFSDAGResponseSize(w, r, blockSize) { + return false + } + return i.renderCodec(ctx, w, r, rq, blockSize, data) } diff --git a/gateway/handler_defaults.go b/gateway/handler_defaults.go index 60bbaf819..2b02b99d3 100644 --- a/gateway/handler_defaults.go +++ b/gateway/handler_defaults.go @@ -93,6 +93,25 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h setIpfsRootsHeader(w, rq, &pathMetadata) + // Check content size limits before streaming any data. + // Size comes from the UnixFS root block (no child blocks fetched). + // Applies to both regular and range requests: if the underlying file + // exceeds the limit, even a small byte range is rejected. + { + var sz int64 + if headResp != nil { + sz = headResp.bytesSize + } else { + sz = getResp.bytesSize + } + if i.exceedsMaxUnixFSDAGResponseSize(w, r, sz) { + return false + } + if i.exceedsMaxDeserializedResponseSize(w, r, sz) { + return false + } + } + // On deserialized responses, we prefer Last-Modified from pathMetadata // (mtime in UnixFS 1.5 DAG). 
This also applies to /ipns/, because value // from dag-pb, if present, is more meaningful than lastMod inferred from diff --git a/gateway/handler_tar.go b/gateway/handler_tar.go index 04d6a23ad..b98b561b0 100644 --- a/gateway/handler_tar.go +++ b/gateway/handler_tar.go @@ -28,6 +28,13 @@ func (i *handler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.R defer file.Close() setIpfsRootsHeader(w, rq, &pathMetadata) + + if i.config.MaxUnixFSDAGResponseSize > 0 { + if sz, err := file.Size(); err == nil && i.exceedsMaxUnixFSDAGResponseSize(w, r, sz) { + return false + } + } + rootCid := pathMetadata.LastSegment.RootCid() // Set Cache-Control and read optional Last-Modified time