|
| 1 | +//go:build linux |
| 2 | + |
| 3 | +package overlay |
| 4 | + |
| 5 | +import ( |
| 6 | + "archive/tar" |
| 7 | + "bytes" |
| 8 | + "errors" |
| 9 | + "fmt" |
| 10 | + "io" |
| 11 | + "os" |
| 12 | + "path/filepath" |
| 13 | + "strings" |
| 14 | + |
| 15 | + "github.com/sirupsen/logrus" |
| 16 | + "go.podman.io/storage/pkg/archive" |
| 17 | + "go.podman.io/storage/pkg/chrootarchive" |
| 18 | + "go.podman.io/storage/pkg/directory" |
| 19 | + "go.podman.io/storage/pkg/fileutils" |
| 20 | + "go.podman.io/storage/pkg/idtools" |
| 21 | + "go.podman.io/storage/pkg/splitfdstream" |
| 22 | + "go.podman.io/storage/pkg/unshare" |
| 23 | + "golang.org/x/sys/unix" |
| 24 | +) |
| 25 | + |
| 26 | +// ErrSplitFDStreamNotSupported is returned when splitfdstream operations |
| 27 | +// are not supported for a layer (e.g., composefs layers). |
| 28 | +var ErrSplitFDStreamNotSupported = errors.New("splitfdstream not supported for this layer") |
| 29 | + |
| 30 | +// untarSplitFDStream defines the splitfdstream untar method (through chrootarchive for security isolation) |
| 31 | +var untarSplitFDStream = chrootarchive.UnpackSplitFDStream |
| 32 | + |
| 33 | +// ApplySplitFDStream applies changes from a split FD stream to the specified layer. |
| 34 | +// It iterates over the splitfdstream entries and extracts them using |
| 35 | +// archive.UnpackFromIterator, which enables reflink-based copying for |
| 36 | +// external file descriptor references. |
| 37 | +// This API is experimental and can be changed without bumping the major version number. |
| 38 | +func (d *Driver) ApplySplitFDStream(options *splitfdstream.ApplySplitFDStreamOpts) (int64, error) { |
| 39 | + if options == nil { |
| 40 | + return 0, fmt.Errorf("options cannot be nil") |
| 41 | + } |
| 42 | + if err := options.Validate(); err != nil { |
| 43 | + return 0, fmt.Errorf("invalid options: %w", err) |
| 44 | + } |
| 45 | + |
| 46 | + var diffPath string |
| 47 | + |
| 48 | + if options.StagingDir != "" { |
| 49 | + diffPath = options.StagingDir |
| 50 | + logrus.Debugf("overlay: ApplySplitFDStream applying to staging dir %s", diffPath) |
| 51 | + } else { |
| 52 | + dir := d.dir(options.LayerID) |
| 53 | + if err := fileutils.Exists(dir); err != nil { |
| 54 | + return 0, fmt.Errorf("layer %s does not exist: %w", options.LayerID, err) |
| 55 | + } |
| 56 | + |
| 57 | + composefsData := d.getComposefsData(options.LayerID) |
| 58 | + if err := fileutils.Exists(composefsData); err == nil { |
| 59 | + return 0, fmt.Errorf("%w: layer %s uses composefs", ErrSplitFDStreamNotSupported, options.LayerID) |
| 60 | + } |
| 61 | + |
| 62 | + var err error |
| 63 | + diffPath, err = d.getDiffPath(options.LayerID) |
| 64 | + if err != nil { |
| 65 | + return 0, fmt.Errorf("failed to get diff path for layer %s: %w", options.LayerID, err) |
| 66 | + } |
| 67 | + |
| 68 | + logrus.Debugf("overlay: ApplySplitFDStream applying to layer %s at %s", options.LayerID, diffPath) |
| 69 | + } |
| 70 | + |
| 71 | + idMappings := options.IDMappings |
| 72 | + if idMappings == nil { |
| 73 | + idMappings = &idtools.IDMappings{} |
| 74 | + } |
| 75 | + |
| 76 | + if err := untarSplitFDStream(options.Stream, options.FileDescriptors, diffPath, &archive.TarOptions{ |
| 77 | + UIDMaps: idMappings.UIDs(), |
| 78 | + GIDMaps: idMappings.GIDs(), |
| 79 | + IgnoreChownErrors: options.IgnoreChownErrors || d.options.ignoreChownErrors, |
| 80 | + WhiteoutFormat: d.getWhiteoutFormat(), |
| 81 | + ForceMask: options.ForceMask, |
| 82 | + InUserNS: unshare.IsRootless(), |
| 83 | + }); err != nil { |
| 84 | + return 0, fmt.Errorf("failed to apply split FD stream: %w", err) |
| 85 | + } |
| 86 | + |
| 87 | + return directory.Size(diffPath) |
| 88 | +} |
| 89 | + |
| 90 | +// GetSplitFDStream generates a split FD stream from the layer differences. |
| 91 | +// The returned ReadCloser contains the splitfdstream-formatted data, and the |
| 92 | +// []*os.File slice contains the external file descriptors referenced by the stream. |
| 93 | +// Regular files are passed as external file descriptors for reflink-based copying. |
| 94 | +// The caller is responsible for closing both the ReadCloser and all file descriptors. |
| 95 | +// This API is experimental and can be changed without bumping the major version number. |
| 96 | +func (d *Driver) GetSplitFDStream(id, parent string, options *splitfdstream.GetSplitFDStreamOpts) (io.ReadCloser, []*os.File, error) { |
| 97 | + if options == nil { |
| 98 | + return nil, nil, fmt.Errorf("options cannot be nil") |
| 99 | + } |
| 100 | + |
| 101 | + dir := d.dir(id) |
| 102 | + if err := fileutils.Exists(dir); err != nil { |
| 103 | + return nil, nil, fmt.Errorf("layer %s does not exist: %w", id, err) |
| 104 | + } |
| 105 | + |
| 106 | + // Check if this is a composefs layer - splitfdstream is not supported for composefs yet |
| 107 | + composefsData := d.getComposefsData(id) |
| 108 | + if err := fileutils.Exists(composefsData); err == nil { |
| 109 | + return nil, nil, fmt.Errorf("%w: layer %s uses composefs", ErrSplitFDStreamNotSupported, id) |
| 110 | + } else if !errors.Is(err, unix.ENOENT) { |
| 111 | + return nil, nil, err |
| 112 | + } |
| 113 | + |
| 114 | + logrus.Debugf("overlay: GetSplitFDStream for layer %s with parent %s", id, parent) |
| 115 | + |
| 116 | + // Set up ID mappings |
| 117 | + idMappings := options.IDMappings |
| 118 | + if idMappings == nil { |
| 119 | + idMappings = &idtools.IDMappings{} |
| 120 | + } |
| 121 | + |
| 122 | + // Get the diff path for file access (used for FD references) |
| 123 | + diffPath, err := d.getDiffPath(id) |
| 124 | + if err != nil { |
| 125 | + return nil, nil, fmt.Errorf("failed to get diff path for layer %s: %w", id, err) |
| 126 | + } |
| 127 | + |
| 128 | + // Use Diff() to generate the tar stream - it handles naiveDiff |
| 129 | + // and all the edge cases correctly. |
| 130 | + tarStream, err := d.Diff(id, idMappings, parent, nil, options.MountLabel) |
| 131 | + if err != nil { |
| 132 | + return nil, nil, fmt.Errorf("failed to generate diff for layer %s: %w", id, err) |
| 133 | + } |
| 134 | + defer tarStream.Close() |
| 135 | + |
| 136 | + // Buffer the splitfdstream data in memory |
| 137 | + var buf bytes.Buffer |
| 138 | + var fds []*os.File |
| 139 | + writer := splitfdstream.NewWriter(&buf) |
| 140 | + |
| 141 | + // Convert tar stream to splitfdstream |
| 142 | + err = d.convertTarToSplitFDStream(tarStream, writer, diffPath, &fds) |
| 143 | + if err != nil { |
| 144 | + // Close any opened FDs on error |
| 145 | + for _, f := range fds { |
| 146 | + f.Close() |
| 147 | + } |
| 148 | + return nil, nil, fmt.Errorf("failed to convert tar to splitfdstream: %w", err) |
| 149 | + } |
| 150 | + |
| 151 | + logrus.Debugf("overlay: GetSplitFDStream complete for layer %s: streamSize=%d, numFDs=%d", id, buf.Len(), len(fds)) |
| 152 | + return io.NopCloser(bytes.NewReader(buf.Bytes())), fds, nil |
| 153 | +} |
| 154 | + |
| 155 | +// convertTarToSplitFDStream converts a tar stream to a splitfdstream by parsing |
| 156 | +// tar headers and replacing file content with file descriptor references. |
| 157 | +func (d *Driver) convertTarToSplitFDStream(tarStream io.ReadCloser, writer *splitfdstream.SplitFDStreamWriter, diffPath string, fds *[]*os.File) error { |
| 158 | + tr := tar.NewReader(tarStream) |
| 159 | + |
| 160 | + // Open diff directory for safe file access |
| 161 | + diffDirFd, err := unix.Open(diffPath, unix.O_RDONLY|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) |
| 162 | + if err != nil { |
| 163 | + return fmt.Errorf("failed to open diff directory %s: %w", diffPath, err) |
| 164 | + } |
| 165 | + defer unix.Close(diffDirFd) |
| 166 | + |
| 167 | + for { |
| 168 | + header, err := tr.Next() |
| 169 | + if err == io.EOF { |
| 170 | + break |
| 171 | + } |
| 172 | + if err != nil { |
| 173 | + return fmt.Errorf("failed to read tar header: %w", err) |
| 174 | + } |
| 175 | + |
| 176 | + // Write the tar header as inline data |
| 177 | + if err := d.writeTarHeaderInline(writer, header); err != nil { |
| 178 | + return fmt.Errorf("failed to write tar header for %s: %w", header.Name, err) |
| 179 | + } |
| 180 | + |
| 181 | + // Handle file content |
| 182 | + if header.Typeflag == tar.TypeReg && header.Size > 0 { |
| 183 | + // Try to open file and write FD reference |
| 184 | + ok, err := d.tryWriteFileAsFDReference(writer, diffDirFd, header, fds) |
| 185 | + if err != nil { |
| 186 | + return fmt.Errorf("failed to write FD reference for %s: %w", header.Name, err) |
| 187 | + } |
| 188 | + if ok { |
| 189 | + // Skip the content in the tar stream since we're using FD reference |
| 190 | + if _, err := io.CopyN(io.Discard, tr, header.Size); err != nil { |
| 191 | + return fmt.Errorf("failed to skip content for %s: %w", header.Name, err) |
| 192 | + } |
| 193 | + } else { |
| 194 | + // File not found in diff directory (e.g., naiveDiff was used), |
| 195 | + // write content inline from the tar stream |
| 196 | + content := make([]byte, header.Size) |
| 197 | + if _, err := io.ReadFull(tr, content); err != nil { |
| 198 | + return fmt.Errorf("failed to read content for %s: %w", header.Name, err) |
| 199 | + } |
| 200 | + if err := writer.WriteInline(content); err != nil { |
| 201 | + return fmt.Errorf("failed to write inline content for %s: %w", header.Name, err) |
| 202 | + } |
| 203 | + } |
| 204 | + } |
| 205 | + // For non-regular files or empty files, there's no content to handle |
| 206 | + } |
| 207 | + |
| 208 | + return nil |
| 209 | +} |
| 210 | + |
| 211 | +// writeTarHeaderInline writes a tar header as inline data to the splitfdstream. |
| 212 | +func (d *Driver) writeTarHeaderInline(writer *splitfdstream.SplitFDStreamWriter, header *tar.Header) error { |
| 213 | + var headerBuf bytes.Buffer |
| 214 | + tw := tar.NewWriter(&headerBuf) |
| 215 | + if err := tw.WriteHeader(header); err != nil { |
| 216 | + return fmt.Errorf("failed to serialize tar header: %w", err) |
| 217 | + } |
| 218 | + |
| 219 | + headerBytes := headerBuf.Bytes() |
| 220 | + if len(headerBytes) > 0 { |
| 221 | + if err := writer.WriteInline(headerBytes); err != nil { |
| 222 | + return fmt.Errorf("failed to write inline header: %w", err) |
| 223 | + } |
| 224 | + } |
| 225 | + |
| 226 | + return nil |
| 227 | +} |
| 228 | + |
| 229 | +// tryWriteFileAsFDReference tries to open a file and write an FD reference to the splitfdstream. |
| 230 | +// Returns (true, nil) if the file was successfully written as FD reference. |
| 231 | +// Returns (false, nil) if the file doesn't exist in the diff directory (caller should write inline). |
| 232 | +// Returns (false, error) on other errors. |
| 233 | +func (d *Driver) tryWriteFileAsFDReference(writer *splitfdstream.SplitFDStreamWriter, diffDirFd int, header *tar.Header, fds *[]*os.File) (bool, error) { |
| 234 | + // Clean the file name to prevent path traversal |
| 235 | + cleanName := filepath.Clean(header.Name) |
| 236 | + if strings.Contains(cleanName, "..") { |
| 237 | + return false, fmt.Errorf("invalid file path: %s", header.Name) |
| 238 | + } |
| 239 | + |
| 240 | + // Open the file safely using openat2 |
| 241 | + fd, err := unix.Openat2(diffDirFd, cleanName, &unix.OpenHow{ |
| 242 | + Flags: unix.O_RDONLY | unix.O_CLOEXEC, |
| 243 | + Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_BENEATH, |
| 244 | + }) |
| 245 | + if err != nil { |
| 246 | + // File not found in diff directory - caller should write inline |
| 247 | + return false, nil |
| 248 | + } |
| 249 | + |
| 250 | + // Verify it's still a regular file |
| 251 | + var fdStat unix.Stat_t |
| 252 | + if err := unix.Fstat(fd, &fdStat); err != nil { |
| 253 | + unix.Close(fd) |
| 254 | + return false, fmt.Errorf("failed to fstat opened file %s: %w", cleanName, err) |
| 255 | + } |
| 256 | + if fdStat.Mode&unix.S_IFMT != unix.S_IFREG { |
| 257 | + unix.Close(fd) |
| 258 | + return false, fmt.Errorf("file %s is not a regular file", cleanName) |
| 259 | + } |
| 260 | + |
| 261 | + // Create os.File from fd |
| 262 | + f := os.NewFile(uintptr(fd), cleanName) |
| 263 | + if f == nil { |
| 264 | + unix.Close(fd) |
| 265 | + return false, fmt.Errorf("failed to create File from fd for %s", cleanName) |
| 266 | + } |
| 267 | + |
| 268 | + fdIndex := len(*fds) |
| 269 | + *fds = append(*fds, f) |
| 270 | + |
| 271 | + // Write FD reference |
| 272 | + if err := writer.WriteExternal(fdIndex); err != nil { |
| 273 | + return false, fmt.Errorf("failed to write external FD reference: %w", err) |
| 274 | + } |
| 275 | + |
| 276 | + return true, nil |
| 277 | +} |
0 commit comments