diff --git a/fuzz/fuzz_targets/buf_independent.rs b/fuzz/fuzz_targets/buf_independent.rs index 1b531d26..4c7947db 100644 --- a/fuzz/fuzz_targets/buf_independent.rs +++ b/fuzz/fuzz_targets/buf_independent.rs @@ -26,10 +26,10 @@ use libfuzzer_sys::fuzz_target; use std::fmt::Debug; -use std::io::{BufRead, BufReader, Cursor, Seek}; +use std::io::{BufRead, BufReader, Cursor}; mod smal_buf { - use std::io::{BufRead, Cursor, Read, Seek}; + use std::io::{BufRead, Cursor, Read}; /// A reader that returns at most 1 byte in a single call to `read`. pub struct SmalBuf { @@ -62,22 +62,17 @@ mod smal_buf { self.inner.consume(amt); } } - impl Seek for SmalBuf { - fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { - self.inner.seek(pos) - } - } } mod intermittent_eofs { use std::cell::Cell; - use std::io::{BufRead, Read, Seek}; + use std::io::{BufRead, Read}; use std::rc::Rc; /// A reader that returns `std::io::ErrorKind::UnexpectedEof` errors in every other read. /// EOFs can be temporarily disabled and re-enabled later using the associated `EofController`. - pub struct IntermittentEofs { + pub struct IntermittentEofs { inner: R, /// Controls whether intermittent EOFs happen at all. @@ -88,7 +83,7 @@ mod intermittent_eofs { eof_soon: bool, } - impl IntermittentEofs { + impl IntermittentEofs { pub fn new(inner: R) -> Self { Self { inner, @@ -102,7 +97,7 @@ mod intermittent_eofs { } } - impl Read for IntermittentEofs { + impl Read for IntermittentEofs { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { if self.controller.are_intermittent_eofs_enabled() && self.eof_soon { self.eof_soon = false; @@ -118,7 +113,7 @@ mod intermittent_eofs { inner_result } } - impl BufRead for IntermittentEofs { + impl BufRead for IntermittentEofs { fn fill_buf(&mut self) -> std::io::Result<&[u8]> { self.inner.fill_buf() } @@ -127,11 +122,6 @@ mod intermittent_eofs { self.inner.consume(amt); } } - impl Seek for IntermittentEofs { - fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { - self.inner.seek(pos) - } - } pub struct EofController { are_intermittent_eofs_enabled: Cell, @@ -172,9 +162,6 @@ fuzz_target!(|data: &[u8]| { let _ = test_data(data); }); -trait BufReadSeek: BufRead + Seek {} -impl BufReadSeek for T where T: BufRead + Seek {} - #[inline(always)] fn test_data<'a>(data: &'a [u8]) -> Result<(), ()> { let baseline_reader = Box::new(Cursor::new(data)); @@ -186,7 +173,7 @@ fn test_data<'a>(data: &'a [u8]) -> Result<(), ()> { // `Decoder` used to internally wrap the provided reader with a `BufReader`. Now that it has // been removed, fuzzing would be far too slow if we didn't use a BufReader here. - let data_readers: Vec>> = vec![ + let data_readers: Vec>> = vec![ BufReader::new(baseline_reader), BufReader::new(byte_by_byte_reader), BufReader::new(intermittent_eofs_reader), diff --git a/src/decoder/input.rs b/src/decoder/input.rs new file mode 100644 index 00000000..06fa6e1b --- /dev/null +++ b/src/decoder/input.rs @@ -0,0 +1,78 @@ +use std::io::{BufRead, BufReader, Read, Take}; + +/// Buffered input for the PNG decoder, with optional boundary-limited refills. +/// +/// This trait is similar to [`BufRead`], but `fill_buf_limited` receives the maximum number of +/// bytes the decoder currently expects to consume before reaching the next PNG parser boundary. +/// +/// To avoid reading beyond PNG boundaries, see [`LimitBufReader`] for an implementation that +/// honors the provided `limit`. +pub trait LimitBufRead { + /// Same as [`BufRead::fill_buf`], but with a hint to cap reads to at most `limit` bytes. + /// + /// The `limit` boundary serves as a hint to avoid overshooting past `IEND`. Implementors + /// are free to discard this value: [`BufRead`] implements this trait discarding `limit` + /// entirely. + /// + /// Implementations that can enforce the `limit` should use it to cap the number of reads, i.e. + /// it is valid to return a slice bigger than `limit` if there are more than `limit` unconsumed + /// bytes. + fn fill_buf_limited(&mut self, limit: usize) -> std::io::Result<&[u8]>; + + /// Same as [`BufRead::consume`]. + fn consume(&mut self, amt: usize); +} + +/// Blanket implementation for all [`BufRead`] types. +/// +/// [`LimitBufRead`] replaces the [`BufRead`] trait bound historically required by +/// [`Decoder`](crate::Decoder) and [`Reader`](crate::Reader) in a backwards-compatible manner. +/// +/// This implementation deliberately ignores the `limit` dynamic boundary contract, aggressively +/// pre-fetching data into standard memory buffers to maximize standard I/O caching performance. +/// However, it does not prevent the underlying reader from being advanced past PNG boundaries. +impl LimitBufRead for T { + fn fill_buf_limited(&mut self, _limit: usize) -> std::io::Result<&[u8]> { + self.fill_buf() + } + + fn consume(&mut self, amt: usize) { + BufRead::consume(self, amt); + } +} + +/// A wrapper for [`Read`] streams that implements controlled buffering to prevent reading past +/// `IEND` chunk boundaries. +/// +/// # Performance note +/// +/// Because this wrapper caps reads strictly to what the state machine expects, it can trigger more +/// frequent operating system read syscalls in files characterized by many small chunks. This can +/// cause context-switching regressions compared to standard large-block pre-fetching (e.g. +/// `std::io::BufReader`). [`LimitBufReader`] should only be used when boundary safety is required. +pub struct LimitBufReader(BufReader>); + +impl LimitBufReader { + /// Creates a new [`LimitBufReader`] wrapping the given [`Read`] stream with the default + /// [`BufReader`] capacity. + pub fn new(inner: R) -> Self { + Self(BufReader::new(inner.take(u64::MAX))) + } + + /// Creates a new [`LimitBufReader`] wrapping the given [`Read`] stream with a specified + /// `capacity` limit. + pub fn with_capacity(capacity: usize, inner: R) -> Self { + Self(BufReader::with_capacity(capacity, inner.take(u64::MAX))) + } +} + +impl LimitBufRead for LimitBufReader { + fn fill_buf_limited(&mut self, limit: usize) -> std::io::Result<&[u8]> { + self.0.get_mut().set_limit(limit as u64); + self.0.fill_buf() + } + + fn consume(&mut self, amt: usize) { + BufRead::consume(&mut self.0, amt); + } +} diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs index 17f72791..b05b6ad3 100644 --- a/src/decoder/mod.rs +++ b/src/decoder/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod input; mod interlace_info; mod read_decoder; pub(crate) mod stream; @@ -10,7 +11,6 @@ use self::stream::{DecodeOptions, DecodingError, FormatErrorInner}; use self::transform::{create_transform_fn, TransformFn}; use self::unfiltering_buffer::UnfilteringBuffer; -use std::io::{BufRead, Seek}; use std::mem; use crate::adam7::Adam7Info; @@ -20,6 +20,7 @@ use crate::common::{ use crate::FrameControl; pub use zlib::{UnfilterBuf, UnfilterRegion}; +pub use self::input::{LimitBufRead, LimitBufReader}; pub use interlace_info::InterlaceInfo; use interlace_info::InterlaceInfoIter; @@ -87,7 +88,7 @@ impl Default for Limits { } /// PNG Decoder -pub struct Decoder { +pub struct Decoder { read_decoder: ReadDecoder, /// Output transformations transform: Transformations, @@ -122,7 +123,7 @@ impl<'data> Row<'data> { } } -impl Decoder { +impl Decoder { /// Create a new decoder configuration with default limits. pub fn new(r: R) -> Decoder { Decoder::new_with_limits(r, Limits::default()) @@ -291,7 +292,7 @@ impl Decoder { /// PNG reader (mostly high-level interface) /// /// Provides a high level that iterates over lines or whole images. -pub struct Reader { +pub struct Reader { decoder: ReadDecoder, bpp: BytesPerPixel, subframe: SubframeInfo, @@ -327,7 +328,7 @@ struct SubframeInfo { consumed_and_flushed: bool, } -impl Reader { +impl Reader { /// Advances to the start of the next animation frame and /// returns a reference to the [`FrameControl`] info that describes it. /// Skips and discards the image data of the previous frame if necessary. diff --git a/src/decoder/read_decoder.rs b/src/decoder/read_decoder.rs index 0d187d47..59936c5c 100644 --- a/src/decoder/read_decoder.rs +++ b/src/decoder/read_decoder.rs @@ -2,7 +2,8 @@ use super::stream::{DecodeOptions, Decoded, DecodingError, FormatErrorInner, Str use super::zlib::UnfilterBuf; use super::Limits; -use std::io::{BufRead, ErrorKind, Read, Seek}; +use super::input::LimitBufRead; +use std::io::ErrorKind; use crate::chunk; use crate::common::Info; @@ -16,12 +17,12 @@ use crate::common::Info; /// * `decode_image_data` - reading from `IDAT` / `fdAT` sequence into `Vec` /// * `finish_decoding_image_data()` - discarding remaining data from `IDAT` / `fdAT` sequence /// * `read_until_end_of_input()` - reading until `IEND` chunk -pub(crate) struct ReadDecoder { +pub(crate) struct ReadDecoder { reader: R, decoder: StreamingDecoder, } -impl ReadDecoder { +impl ReadDecoder { pub fn new(r: R) -> Self { Self { reader: r, @@ -64,8 +65,9 @@ impl ReadDecoder { image_data: Option<&mut UnfilterBuf<'_>>, ) -> Result { let (consumed, result) = { - let buf = self.reader.fill_buf()?; - if buf.is_empty() { + let limit = self.decoder.expected_read_limit()?; + let buf = self.reader.fill_buf_limited(limit)?; + if limit > 0 && buf.is_empty() { return Err(DecodingError::IoError(ErrorKind::UnexpectedEof.into())); } self.decoder.update(buf, image_data)? diff --git a/src/decoder/stream.rs b/src/decoder/stream.rs index 16d3305f..ba6bd4fc 100644 --- a/src/decoder/stream.rs +++ b/src/decoder/stream.rs @@ -661,6 +661,32 @@ impl StreamingDecoder { self.info.as_ref() } + /// Calculates the maximum number of bytes the streaming decoder expects to consume in its + /// current state before transitioning to the next state. + /// + /// This value is used by [`LimitBufReader`](crate::decoder::LimitBufReader) to limit buffer + /// refills, avoiding aggressive pre-fetching beyond the current chunk boundary to prevent + /// overshoot past the `IEND` chunk. + pub(crate) fn expected_read_limit(&self) -> Result { + self.state + .as_ref() + .map(|state| match state { + State::U32 { + accumulated_count, .. + } => 4 - *accumulated_count, + State::ReadChunkData(_) | State::ImageData(_) => { + if self.current_chunk.remaining == 0 { + 4 // For the CRC that we will transition to + } else { + self.current_chunk.remaining as usize + } + } + }) + .ok_or_else(|| { + DecodingError::Parameter(ParameterErrorKind::PolledAfterFatalError.into()) + }) + } + pub fn set_ignore_text_chunk(&mut self, ignore_text_chunk: bool) { self.decode_options.set_ignore_text_chunk(ignore_text_chunk); } @@ -3049,7 +3075,7 @@ mod tests { Decoder::new(Cursor::new(png)).read_info().unwrap() } - fn get_fctl_sequence_number(reader: &Reader) -> u32 { + fn get_fctl_sequence_number(reader: &Reader) -> u32 { reader .info() .frame_control diff --git a/src/encoder.rs b/src/encoder.rs index bbdd2dbf..3c9d4e93 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -674,8 +674,6 @@ impl Writer { /// the length of `data` can't be parsed as a `u32` though the length of the chunk data should /// not exceed `i32::MAX` or 2,147,483,647. pub fn write_chunk(&mut self, name: ChunkType, data: &[u8]) -> Result<()> { - use std::convert::TryFrom; - if u32::try_from(data.len()).map_or(true, |length| length > i32::MAX as u32) { let kind = FormatErrorKind::WrittenTooMuch(data.len() - i32::MAX as usize); return Err(EncodingError::Format(kind.into())); diff --git a/src/lib.rs b/src/lib.rs index 05ae4d7f..30f4274a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -85,7 +85,9 @@ pub use crate::adam7::{ pub use crate::adam7::{Adam7Info, Adam7Variant}; pub use crate::common::*; pub use crate::decoder::stream::{DecodeOptions, Decoded, DecodingError, StreamingDecoder}; -pub use crate::decoder::{Decoder, InterlaceInfo, InterlacedRow, Limits, OutputInfo, Reader}; +pub use crate::decoder::{ + Decoder, InterlaceInfo, InterlacedRow, LimitBufRead, LimitBufReader, Limits, OutputInfo, Reader, +}; pub use crate::decoder::{UnfilterBuf, UnfilterRegion}; pub use crate::encoder::{Encoder, EncodingError, StreamWriter, Writer}; pub use crate::filter::Filter; diff --git a/tests/limit_buf_read.rs b/tests/limit_buf_read.rs new file mode 100644 index 00000000..dbdfdd00 --- /dev/null +++ b/tests/limit_buf_read.rs @@ -0,0 +1,103 @@ +use std::fs; +use std::io::{self, BufReader, Cursor, Error, ErrorKind, Read}; + +use png::{Decoder, LimitBufRead, LimitBufReader}; + +/// A mock [`Read`] stream that rejects reads over the given `limit`. +/// +/// This replicates integrations where a buffering layer above a blocking pipe attempts to satisfy +/// the requested size instead of returning a short read. +struct BlockOnLimit { + inner: Cursor>, + limit: usize, + read_bytes: usize, +} + +impl BlockOnLimit { + fn new(data: Vec, limit: usize) -> Self { + Self { + inner: Cursor::new(data), + limit, + read_bytes: 0, + } + } +} + +impl Read for BlockOnLimit { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let remaining = self.limit - self.read_bytes; + if buf.len() > remaining { + return Err(Error::new( + ErrorKind::WouldBlock, + "Blocked on eager overshoot request", + )); + } + let n = self.inner.read(buf)?; + self.read_bytes += n; + Ok(n) + } +} + +fn decode_image(reader: R) -> Result<(), png::DecodingError> { + let decoder = Decoder::new(reader); + let mut reader = decoder.read_info()?; + let mut buf = vec![0; reader.output_buffer_size().unwrap()]; + reader.next_frame(&mut buf)?; + reader.finish()?; + Ok(()) +} + +#[test] +fn test_buf_reader_overshoots() { + let png_data = fs::read("tests/pngsuite/basi0g01.png").unwrap(); + let mut concat_data = Vec::new(); + concat_data.extend_from_slice(&png_data); + concat_data.extend_from_slice(&png_data); + + let mut cursor = Cursor::new(concat_data); + let buf_reader = BufReader::new(&mut cursor); + decode_image(buf_reader).expect("Standard BufReader decode failed"); + + assert_eq!(cursor.position(), (png_data.len() * 2) as u64); +} + +#[test] +fn test_buf_reader_blocks_on_blocking_source() { + let png_data = fs::read("tests/pngsuite/basi0g01.png").unwrap(); + let png_len = png_data.len(); + let blocking_source = BlockOnLimit::new(png_data, png_len); + + let buf_reader = BufReader::new(blocking_source); + let res = decode_image(buf_reader); + + assert!(res.is_err()); + let err = res.unwrap_err(); + assert!(matches!( + err, + png::DecodingError::IoError(ref io_err) if io_err.kind() == ErrorKind::WouldBlock + )); +} + +#[test] +fn test_limit_buf_reader_does_not_overshoot() { + let png_data = fs::read("tests/pngsuite/basi0g01.png").unwrap(); + let mut concat_data = Vec::new(); + concat_data.extend_from_slice(&png_data); + concat_data.extend_from_slice(&png_data); + + let mut cursor = Cursor::new(concat_data); + let limited_reader = LimitBufReader::new(&mut cursor); + decode_image(limited_reader).expect("LimitBufReader decode failed"); + + assert_eq!(cursor.position(), png_data.len() as u64); +} + +#[test] +fn test_limit_buf_reader_does_not_block_on_blocking_source() { + let png_data = fs::read("tests/pngsuite/basi0g01.png").unwrap(); + let png_len = png_data.len(); + let blocking_source = BlockOnLimit::new(png_data, png_len); + + let limited_reader = LimitBufReader::new(blocking_source); + decode_image(limited_reader).expect("LimitBufReader decode failed"); +} diff --git a/tests/partial_decode.rs b/tests/partial_decode.rs index 455d38bb..ed6c7e94 100644 --- a/tests/partial_decode.rs +++ b/tests/partial_decode.rs @@ -3,10 +3,10 @@ use std::io::Write; mod pipe { use std::cell::RefCell; - use std::io::{BufRead, Cursor, Read, Result, Seek, SeekFrom, Write}; + use std::io::{BufRead, Cursor, Read, Result, Write}; use std::rc::Rc; - pub fn create() -> (impl BufRead + Seek, impl Write) { + pub fn create() -> (impl BufRead, impl Write) { let write_end = Pipe { buf: Rc::new(RefCell::new(Cursor::new(Vec::new()))), long_lived_fill_buf: Vec::new(), @@ -46,12 +46,6 @@ mod pipe { } } - impl Seek for Pipe { - fn seek(&mut self, pos: SeekFrom) -> Result { - self.buf.borrow_mut().seek(pos) - } - } - impl Write for Pipe { fn write(&mut self, buf: &[u8]) -> Result { self.buf.borrow_mut().get_mut().extend_from_slice(buf);