From 2a619fe15f7d8819856b135ec33a05eac3708186 Mon Sep 17 00:00:00 2001 From: Lukasz Anforowicz Date: Thu, 31 Oct 2024 19:57:33 +0000 Subject: [PATCH 1/2] Add a benchmark covering `Reader.next_frame_info`. --- Cargo.toml | 13 ++++++++---- benches/next_frame_info.rs | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 benches/next_frame_info.rs diff --git a/Cargo.toml b/Cargo.toml index 49d47ad5..6cd7ca2f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,13 +48,18 @@ name = "decoder" harness = false [[bench]] -path = "benches/unfilter.rs" -name = "unfilter" +path = "benches/expand_paletted.rs" +name = "expand_paletted" harness = false required-features = ["benchmarks"] [[bench]] -path = "benches/expand_paletted.rs" -name = "expand_paletted" +path = "benches/next_frame_info.rs" +name = "next_frame_info" +harness = false + +[[bench]] +path = "benches/unfilter.rs" +name = "unfilter" harness = false required-features = ["benchmarks"] diff --git a/benches/next_frame_info.rs b/benches/next_frame_info.rs new file mode 100644 index 00000000..1fa61c7b --- /dev/null +++ b/benches/next_frame_info.rs @@ -0,0 +1,43 @@ +use std::fs; +use std::path::Path; + +use criterion::{ + criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion, +}; +use png::Decoder; + +criterion_group! {benches, load_all} +criterion_main!(benches); + +fn load_all(c: &mut Criterion) { + let mut g = c.benchmark_group("next_frame_info"); + bench_file(&mut g, Path::new("tests/animated/basic_f20.png"), 18, 35); +} + +fn bench_file( + g: &mut BenchmarkGroup, + png_path: &Path, + number_of_frames_to_skip: usize, + expected_fctl_sequence_number: u32, +) { + let data = fs::read(png_path).unwrap(); + let name = format!("{}: {} skips", png_path.display(), number_of_frames_to_skip); + g.bench_with_input(&name, data.as_slice(), |b, data| { + b.iter(|| { + let decoder = Decoder::new(data); + let mut reader = decoder.read_info().unwrap(); + for _ in 0..number_of_frames_to_skip { + reader.next_frame_info().unwrap(); + } + assert_eq!( + reader + .info() + .frame_control + .as_ref() + .unwrap() + .sequence_number, + expected_fctl_sequence_number, + ); + }) + }); +} From 84b2d3789ad632b578bd284bb27b05d3a40f3445 Mon Sep 17 00:00:00 2001 From: Lukasz Anforowicz Date: Fri, 1 Nov 2024 21:14:31 +0000 Subject: [PATCH 2/2] Performance: Avoid spending time decompressing skipped frames. This commit adds an internal `DecodeOptions::skip_frame_decoding` flag and sets it to true from `ReadDecoder::finish_decoding_image_data`. This results in the following performance gains when using the `next_frame_info` public API to skip frames: `change: [-94.186% -94.170% -94.157%] (p = 0.00 < 0.05)` This commit is mainly motivated by https://crbug.com/371060427 and https://github.com/image-rs/image-png/issues/510. Hat tip to @kornelski for suggesting this approach in https://github.com/image-rs/image-png/issues/510#issuecomment-2452454387 --- src/decoder/mod.rs | 6 +++ src/decoder/read_decoder.rs | 50 +++++++++++++++++++++ src/decoder/stream.rs | 87 +++++++++++++++++++++++++++++++++++-- 3 files changed, 140 insertions(+), 3 deletions(-) diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs index 7c0759c3..703e7827 100644 --- a/src/decoder/mod.rs +++ b/src/decoder/mod.rs @@ -325,6 +325,12 @@ impl Reader { /// Returns a [`ParameterError`] when there are no more animation frames. /// To avoid this the caller can check if [`Info::animation_control`] exists /// and consult [`AnimationControl::num_frames`]. + /// + /// Note that this method may end up skipping and discarding some image data. + /// When the whole image frame is skipped in this way, then (for better runtime + /// performance) the image data is not decompressed. This may result in some + /// format errors being undetected (e.g. Adler 32 checksums would not be checked + /// in this case). pub fn next_frame_info(&mut self) -> Result<&FrameControl, DecodingError> { let remaining_frames = if self.subframe.consumed_and_flushed { self.remaining_frames diff --git a/src/decoder/read_decoder.rs b/src/decoder/read_decoder.rs index 44bcb48d..aa2cc684 100644 --- a/src/decoder/read_decoder.rs +++ b/src/decoder/read_decoder.rs @@ -20,6 +20,7 @@ use crate::common::Info; pub(crate) struct ReadDecoder { reader: BufReader, decoder: StreamingDecoder, + state: State, } impl ReadDecoder { @@ -27,6 +28,7 @@ impl ReadDecoder { Self { reader: BufReader::with_capacity(CHUNK_BUFFER_SIZE, r), decoder: StreamingDecoder::new(), + state: State::Initial, } } @@ -37,6 +39,7 @@ impl ReadDecoder { Self { reader: BufReader::with_capacity(CHUNK_BUFFER_SIZE, r), decoder, + state: State::Initial, } } @@ -106,6 +109,10 @@ impl ReadDecoder { /// /// Prerequisite: **Not** within `IDAT` / `fdAT` chunk sequence. pub fn read_until_image_data(&mut self) -> Result<(), DecodingError> { + // Caller should guarantee that we are in a state that meets function prerequisites. + debug_assert!(self.state != State::StartOfImageData); + debug_assert!(self.state != State::MiddleOfImageData); + loop { match self.decode_next_without_image_data()? { Decoded::ChunkBegin(_, chunk::IDAT) | Decoded::ChunkBegin(_, chunk::fdAT) => break, @@ -119,6 +126,9 @@ impl ReadDecoder { _ => {} } } + + self.state = State::StartOfImageData; + self.decoder.set_skip_frame_decoding(false); Ok(()) } @@ -130,6 +140,13 @@ impl ReadDecoder { &mut self, image_data: &mut Vec, ) -> Result { + // Caller should guarantee that we are in a state that meets function prerequisites. + debug_assert!(matches!( + self.state, + State::StartOfImageData | State::MiddleOfImageData + )); + + self.state = State::MiddleOfImageData; match self.decode_next(image_data)? { Decoded::ImageData => Ok(ImageDataCompletionStatus::ExpectingMoreData), Decoded::ImageDataFlushed => Ok(ImageDataCompletionStatus::Done), @@ -148,9 +165,22 @@ impl ReadDecoder { /// /// Prerequisite: Input is currently positioned within `IDAT` / `fdAT` chunk sequence. pub fn finish_decoding_image_data(&mut self) -> Result<(), DecodingError> { + match self.state { + // If skipping image data from start to end, then bypass decompression. + State::StartOfImageData => self.decoder.set_skip_frame_decoding(true), + + // Otherwise, keep passing the remainder of the data to the decompressor + // (e.g. to detect adler32 errors if this is what `DecodeOptions` ask for). + State::MiddleOfImageData => (), + + // Caller should guarantee that we are in a state that meets function prerequisites. + _ => unreachable!(), + } + loop { let mut to_be_discarded = vec![]; if let ImageDataCompletionStatus::Done = self.decode_image_data(&mut to_be_discarded)? { + self.state = State::OutsideOfImageData; return Ok(()); } } @@ -160,10 +190,12 @@ impl ReadDecoder { /// /// Prerequisite: `IEND` chunk hasn't been reached yet. pub fn read_until_end_of_input(&mut self) -> Result<(), DecodingError> { + debug_assert!(self.state != State::EndOfInput); while !matches!( self.decode_next_and_discard_image_data()?, Decoded::ImageEnd ) {} + self.state = State::EndOfInput; Ok(()) } @@ -177,3 +209,21 @@ pub(crate) enum ImageDataCompletionStatus { ExpectingMoreData, Done, } + +#[derive(Debug, Eq, PartialEq)] +enum State { + /// Before the first `IDAT`. + Initial, + + /// At the start of a new `IDAT` or `fdAT` sequence. + StartOfImageData, + + /// In the middle of an `IDAT` or `fdAT` sequence. + MiddleOfImageData, + + /// Outside of an `IDAT` or `fdAT` sequence. + OutsideOfImageData, + + /// After consuming `IEND`. + EndOfInput, +} diff --git a/src/decoder/stream.rs b/src/decoder/stream.rs index d0b7cb5c..abda73ea 100644 --- a/src/decoder/stream.rs +++ b/src/decoder/stream.rs @@ -428,6 +428,7 @@ pub struct DecodeOptions { ignore_text_chunk: bool, ignore_iccp_chunk: bool, skip_ancillary_crc_failures: bool, + skip_frame_decoding: bool, } impl Default for DecodeOptions { @@ -438,6 +439,7 @@ impl Default for DecodeOptions { ignore_text_chunk: false, ignore_iccp_chunk: false, skip_ancillary_crc_failures: true, + skip_frame_decoding: false, } } } @@ -615,6 +617,10 @@ impl StreamingDecoder { .set_skip_ancillary_crc_failures(skip_ancillary_crc_failures) } + pub(crate) fn set_skip_frame_decoding(&mut self, skip_frame_decoding: bool) { + self.decode_options.skip_frame_decoding = skip_frame_decoding; + } + /// Low level StreamingDecoder interface. /// /// Allows to stream partial data to the encoder. Returns a tuple containing the bytes that have @@ -752,7 +758,16 @@ impl StreamingDecoder { debug_assert!(type_str == IDAT || type_str == chunk::fdAT); let len = std::cmp::min(buf.len(), self.current_chunk.remaining as usize); let buf = &buf[..len]; - let consumed = self.inflater.decompress(buf, image_data)?; + let consumed = if self.decode_options.skip_frame_decoding { + // `inflater.reset()` is not strictly necessary. We do it anyway to ensure + // that if (unexpectedly) `skip_frame_decoding` changes before the end of this + // frame, then it will (most likely) lead to decompression errors later (when + // restarting again from the middle). + self.inflater.reset(); + len + } else { + self.inflater.decompress(buf, image_data)? + }; self.current_chunk.crc.update(&buf[..consumed]); self.current_chunk.remaining -= consumed as u32; if self.current_chunk.remaining == 0 { @@ -811,7 +826,9 @@ impl StreamingDecoder { && (self.current_chunk.type_ == IDAT || self.current_chunk.type_ == chunk::fdAT) { self.current_chunk.type_ = type_str; - self.inflater.finish_compressed_chunks(image_data)?; + if !self.decode_options.skip_frame_decoding { + self.inflater.finish_compressed_chunks(image_data)?; + } self.inflater.reset(); self.ready_for_idat_chunks = false; self.ready_for_fdat_chunks = false; @@ -1704,7 +1721,7 @@ mod tests { use super::ScaledFloat; use super::SourceChromaticities; use crate::test_utils::*; - use crate::{Decoder, DecodingError, Reader}; + use crate::{DecodeOptions, Decoder, DecodingError, Reader}; use approx::assert_relative_eq; use byteorder::WriteBytesExt; use std::cell::RefCell; @@ -2748,4 +2765,68 @@ mod tests { &err, ); } + + struct ByteByByteReader(R); + + impl Read for ByteByByteReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let len = buf.len().min(1); + self.0.read(&mut buf[..len]) + } + } + + /// When decoding byte-by-byte we will decompress all image bytes *before* + /// consuming the final 4 bytes of the adler32 checksum. And we consume + /// image bytes using `ReadDecoder.decode_image_data` but the remainder of + /// the compressed data is consumed using + /// `ReadDecoder.finish_decoding_image_data`. In + /// https://github.com/image-rs/image-png/pull/533 we consider tweaking + /// `finish_decoding_image_data` to discard the final, non-image bytes, + /// rather then decompressing them - the initial naive implementation of + /// such short-circuiting would have effectively disabled adler32 checksum + /// processing when decoding byte-by-byte. This is what this test checks. + #[test] + fn test_broken_adler_checksum_when_decoding_byte_by_byte() { + let png = { + let width = 16; + let mut frame_data = generate_rgba8_with_width_and_height(width, width); + + // Inject invalid adler32 checksum + let adler32: &mut [u8; 4] = frame_data.last_chunk_mut::<4>().unwrap(); + *adler32 = [12, 34, 56, 78]; + + let mut png = VecDeque::new(); + write_png_sig(&mut png); + write_rgba8_ihdr_with_width(&mut png, width); + write_chunk(&mut png, b"IDAT", &frame_data); + write_iend(&mut png); + png + }; + + // Default `DecodeOptions` ignore adler32 - we expect no errors here. + let mut reader = Decoder::new(ByteByByteReader(png.clone())) + .read_info() + .unwrap(); + let mut buf = vec![0; reader.output_buffer_size()]; + reader.next_frame(&mut buf).unwrap(); + + // But we expect to get an error after explicitly opting into adler32 + // checks. + let mut reader = { + let mut options = DecodeOptions::default(); + options.set_ignore_adler32(false); + Decoder::new_with_options(ByteByByteReader(png.clone()), options) + .read_info() + .unwrap() + }; + let err = reader.next_frame(&mut buf).unwrap_err(); + assert!( + matches!(&err, DecodingError::Format(_)), + "Unexpected kind of error: {:?}", + &err, + ); + let msg = format!("{:?}", err); + assert!(msg.contains("CorruptFlateStream")); + assert!(msg.contains("WrongChecksum")); + } }