Refactor optimize_raw (#670)

Code always tends to get messy over time. I've found the `optimize_raw` function increasingly harder to read, particularly after the addition of fast mode, so I've taken some time to refactor and simplify it. One change of note here is the main compression trials now use the Evaluator. This means verbose output is a little different which is shown below. There is no change to performance or output size. `-vvo2`: master ``` Processing: tests/files/rgba_8_should_be_palette_4.png 500x400 pixels, PNG format 8-bit RGB + Alpha, non-interlaced IDAT size = 2757 bytes File size = 18109 bytes Eval: 4-bit Indexed (5 colors) None 1837 bytes Eval: 8-bit Indexed (5 colors) None 1988 bytes Eval: 4-bit Indexed (5 colors) Bigrams >1837 bytes Eval: 8-bit Indexed (5 colors) Bigrams >1837 bytes Transformed image to 4-bit Indexed (5 colors), non-interlaced Evaluating: 2 filters Eval: 4-bit Indexed (5 colors) Sub >1810 bytes Eval: 4-bit Indexed (5 colors) Entropy >1810 bytes Trying: None zc = 11 f = None 1583 bytes Found better combination: zc = 11 f = None 1583 bytes IDAT size = 1583 bytes (1174 bytes decrease) file size = 16962 bytes (1147 bytes = 6.33% decrease) 16962 bytes (6.33% smaller): Running in pretend mode, no output ``` `-vvo2`: PR ``` Processing: tests/files/rgba_8_should_be_palette_4.png 500x400 pixels, PNG format 8-bit RGB + Alpha, non-interlaced IDAT size = 2757 bytes File size = 18109 bytes Eval: 4-bit Indexed (5 colors) None 1837 bytes Eval: 8-bit Indexed (5 colors) None 1988 bytes Eval: 4-bit Indexed (5 colors) Bigrams >1837 bytes Eval: 8-bit Indexed (5 colors) Bigrams >1837 bytes Transformed image to 4-bit Indexed (5 colors), non-interlaced Evaluating 2 filters Eval: 4-bit Indexed (5 colors) Sub >1810 bytes Eval: 4-bit Indexed (5 colors) Entropy >1810 bytes Trying filter None with zc = 11 1610 bytes Found better result: zc = 11, f = None IDAT size = 1583 bytes (1174 bytes decrease) file size = 16962 bytes (1147 bytes = 6.33% decrease) 16962 bytes (6.33% smaller): Running in pretend mode, no output ``` `-vvZo5`: master ``` Processing: tests/files/rgba_8_should_be_palette_4.png 500x400 pixels, PNG format 8-bit RGB + Alpha, non-interlaced IDAT size = 2757 bytes File size = 18109 bytes Eval: 8-bit Indexed (battiato sort) None 1821 bytes Eval: 4-bit Indexed (5 colors) None 1657 bytes Eval: 8-bit Indexed (mzeng sort) None 1821 bytes Eval: 8-bit Indexed (5 colors) None 1821 bytes Eval: 8-bit Indexed (battiato sort) Bigrams >1821 bytes Eval: 4-bit Indexed (5 colors) Bigrams >1657 bytes Eval: 8-bit Indexed (mzeng sort) Bigrams >1657 bytes Eval: 8-bit Indexed (5 colors) Bigrams >1657 bytes Transformed image to 4-bit Indexed (5 colors), non-interlaced Trying: 8 filters zc = zopfli f = Brute 1562 bytes zc = zopfli f = Sub >1562 bytes zc = zopfli f = Bigrams >1562 bytes zc = zopfli f = None 1407 bytes zc = zopfli f = Up >1407 bytes zc = zopfli f = MinSum >1407 bytes zc = zopfli f = BigEnt >1407 bytes zc = zopfli f = Entropy >1407 bytes Found better combination: zc = zopfli f = None 1407 bytes IDAT size = 1407 bytes (1350 bytes decrease) file size = 16786 bytes (1323 bytes = 7.31% decrease) 16786 bytes (7.31% smaller): Running in pretend mode, no output ``` `-vvZo5`: PR ``` Processing: tests/files/rgba_8_should_be_palette_4.png 500x400 pixels, PNG format 8-bit RGB + Alpha, non-interlaced IDAT size = 2757 bytes File size = 18109 bytes Eval: 8-bit Indexed (battiato sort) None 1821 bytes Eval: 4-bit Indexed (5 colors) None 1657 bytes Eval: 8-bit Indexed (mzeng sort) None 1821 bytes Eval: 8-bit Indexed (5 colors) None 1821 bytes Eval: 8-bit Indexed (battiato sort) Bigrams >1657 bytes Eval: 4-bit Indexed (5 colors) Bigrams >1657 bytes Eval: 8-bit Indexed (mzeng sort) Bigrams >1657 bytes Eval: 8-bit Indexed (5 colors) Bigrams >1657 bytes Transformed image to 4-bit Indexed (5 colors), non-interlaced Trying 8 filters with zopfli, zi = 15 Eval: 4-bit Indexed (5 colors) Brute 1589 bytes Eval: 4-bit Indexed (5 colors) Bigrams 1641 bytes Eval: 4-bit Indexed (5 colors) Sub 1711 bytes Eval: 4-bit Indexed (5 colors) None 1434 bytes Eval: 4-bit Indexed (5 colors) Up 1764 bytes Eval: 4-bit Indexed (5 colors) MinSum 1760 bytes Eval: 4-bit Indexed (5 colors) BigEnt 1742 bytes Eval: 4-bit Indexed (5 colors) Entropy 1748 bytes Found better result: zopfli, zi = 15, f = None IDAT size = 1407 bytes (1350 bytes decrease) file size = 16786 bytes (1323 bytes = 7.31% decrease) 16786 bytes (7.31% smaller): Running in pretend mode, no output ```
shssoichiro · Jan 29, 2025 · bbde68d · bbde68d
1 parent 8a44cdb
commit bbde68d
Show file tree

Hide file tree

Showing 10 changed files with 190 additions and 256 deletions.
diff --git a/benches/deflate.rs b/benches/deflate.rs
@@ -13,21 +13,15 @@ fn deflate_16_bits(b: &mut Bencher) {
     let input = test::black_box(PathBuf::from("tests/files/rgb_16_should_be_rgb_16.png"));
     let png = PngData::new(&input, &Options::default()).unwrap();
 
-    b.iter(|| {
-        let min = AtomicMin::new(None);
-        deflate(png.raw.data.as_ref(), 12, &min)
-    });
+    b.iter(|| deflate(png.raw.data.as_ref(), 12, None));
 }
 
 #[bench]
 fn deflate_8_bits(b: &mut Bencher) {
     let input = test::black_box(PathBuf::from("tests/files/rgb_8_should_be_rgb_8.png"));
     let png = PngData::new(&input, &Options::default()).unwrap();
 
-    b.iter(|| {
-        let min = AtomicMin::new(None);
-        deflate(png.raw.data.as_ref(), 12, &min)
-    });
+    b.iter(|| deflate(png.raw.data.as_ref(), 12, None));
 }
 
 #[bench]
@@ -37,10 +31,7 @@ fn deflate_4_bits(b: &mut Bencher) {
     ));
     let png = PngData::new(&input, &Options::default()).unwrap();
 
-    b.iter(|| {
-        let min = AtomicMin::new(None);
-        deflate(png.raw.data.as_ref(), 12, &min)
-    });
+    b.iter(|| deflate(png.raw.data.as_ref(), 12, None));
 }
 
 #[bench]
@@ -50,10 +41,7 @@ fn deflate_2_bits(b: &mut Bencher) {
     ));
     let png = PngData::new(&input, &Options::default()).unwrap();
 
-    b.iter(|| {
-        let min = AtomicMin::new(None);
-        deflate(png.raw.data.as_ref(), 12, &min)
-    });
+    b.iter(|| deflate(png.raw.data.as_ref(), 12, None));
 }
 
 #[bench]
@@ -63,10 +51,7 @@ fn deflate_1_bits(b: &mut Bencher) {
     ));
     let png = PngData::new(&input, &Options::default()).unwrap();
 
-    b.iter(|| {
-        let min = AtomicMin::new(None);
-        deflate(png.raw.data.as_ref(), 12, &min)
-    });
+    b.iter(|| deflate(png.raw.data.as_ref(), 12, None));
 }
 
 #[bench]

diff --git a/src/atomicmin.rs b/src/atomicmin.rs
@@ -22,11 +22,6 @@ impl AtomicMin {
         }
     }
 
-    /// Unset value is `usize_max`
-    pub const fn as_atomic_usize(&self) -> &AtomicUsize {
-        &self.val
-    }
-
     /// Try a new value, returning true if it is the new minimum
     pub fn set_min(&self, new_val: usize) -> bool {
         new_val < self.val.fetch_min(new_val, SeqCst)

diff --git a/src/colors.rs b/src/colors.rs
@@ -32,13 +32,11 @@ impl Display for ColorType {
     #[inline]
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
-            Self::Grayscale { .. } => Display::fmt("Grayscale", f),
-            Self::RGB { .. } => Display::fmt("RGB", f),
-            Self::Indexed { palette } => {
-                Display::fmt(&format!("Indexed ({} colors)", palette.len()), f)
-            }
-            Self::GrayscaleAlpha => Display::fmt("Grayscale + Alpha", f),
-            Self::RGBA => Display::fmt("RGB + Alpha", f),
+            Self::Grayscale { .. } => write!(f, "Grayscale"),
+            Self::RGB { .. } => write!(f, "RGB"),
+            Self::Indexed { palette } => write!(f, "Indexed ({} colors)", palette.len()),
+            Self::GrayscaleAlpha => write!(f, "Grayscale + Alpha"),
+            Self::RGBA => write!(f, "RGB + Alpha"),
         }
     }
 }

diff --git a/src/deflate/deflater.rs b/src/deflate/deflater.rs
@@ -1,12 +1,10 @@
 use libdeflater::*;
 
-use crate::{atomicmin::AtomicMin, PngError, PngResult};
+use crate::{PngError, PngResult};
 
-pub fn deflate(data: &[u8], level: u8, max_size: &AtomicMin) -> PngResult<Vec<u8>> {
+pub fn deflate(data: &[u8], level: u8, max_size: Option<usize>) -> PngResult<Vec<u8>> {
     let mut compressor = Compressor::new(CompressionLvl::new(level.into()).unwrap());
-    let capacity = max_size
-        .get()
-        .unwrap_or_else(|| compressor.zlib_compress_bound(data.len()));
+    let capacity = max_size.unwrap_or_else(|| compressor.zlib_compress_bound(data.len()));
     let mut dest = vec![0; capacity];
     let len = compressor
         .zlib_compress(data, &mut dest)

diff --git a/src/deflate/mod.rs b/src/deflate/mod.rs
@@ -5,7 +5,7 @@ use std::{fmt, fmt::Display};
 
 pub use deflater::{crc32, deflate, inflate};
 
-use crate::{AtomicMin, PngError, PngResult};
+use crate::{PngError, PngResult};
 #[cfg(feature = "zopfli")]
 mod zopfli_oxipng;
 #[cfg(feature = "zopfli")]
@@ -30,13 +30,13 @@ pub enum Deflaters {
 }
 
 impl Deflaters {
-    pub(crate) fn deflate(self, data: &[u8], max_size: &AtomicMin) -> PngResult<Vec<u8>> {
+    pub(crate) fn deflate(self, data: &[u8], max_size: Option<usize>) -> PngResult<Vec<u8>> {
         let compressed = match self {
             Self::Libdeflater { compression } => deflate(data, compression, max_size)?,
             #[cfg(feature = "zopfli")]
             Self::Zopfli { iterations } => zopfli_deflate(data, iterations)?,
         };
-        if let Some(max) = max_size.get() {
+        if let Some(max) = max_size {
             if compressed.len() > max {
                 return Err(PngError::DeflatedDataTooLong(max));
             }
@@ -49,9 +49,9 @@ impl Display for Deflaters {
     #[inline]
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
-            Self::Libdeflater { compression } => Display::fmt(compression, f),
+            Self::Libdeflater { compression } => write!(f, "zc = {compression}"),
             #[cfg(feature = "zopfli")]
-            Self::Zopfli { .. } => Display::fmt("zopfli", f),
+            Self::Zopfli { iterations } => write!(f, "zopfli, zi = {iterations}"),
         }
     }
 }
diff --git a/src/evaluate.rs b/src/evaluate.rs
@@ -10,6 +10,7 @@ use std::sync::{
 
 #[cfg(feature = "parallel")]
 use crossbeam_channel::{unbounded, Receiver, Sender};
+use deflate::Deflaters;
 use indexmap::IndexSet;
 use log::trace;
 use rayon::prelude::*;
@@ -28,9 +29,15 @@ pub(crate) struct Candidate {
 }
 
 impl Candidate {
+    /// Return an estimate of the output size which can help with evaluation of very small data
+    #[must_use]
+    pub fn estimated_output_size(&self) -> usize {
+        self.idat_data.len() + self.image.key_chunks_size()
+    }
+
     fn cmp_key(&self) -> impl Ord {
         (
-            self.idat_data.len() + self.image.key_chunks_size(),
+            self.estimated_output_size(),
             self.image.data.len(),
             self.filter,
             // Prefer the later image added (e.g. baseline, which is always added last)
@@ -43,7 +50,7 @@ impl Candidate {
 pub(crate) struct Evaluator {
     deadline: Arc<Deadline>,
     filters: IndexSet<RowFilter>,
-    compression: u8,
+    deflater: Deflaters,
     optimize_alpha: bool,
     nth: AtomicUsize,
     executed: Arc<AtomicUsize>,
@@ -60,15 +67,15 @@ impl Evaluator {
     pub fn new(
         deadline: Arc<Deadline>,
         filters: IndexSet<RowFilter>,
-        compression: u8,
+        deflater: Deflaters,
         optimize_alpha: bool,
     ) -> Self {
         #[cfg(feature = "parallel")]
         let eval_channel = unbounded();
         Self {
             deadline,
             filters,
-            compression,
+            deflater,
             optimize_alpha,
             nth: AtomicUsize::new(0),
             executed: Arc::new(AtomicUsize::new(0)),
@@ -118,7 +125,7 @@ impl Evaluator {
         // These clones are only cheap refcounts
         let deadline = self.deadline.clone();
         let filters = self.filters.clone();
-        let compression = self.compression;
+        let deflater = self.deflater;
         let optimize_alpha = self.optimize_alpha;
         let executed = self.executed.clone();
         let best_candidate_size = self.best_candidate_size.clone();
@@ -140,9 +147,16 @@ impl Evaluator {
                     return;
                 }
                 let filtered = image.filter_image(filter, optimize_alpha);
-                let idat_data = deflate::deflate(&filtered, compression, &best_candidate_size);
+                let idat_data = deflater.deflate(&filtered, best_candidate_size.get());
                 if let Ok(idat_data) = idat_data {
-                    let size = idat_data.len() + image.key_chunks_size();
+                    let new = Candidate {
+                        image: image.clone(),
+                        idat_data,
+                        filtered,
+                        filter,
+                        nth,
+                    };
+                    let size = new.estimated_output_size();
                     best_candidate_size.set_min(size);
                     trace!(
                         "Eval: {}-bit {:23} {:8}   {} bytes",
@@ -151,13 +165,6 @@ impl Evaluator {
                         filter,
                         size
                     );
-                    let new = Candidate {
-                        image: image.clone(),
-                        idat_data,
-                        filtered,
-                        filter,
-                        nth,
-                    };
 
                     #[cfg(feature = "parallel")]
                     {

diff --git a/src/headers.rs b/src/headers.rs
@@ -8,7 +8,7 @@ use crate::{
     display_chunks::DISPLAY_CHUNKS,
     error::PngError,
     interlace::Interlacing,
-    AtomicMin, Deflaters, PngResult,
+    Deflaters, PngResult,
 };
 
 #[derive(Debug, Clone)]
@@ -275,9 +275,9 @@ pub fn extract_icc(iccp: &Chunk) -> Option<Vec<u8>> {
     }
 }
 
-/// Construct an iCCP chunk by compressing the ICC profile
-pub fn construct_iccp(icc: &[u8], deflater: Deflaters) -> PngResult<Chunk> {
-    let mut compressed = deflater.deflate(icc, &AtomicMin::new(None))?;
+/// Make an iCCP chunk by compressing the ICC profile
+pub fn make_iccp(icc: &[u8], deflater: Deflaters, max_size: Option<usize>) -> PngResult<Chunk> {
+    let mut compressed = deflater.deflate(icc, max_size)?;
     let mut data = Vec::with_capacity(compressed.len() + 5);
     data.extend(b"icc"); // Profile name - generally unused, can be anything
     data.extend([0, 0]); // Null separator, zlib compression method