From 9525864b82f2171913e4aa77b150f456a1e27345 Mon Sep 17 00:00:00 2001
From: Sam Sieber
Date: Tue, 31 Oct 2023 10:11:59 -0500
Subject: [PATCH] Allow for intra-byte bits_per_sample.

Specifically, correctly read stripped TIFFs for any bits_per_sample up to
the normal maximum bits_per_sample limit. Also, correctly read tiled TIFFs
that have bit padding in the last tile of each row. Bit padding in more
than one tile per row is not supported.

Future work could include error reporting about gaps in coverage, tests,
and a separate newtype for buffer indexing (vs. usize for bytes).
---
 src/decoder/image.rs | 27 ++++++++++++++++++---------
 src/decoder/mod.rs   | 26 ++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/src/decoder/image.rs b/src/decoder/image.rs
index 55e1e996..8e2a0ae1 100644
--- a/src/decoder/image.rs
+++ b/src/decoder/image.rs
@@ -557,7 +557,7 @@ impl Image {
             | (ColorType::YCbCr(n), _)
             | (ColorType::Gray(n), _)
                 if usize::from(n) == buffer.byte_len() * 8 => {}
-            (ColorType::Gray(n), DecodingBuffer::U8(_)) if n < 8 => match self.predictor {
+            (ColorType::Gray(n), DecodingBuffer::U8(_)) if n % 8 > 0 => match self.predictor {
                 Predictor::None => {}
                 Predictor::Horizontal => {
                     return Err(TiffError::UnsupportedError(
@@ -624,19 +624,25 @@ impl Image {
             self.jpeg_tables.as_deref().map(|a| &**a),
         )?;

+        // Polyfill for usize::div_ceil, which was stabilized in Rust 1.73
+        fn usize_div_ceil(numerator: usize, denominator: usize) -> usize {
+            (numerator + denominator - 1) / denominator
+        }
+
         if output_width == data_dims.0 as usize && padding_right == 0 {
-            let total_samples = data_dims.0 as usize * data_dims.1 as usize * samples;
-            let tile = &mut buffer.as_bytes_mut()[..total_samples * byte_len];
+            let row_buffer_units: usize = usize_div_ceil(data_dims.0 as usize * samples * self.bits_per_sample as usize, byte_len * 8);
+            let total_buffer_units = row_buffer_units * data_dims.1 as usize;
+            let tile = &mut buffer.as_bytes_mut()[..total_buffer_units * byte_len];
             reader.read_exact(tile)?;

             for row in 0..data_dims.1 as usize {
-                let row_start = row * output_width * samples;
-                let row_end = (row + 1) * output_width * samples;
+                let row_start = row * row_buffer_units;
+                let row_end = (row + 1) * row_buffer_units;
                 let row = buffer.subrange(row_start..row_end);
                 super::fix_endianness_and_predict(row, samples, byte_order, predictor);
             }
             if photometric_interpretation == PhotometricInterpretation::WhiteIsZero {
-                super::invert_colors(&mut buffer.subrange(0..total_samples), color_type);
+                super::invert_colors(&mut buffer.subrange(0..total_buffer_units), color_type);
             }
         } else if padding_right > 0 && self.predictor == Predictor::FloatingPoint {
             // The floating point predictor shuffles the padding bytes into the encoded output, so
@@ -658,16 +664,19 @@ impl Image {
                 }
             }
         } else {
+            let row_buffer_units: usize = usize_div_ceil(data_dims.0 as usize * samples * self.bits_per_sample as usize, byte_len * 8);
+            let output_buffer_units = usize_div_ceil(output_width * samples * self.bits_per_sample as usize, byte_len * 8);
+            let row_with_padding_buffer_units = usize_div_ceil((data_dims.0 as usize + padding_right as usize) * samples * self.bits_per_sample as usize, byte_len * 8);
             for row in 0..data_dims.1 as usize {
-                let row_start = row * output_width * samples;
-                let row_end = row_start + data_dims.0 as usize * samples;
+                let row_start = row * output_buffer_units;
+                let row_end = row_start + row_buffer_units;
                 let row = &mut buffer.as_bytes_mut()[(row_start * byte_len)..(row_end * byte_len)];
                 reader.read_exact(row)?;

                 // Skip horizontal padding
                 if padding_right > 0 {
-                    let len = u64::try_from(padding_right as usize * samples * byte_len)?;
+                    let len = u64::try_from((row_with_padding_buffer_units - row_buffer_units) * byte_len)?;
                     io::copy(&mut reader.by_ref().take(len), &mut io::sink())?;
                 }
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index cf2c18ed..aa031757 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -1055,6 +1055,19 @@ impl<R: Read + Seek> Decoder<R> {
         Ok(())
     }

+    /// TODO: this over-allocates when (bits_per_sample * samples) % 8 != 0.
+    /// If we keep per-tile bit padding, we cannot allocate precisely without knowing the number of tiles across the image.
+    ///
+    /// We basically have two options for precise allocation without knowing the number of tiles across:
+    /// * Ban tiles where tile_dim % (bits_per_sample * samples) == 0
+    /// * Bit-shift data to remove any padding bits between tiles
+    ///
+    /// At this point, we don't support tiles when tile_dim % (bits_per_sample * samples) == 0,
+    /// but it could be beneficial to add support for a buffer output format that tracks padding bits and provides an iterator over
+    /// intra-byte sized numbers.
+    ///
+    /// Note also that this method is used both for allocating a buffer for a single chunk
+    /// and for an entire image, which influences how that implementation would go.
     fn result_buffer(&self, width: usize, height: usize) -> TiffResult<DecodingResult> {
         let buffer_size = match width
             .checked_mul(height)
@@ -1152,8 +1165,16 @@ impl<R: Read + Seek> Decoder<R> {
             ));
         }

+        /// Named this way to avoid a conflict with usize::div_ceil when compiling on Rust 1.73+
+        fn usize_div_ceil(numerator: usize, denominator: usize) -> usize {
+            (numerator + denominator - 1) / denominator
+        }
+
+        let byte_len = result.as_buffer(0).byte_len();
         let chunks_across = ((width - 1) / chunk_dimensions.0 + 1) as usize;
-        let strip_samples = width as usize * chunk_dimensions.1 as usize * samples;
+        // Round each row's buffer length up so that the next row starts on a buffer-unit boundary.
+        // This assumes that non-end-of-row chunks fill their portion of the buffer exactly, i.e. there are no unused bits within an unpadded chunk.
+        let strip_samples = usize_div_ceil(width as usize * samples * self.image.bits_per_sample as usize, 8 * byte_len) * chunk_dimensions.1 as usize;

         let image_chunks = self.image().chunk_offsets.len() / self.image().strips_per_pixel();
         // For multi-band images, only the first band is read.
@@ -1165,7 +1186,8 @@ impl<R: Read + Seek> Decoder<R> {

             let x = chunk % chunks_across;
             let y = chunk / chunks_across;
-            let buffer_offset = y * strip_samples + x * chunk_dimensions.0 as usize * samples;
+            let row_buffer_len = usize_div_ceil(chunk_dimensions.0 as usize * samples * self.image.bits_per_sample as usize, 8 * byte_len);
+            let buffer_offset = y * strip_samples + x * row_buffer_len;
             let byte_order = self.reader.byte_order;
             self.image.expand_chunk(
                 &mut self.reader,
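
Note (not part of the patch): a minimal standalone sketch of the rounding arithmetic the patch relies on, rounding a row's bit length up to whole buffer elements so each row starts on an element boundary. The helper names and example figures below are illustrative assumptions, not code from the library.

// Illustrative only: mirrors the usize_div_ceil-based rounding introduced by the patch.
fn usize_div_ceil(numerator: usize, denominator: usize) -> usize {
    (numerator + denominator - 1) / denominator
}

// Buffer elements needed for one row of `width` pixels with `samples` samples per
// pixel at `bits_per_sample` bits each, where one buffer element is `byte_len` bytes.
fn row_buffer_units(width: usize, samples: usize, bits_per_sample: usize, byte_len: usize) -> usize {
    usize_div_ceil(width * samples * bits_per_sample, byte_len * 8)
}

fn main() {
    // 7 pixels of 1-bit gray still occupy one full byte per row.
    assert_eq!(row_buffer_units(7, 1, 1, 1), 1);
    // 10 pixels of 12-bit gray are 120 bits, i.e. 15 bytes per row.
    assert_eq!(row_buffer_units(10, 1, 12, 1), 15);
    // With 2-byte buffer elements the same row rounds up to 8 elements (128 bits).
    assert_eq!(row_buffer_units(10, 1, 12, 2), 8);
}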