Skip to content

Commit 957ceaa

Browse files
committed
chore(ricepp): more perf tweaks
1 parent 9304ec4 commit 957ceaa

File tree

4 files changed

+53
-43
lines changed

4 files changed

+53
-43
lines changed

ricepp/include/ricepp/bitstream_reader.h

+9 -9
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ class bitstream_reader final {
5454
assert(num_bits <= std::numeric_limits<T>::digits);
5555
T bits = 0;
5656
uint16_t pos = 0;
57-
if (num_bits > 0) {
57+
if (num_bits > 0) [[likely]] {
5858
for (;;) {
5959
size_t const remain = kBitsTypeBits - bit_pos_;
6060
if (num_bits <= remain) {
@@ -91,11 +91,11 @@ class bitstream_reader final {
9191
if (bits != bits_type{}) [[likely]] {
9292
size_t const ffs = std::countr_zero(bits);
9393
assert(ffs < kBitsTypeBits);
94-
if (ffs + 1 == kBitsTypeBits) [[unlikely]] {
95-
bit_pos_ = 0;
96-
} else {
94+
if (ffs + 1 != kBitsTypeBits) {
9795
data_ = bits;
9896
bit_pos_ = ffs + 1;
97+
} else {
98+
bit_pos_ = 0;
9999
}
100100
return zeros + ffs;
101101
}
@@ -112,8 +112,7 @@ class bitstream_reader final {
112112

113113
RICEPP_FORCE_INLINE void skip_bits(size_t num_bits) {
114114
assert(bit_pos_ + num_bits <= kBitsTypeBits);
115-
bit_pos_ += num_bits;
116-
bit_pos_ &= kBitsTypeBits - 1;
115+
bit_pos_ = (bit_pos_ + num_bits) & (kBitsTypeBits - 1);
117116
}
118117

119118
RICEPP_FORCE_INLINE bool peek_bit() {
@@ -123,16 +122,17 @@ class bitstream_reader final {
123122

124123
RICEPP_FORCE_INLINE bits_type peek_bits(size_t num_bits) {
125124
assert(bit_pos_ + num_bits <= kBitsTypeBits);
126-
if (bit_pos_ == 0) [[unlikely]] {
125+
auto const bp = bit_pos_;
126+
if (bp == 0) {
127127
data_ = read_packet();
128128
}
129129
// The remainder of this function is equivalent to:
130130
//
131-
// return _bextr_u64(data_, bit_pos_, num_bits);
131+
// return _bextr_u64(data_, bp, num_bits);
132132
//
133133
// However, in practice, at least clang generates code that is as fast
134134
// as the intrinsic, so we use the following code for portability.
135-
bits_type bits = data_ >> bit_pos_;
135+
bits_type bits = data_ >> bp;
136136
if (num_bits < kBitsTypeBits) [[likely]] {
137137
bits &= (static_cast<bits_type>(1) << num_bits) - 1;
138138
}

ricepp/include/ricepp/bitstream_writer.h

+13 -7
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ class bitstream_writer final {
7676
write_packet(bits);
7777
repeat -= kBitsTypeBits;
7878
}
79-
if (repeat > 0) {
79+
if (repeat > 0) [[likely]] {
8080
write_bits_impl(bits, repeat);
8181
}
8282
}
@@ -86,11 +86,17 @@ class bitstream_writer final {
8686
static constexpr size_t kArgBits{std::numeric_limits<T>::digits};
8787
assert(bit_pos_ < kBitsTypeBits);
8888
assert(num_bits <= kArgBits);
89-
while (num_bits > 0) {
90-
size_t const bits_to_write = std::min(num_bits, kBitsTypeBits - bit_pos_);
91-
write_bits_impl(bits, bits_to_write);
92-
bits >>= bits_to_write;
93-
num_bits -= bits_to_write;
89+
if (num_bits > 0) [[likely]] {
90+
for (;;) {
91+
size_t const bits_to_write =
92+
std::min(num_bits, kBitsTypeBits - bit_pos_);
93+
write_bits_impl(bits, bits_to_write);
94+
bits >>= bits_to_write;
95+
if (num_bits == bits_to_write) [[likely]] {
96+
break;
97+
}
98+
num_bits -= bits_to_write;
99+
}
94100
}
95101
}
96102

@@ -111,7 +117,7 @@ class bitstream_writer final {
111117
private:
112118
RICEPP_FORCE_INLINE void write_bits_impl(bits_type bits, size_t num_bits) {
113119
assert(bit_pos_ + num_bits <= kBitsTypeBits);
114-
if (num_bits < kBitsTypeBits) {
120+
if (num_bits < kBitsTypeBits) [[likely]] {
115121
bits &= (static_cast<bits_type>(1) << num_bits) - 1;
116122
}
117123
data_ |= bits << bit_pos_;

ricepp/include/ricepp/detail/decode.h

+18 -14
Original file line number | Diff line number | Diff line change
@@ -50,22 +50,26 @@ void decode_block(V block, BitstreamReader& reader, PixelTraits const& traits,
5050

5151
auto const fsp1 = reader.template read_bits<value_type>(kFsBits);
5252

53-
if (fsp1 == 0) [[unlikely]] {
54-
std::fill(block.begin(), block.end(), traits.write(last));
55-
} else if (fsp1 > kFsMax) [[unlikely]] {
56-
for (auto& b : block) {
57-
b = reader.template read_bits<value_type>(kPixelBits);
53+
if (fsp1 > 0) {
54+
if (fsp1 <= kFsMax) {
55+
auto const fs = fsp1 - 1;
56+
for (auto& b : block) {
57+
value_type diff = reader.find_first_set() << fs;
58+
diff |= reader.template read_bits<value_type>(fs);
59+
last += static_cast<std::make_signed_t<value_type>>(
60+
((diff & 1) * value_type(-1)) ^ (diff >> 1));
61+
// last += static_cast<std::make_signed_t<value_type>>(
62+
// (diff & 1) ? ~(diff >> 1) : (diff >> 1));
63+
b = traits.write(last);
64+
}
65+
} else {
66+
for (auto& b : block) {
67+
b = reader.template read_bits<value_type>(kPixelBits);
68+
}
69+
last = traits.read(block.back());
5870
}
59-
last = traits.read(block.back());
6071
} else {
61-
auto const fs = fsp1 - 1;
62-
for (auto& b : block) {
63-
value_type diff = reader.find_first_set() << fs;
64-
diff |= reader.template read_bits<value_type>(fs);
65-
last += static_cast<std::make_signed_t<value_type>>(
66-
(diff & 1) ? ~(diff >> 1) : (diff >> 1));
67-
b = traits.write(last);
68-
}
72+
std::fill(block.begin(), block.end(), traits.write(last));
6973
}
7074

7175
last_value = last;

ricepp/include/ricepp/detail/encode.h

+13 -13
Original file line number | Diff line number | Diff line change
@@ -117,23 +117,12 @@ void encode_block(V block, BitstreamWriter& writer, PixelTraits const& traits,
117117

118118
last_value = last;
119119

120-
if (sum == 0) [[unlikely]] {
121-
// All differences are zero, so just write a zero fs and we're done.
122-
writer.write_bits(0U, kFsBits);
123-
} else {
120+
if (sum > 0) [[likely]] {
124121
// Find the best bit position to split the difference values.
125122
auto const [fs, bits_used] =
126123
compute_best_split<kFsMax>(delta, block.size(), sum);
127124

128-
if (fs >= kFsMax || bits_used >= kPixelBits * block.size()) [[unlikely]] {
129-
// Difference values are too large for entropy coding. Just plain copy
130-
// the input pixel data. This is really unlikely, so reading the input
131-
// pixels again is fine.
132-
writer.write_bits(kFsMax + 1, kFsBits);
133-
for (auto& b : block) {
134-
writer.write_bits(b, kPixelBits);
135-
}
136-
} else {
125+
if (fs < kFsMax && bits_used < kPixelBits * block.size()) [[likely]] {
137126
// Encode the difference values using Rice entropy coding.
138127
writer.write_bits(fs + 1, kFsBits);
139128
for (size_t i = 0; i < block.size(); ++i) {
@@ -145,7 +134,18 @@ void encode_block(V block, BitstreamWriter& writer, PixelTraits const& traits,
145134
writer.write_bit(1);
146135
writer.write_bits(diff, fs);
147136
}
137+
} else {
138+
// Difference values are too large for entropy coding. Just plain copy
139+
// the input pixel data. This is really unlikely, so reading the input
140+
// pixels again is fine.
141+
writer.write_bits(kFsMax + 1, kFsBits);
142+
for (auto& b : block) {
143+
writer.write_bits(b, kPixelBits);
144+
}
148145
}
146+
} else {
147+
// All differences are zero, so just write a zero fs and we're done.
148+
writer.write_bits(0U, kFsBits);
149149
}
150150
}
151151

0 commit comments

Comments
 (0)