Skip to content

Commit 3c6666c

Browse files
authored
Merge pull request #25 from wx257osn2/improve-decoder-performance
Improve decoder performance
2 parents 395017e + 47d083a commit 3c6666c

File tree

1 file changed

+16
-20
lines changed

1 file changed

+16
-20
lines changed

include/qoixx.hpp

+16 -20
Original file line number | Diff line number | Diff line change
@@ -1079,19 +1079,19 @@ class qoi{
10791079
constexpr std::uint32_t mask_tail_4 = 0b0000'1111u;
10801080
const auto vr = (i >> 4);
10811081
const auto vb = (i & mask_tail_4);
1082-
table[i] = vr*3 + vb*7;
1082+
table[i] = (vr*3 + vb*7) % index_size;
10831083
}
10841084
for(std::size_t i = chunk_tag::diff; i < chunk_tag::luma; ++i){
10851085
constexpr std::uint32_t mask_tail_2 = 0b0000'0011u;
1086-
const auto vr = ((i >> 4) & mask_tail_2) - 2;
1087-
const auto vg = ((i >> 2) & mask_tail_2) - 2;
1088-
const auto vb = ( i & mask_tail_2) - 2;
1089-
table[i+hash_table_offset] = vr*3 + vg*5 + vb*7;
1086+
const auto vr = static_cast<int>((i >> 4) & mask_tail_2) - 2;
1087+
const auto vg = static_cast<int>((i >> 2) & mask_tail_2) - 2;
1088+
const auto vb = static_cast<int>( i & mask_tail_2) - 2;
1089+
table[i+hash_table_offset] = static_cast<std::uint8_t>((vr*3 + vg*5 + vb*7) % index_size);
10901090
}
10911091
for(std::size_t i = chunk_tag::luma; i < chunk_tag::run; ++i){
10921092
constexpr int vgv = chunk_tag::luma+40;
10931093
const int vg = i - vgv;
1094-
table[i+hash_table_offset] = vg*3 + (vg+8)*5 + vg*7;
1094+
table[i+hash_table_offset] = static_cast<std::uint8_t>((vg*3 + (vg+8)*5 + vg*7) % index_size);
10951095
}
10961096
return table;
10971097
}
@@ -1130,8 +1130,12 @@ class qoi{
11301130
if constexpr(std::is_same<rgba_t, qoi::rgba_t>::value)
11311131
px.a = 255;
11321132
rgba_t index[index_size];
1133-
if constexpr(std::is_same<rgba_t, qoi::rgba_t>::value)
1133+
if constexpr(std::is_same<rgba_t, qoi::rgba_t>::value){
11341134
index[(0*3+0*5+0*7+0*11)%index_size] = {};
1135+
index[(0*3+0*5+0*7+255*11)%index_size] = px;
1136+
}
1137+
else
1138+
index[(0*3+0*5+0*7+255*11)%index_size] = {};
11351139

11361140
#if QOIXX_DECODE_WITH_TABLES
11371141
#define QOIXX_HPP_WITH_TABLES(...) __VA_ARGS__
@@ -1147,10 +1151,7 @@ class qoi{
11471151
static constexpr auto hash_diff_table = luma_hash_diff_table.data() + hash_table_offset;
11481152
)
11491153

1150-
const auto f = [&pixels, &p, &px_len, &size, &px, &index QOIXX_HPP_WITH_TABLES(, &hash)](bool first){
1151-
static constexpr std::uint32_t mask_tail_6 = 0b0011'1111u;
1152-
[[maybe_unused]] static constexpr std::uint32_t mask_tail_4 = 0b0000'1111u;
1153-
[[maybe_unused]] static constexpr std::uint32_t mask_tail_2 = 0b0000'0011u;
1154+
const auto f = [&pixels, &p, &px_len, &size, &px, &index QOIXX_HPP_WITH_TABLES(, &hash)]{
11541155
const auto b1 = p.pull();
11551156
--size;
11561157

@@ -1183,18 +1184,12 @@ class qoi{
11831184
if(b1 >= chunk_tag::run){
11841185
if(b1 < chunk_tag::rgb){
11851186
/*run*/
1187+
static constexpr std::uint32_t mask_tail_6 = 0b0011'1111u;
11861188
std::size_t run = b1 & mask_tail_6;
11871189
if(run >= px_len)[[unlikely]]
11881190
run = px_len;
11891191
px_len -= run;
11901192
QOIXX_HPP_DECODE_RUN(px, run)
1191-
if(first)[[unlikely]]{
1192-
QOIXX_HPP_WITH_TABLES(hash = (0*3+0*5+0*7+255*11) % index_size;)
1193-
if constexpr(std::is_same<rgba_t, qoi::rgba_t>::value)
1194-
index[QOIXX_HPP_WITH_TABLES(hash) QOIXX_HPP_WITHOUT_TABLES((0*3+0*5+0*7+255*11) % index_size)] = px;
1195-
else
1196-
efficient_memcpy<Channels>(index + QOIXX_HPP_WITH_TABLES(hash) QOIXX_HPP_WITHOUT_TABLES((0*3+0*5+0*7+255*11) % index_size), &px);
1197-
}
11981193
return;
11991194
}
12001195
if(b1 == chunk_tag::rgb){
@@ -1244,6 +1239,7 @@ class qoi{
12441239
px.b += vg + drb[1];
12451240
hash = (static_cast<int>(hash)+hash_diff_table[b1]+luma_hash_diff_table[b2]) % index_size;
12461241
) QOIXX_HPP_WITHOUT_TABLES(
1242+
static constexpr std::uint32_t mask_tail_4 = 0b0000'1111u;
12471243
px.r += vg + (b2 >> 4);
12481244
px.g += vg + 8;
12491245
px.b += vg + (b2 & mask_tail_4);
@@ -1259,6 +1255,7 @@ class qoi{
12591255
px.b += drgb[2];
12601256
hash = (static_cast<int>(hash)+hash_diff_table[b1]) % index_size;
12611257
) QOIXX_HPP_WITHOUT_TABLES(
1258+
static constexpr std::uint32_t mask_tail_2 = 0b0000'0011u;
12621259
px.r += ((b1 >> 4) & mask_tail_2) - 2;
12631260
px.g += ((b1 >> 2) & mask_tail_2) - 2;
12641261
px.b += ( b1 & mask_tail_2) - 2;
@@ -1279,9 +1276,8 @@ class qoi{
12791276
push<Channels>(pixels, &px);
12801277
};
12811278

1282-
bool first = true;
12831279
while(px_len--)[[likely]]{
1284-
f(std::exchange(first, false));
1280+
f();
12851281
if(size < sizeof(padding))[[unlikely]]{
12861282
throw std::runtime_error("qoixx::qoi::decode: insufficient input data");
12871283
}

0 commit comments

Comments (0)