Skip to content

Commit

Permalink
handle address wrap around in texture cache
Browse files Browse the repository at this point in the history
fixes out-of-bounds access in Mario 64
also slightly optimise paletted texture conversion
  • Loading branch information
RSDuck committed Oct 27, 2024
1 parent b60f42b commit 58ab332
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 114 deletions.
11 changes: 11 additions & 0 deletions src/GPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,17 @@ class GPU
OAMDirty |= 1 << (addr / 1024);
}

// Reads a value of type T from VRAMFlat_Texture.
// The address is masked with 0x7FFFF, so the starting index always lies in
// 0..0x7FFFF and larger addresses wrap around.
// NOTE(review): only the starting index is wrapped; presumably callers pass
// addresses aligned to sizeof(T) so a multi-byte read never runs past the
// masked end -- confirm.
template <typename T>
inline T ReadVRAMFlat_Texture(u32 addr) const
{
    const u32 wrapped = addr & 0x7FFFF;
    return *reinterpret_cast<const T*>(&VRAMFlat_Texture[wrapped]);
}
// Reads a value of type T from VRAMFlat_TexPal.
// The address is masked with 0x1FFFF, so the starting index always lies in
// 0..0x1FFFF and larger addresses wrap around.
// NOTE(review): only the starting index is wrapped; presumably callers pass
// addresses aligned to sizeof(T) so a multi-byte read never runs past the
// masked end -- confirm.
template <typename T>
inline T ReadVRAMFlat_TexPal(u32 addr) const
{
    const u32 wrapped = addr & 0x1FFFF;
    return *reinterpret_cast<const T*>(&VRAMFlat_TexPal[wrapped]);
}

void SetPowerCnt(u32 val) noexcept;

void StartFrame() noexcept;
Expand Down
50 changes: 25 additions & 25 deletions src/GPU3D_Soft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,47 +193,47 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
case 1: // A3I5
{
vramaddr += ((t * width) + s);
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);

texpal <<= 4;
*color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1), gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
}
break;

case 2: // 4-color
{
vramaddr += (((t * width) + s) >> 2);
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
pixel >>= ((s & 0x3) << 1);
pixel &= 0x3;

texpal <<= 3;
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;

case 3: // 16-color
{
vramaddr += (((t * width) + s) >> 1);
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
if (s & 0x1) pixel >>= 4;
else pixel &= 0xF;

texpal <<= 4;
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;

case 4: // 256-color
{
vramaddr += ((t * width) + s);
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);

texpal <<= 4;
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
*alpha = (pixel==0) ? alpha0 : 31;
}
break;
Expand All @@ -253,31 +253,31 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
val = 0;
else
{
val = ReadVRAM_Texture<u8>(vramaddr, gpu);
val = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
val >>= (2 * (s & 0x3));
}

u16 palinfo = ReadVRAM_Texture<u16>(slot1addr, gpu);
u16 palinfo = gpu.ReadVRAMFlat_Texture<u16>(slot1addr);
u32 paloffset = (palinfo & 0x3FFF) << 2;
texpal <<= 4;

switch (val & 0x3)
{
case 0:
*color = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
*alpha = 31;
break;

case 1:
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);
*alpha = 31;
break;

case 2:
if ((palinfo >> 14) == 1)
{
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);

u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
Expand All @@ -294,8 +294,8 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
}
else if ((palinfo >> 14) == 3)
{
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);

u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
Expand All @@ -311,20 +311,20 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
*color = r | g | b;
}
else
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4, gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 4);
*alpha = 31;
break;

case 3:
if ((palinfo >> 14) == 2)
{
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6, gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 6);
*alpha = 31;
}
else if ((palinfo >> 14) == 3)
{
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);

u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
Expand Down Expand Up @@ -353,18 +353,18 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
case 6: // A5I3
{
vramaddr += ((t * width) + s);
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);

texpal <<= 4;
*color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1), gpu);
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + ((pixel&0x7)<<1));
*alpha = (pixel >> 3);
}
break;

case 7: // direct color
{
vramaddr += (((t * width) + s) << 1);
*color = ReadVRAM_Texture<u16>(vramaddr, gpu);
*color = gpu.ReadVRAMFlat_Texture<u16>(vramaddr);
*alpha = (*color & 0x8000) ? 31 : 0;
}
break;
Expand Down Expand Up @@ -1659,8 +1659,8 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
{
for (int x = 0; x < 256; x++)
{
u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1), gpu);
u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1), gpu);
u16 val2 = gpu.ReadVRAMFlat_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
u16 val3 = gpu.ReadVRAMFlat_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));

// TODO: confirm color conversion
u32 r = (val2 << 1) & 0x3E; if (r) r++;
Expand Down
10 changes: 0 additions & 10 deletions src/GPU3D_Soft.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,16 +430,6 @@ class SoftRenderer : public Renderer3D
s32 ycoverage, ycov_incr;
};

// Reads a value of type T from gpu.VRAMFlat_Texture.
// The address is masked with 0x7FFFF, so the starting index always lies in
// 0..0x7FFFF and larger addresses wrap around.
// NOTE(review): only the starting index is wrapped; presumably callers pass
// addresses aligned to sizeof(T) -- confirm.
template <typename T>
inline T ReadVRAM_Texture(u32 addr, const GPU& gpu) const
{
    const u32 wrapped = addr & 0x7FFFF;
    return *reinterpret_cast<const T*>(&gpu.VRAMFlat_Texture[wrapped]);
}
// Reads a value of type T from gpu.VRAMFlat_TexPal.
// The address is masked with 0x1FFFF, so the starting index always lies in
// 0..0x1FFFF and larger addresses wrap around.
// NOTE(review): only the starting index is wrapped; presumably callers pass
// addresses aligned to sizeof(T) -- confirm.
template <typename T>
inline T ReadVRAM_TexPal(u32 addr, const GPU& gpu) const
{
    const u32 wrapped = addr & 0x1FFFF;
    return *reinterpret_cast<const T*>(&gpu.VRAMFlat_TexPal[wrapped]);
}
u32 AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 alpha) const noexcept;

struct RendererPolygon
Expand Down
61 changes: 31 additions & 30 deletions src/GPU3D_Texcache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ inline u32 ConvertRGB5ToRGB6(u16 val)
}

template <int outputFmt>
void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData)
void ConvertBitmapTexture(u32 width, u32 height, u32* output, u32 addr, GPU& gpu)
{
for (u32 i = 0; i < width*height; i++)
{
u16 value = *(u16*)&texData[i * 2];
u16 value = gpu.ReadVRAMFlat_Texture<u16>(addr + i * 2);

switch (outputFmt)
{
Expand All @@ -96,28 +96,28 @@ void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData)
}
}

template void ConvertBitmapTexture<outputFmt_RGB6A5>(u32 width, u32 height, u32* output, u8* texData);
template void ConvertBitmapTexture<outputFmt_RGB6A5>(u32 width, u32 height, u32* output, u32 addr, GPU& gpu);

template <int outputFmt>
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData)
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u32 addr, u32 addrAux, u32 palAddr, GPU& gpu)
{
// we process a whole block at the time
for (int y = 0; y < height / 4; y++)
{
for (int x = 0; x < width / 4; x++)
{
u32 data = ((u32*)texData)[x + y * (width / 4)];
u16 auxData = ((u16*)texAuxData)[x + y * (width / 4)];
u32 data = gpu.ReadVRAMFlat_Texture<u32>(addr + (x + y * (width / 4))*4);
u16 auxData = gpu.ReadVRAMFlat_Texture<u16>(addrAux + (x + y * (width / 4))*2);

u32 paletteOffset = auxData & 0x3FFF;
u16 color0 = palData[paletteOffset*2] | 0x8000;
u16 color1 = palData[paletteOffset*2+1] | 0x8000;
u16 color2, color3;
u32 paletteOffset = palAddr + (auxData & 0x3FFF) * 4;
u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset) | 0x8000;
u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+2) | 0x8000;
u16 color2 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+4) | 0x8000;
u16 color3 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+6) | 0x8000;

switch ((auxData >> 14) & 0x3)
{
case 0:
color2 = palData[paletteOffset*2+2] | 0x8000;
color3 = 0;
break;
case 1:
Expand All @@ -137,8 +137,6 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
color3 = 0;
break;
case 2:
color2 = palData[paletteOffset*2+2] | 0x8000;
color3 = palData[paletteOffset*2+3] | 0x8000;
break;
case 3:
{
Expand Down Expand Up @@ -179,7 +177,8 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
{
for (int i = 0; i < 4; i++)
{
u16 color = (packed >> 16 * (data >> 2 * (i + j * 4))) & 0xFFFF;
u32 colorIdx = 16 * ((data >> 2 * (i + j * 4)) & 0x3);
u16 color = (packed >> colorIdx) & 0xFFFF;
u32 res;
switch (outputFmt)
{
Expand All @@ -197,20 +196,20 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
}
}

template void ConvertCompressedTexture<outputFmt_RGB6A5>(u32, u32, u32*, u8*, u8*, u16*);
template void ConvertCompressedTexture<outputFmt_RGB6A5>(u32, u32, u32*, u32, u32, u32, GPU&);

template <int outputFmt, int X, int Y>
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData)
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, GPU& gpu)
{
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
u8 val = texData[x + y * width];
u8 val = gpu.ReadVRAMFlat_Texture<u8>(addr + x + y * width);

u32 idx = val & ((1 << Y) - 1);

u16 color = palData[idx];
u16 color = gpu.ReadVRAMFlat_TexPal<u16>(palAddr + idx * 2);
u32 alpha = (val >> Y) & ((1 << X) - 1);
if (X != 5)
alpha = alpha * 4 + alpha / 2;
Expand All @@ -228,22 +227,24 @@ void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* pa
}
}

template void ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(u32, u32, u32*, u8*, u16*);
template void ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(u32, u32, u32*, u8*, u16*);
template void ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(u32, u32, u32*, u32, u32, GPU&);
template void ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(u32, u32, u32*, u32, u32, GPU&);

template <int outputFmt, int colorBits>
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, bool color0Transparent, GPU& gpu)
{
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width / (8 / colorBits); x++)
for (int x = 0; x < width / (16 / colorBits); x++)
{
u8 val = texData[x + y * (width / (8 / colorBits))];
// smallest possible row is 8 pixels with 2bpp => fits in u16
u16 val = gpu.ReadVRAMFlat_Texture<u16>(addr + 2 * (x + y * (width / (16 / colorBits))));

for (int i = 0; i < 8 / colorBits; i++)
for (int i = 0; i < 16 / colorBits; i++)
{
u32 index = (val >> (i * colorBits)) & ((1 << colorBits) - 1);
u16 color = palData[index];
u32 index = val & ((1 << colorBits) - 1);
val >>= colorBits;
u16 color = gpu.ReadVRAMFlat_TexPal<u16>(palAddr + index * 2);

bool transparent = color0Transparent && index == 0;
u32 res;
Expand All @@ -256,14 +257,14 @@ void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16*
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
| (transparent ? 0 : 0xFF000000); break;
}
output[x * (8 / colorBits) + y * width + i] = res;
output[x * (16 / colorBits) + y * width + i] = res;
}
}
}
}

template void ConvertNColorsTexture<outputFmt_RGB6A5, 2>(u32, u32, u32*, u8*, u16*, bool);
template void ConvertNColorsTexture<outputFmt_RGB6A5, 4>(u32, u32, u32*, u8*, u16*, bool);
template void ConvertNColorsTexture<outputFmt_RGB6A5, 8>(u32, u32, u32*, u8*, u16*, bool);
template void ConvertNColorsTexture<outputFmt_RGB6A5, 2>(u32, u32, u32*, u32, u32, bool, GPU&);
template void ConvertNColorsTexture<outputFmt_RGB6A5, 4>(u32, u32, u32*, u32, u32, bool, GPU&);
template void ConvertNColorsTexture<outputFmt_RGB6A5, 8>(u32, u32, u32*, u32, u32, bool, GPU&);

}
Loading

0 comments on commit 58ab332

Please sign in to comment.