Skip to content

Commit

Permalink
NEON GPU and faster GX
Browse files Browse the repository at this point in the history
  • Loading branch information
RSDuck committed Jan 26, 2021
1 parent 3215e07 commit d4b7e5c
Show file tree
Hide file tree
Showing 14 changed files with 3,413 additions and 61 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ if (ENABLE_OGLRENDERER)
add_definitions(-DOGLRENDERER_ENABLED)
endif()

# The NEON GPU option is only declared when ARCHITECTURE is ARM64.
if (ARCHITECTURE STREQUAL ARM64)
# User-facing switch; defaults to ON.
option(ENABLE_NEONSOFTGPU "Enable NEON GPU" ON)

# When the option is on, pass NEONSOFTGPU_ENABLED to the compiler as a preprocessor define.
if (ENABLE_NEONSOFTGPU)
add_definitions(-DNEONSOFTGPU_ENABLED)
endif()
endif()

if (CMAKE_BUILD_TYPE STREQUAL Debug)
add_compile_options(-Og)
endif()
Expand Down
7 changes: 7 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ if (ENABLE_OGLRENDERER)
)
endif()

# Add GPU3DGX_Neon.cpp and GPU2D_NeonSoft.cpp to the core target only when
# ENABLE_NEONSOFTGPU is set.
if (ENABLE_NEONSOFTGPU)
target_sources(core PRIVATE
GPU3DGX_Neon.cpp
GPU2D_NeonSoft.cpp
)
endif()

if (ENABLE_JIT)
enable_language(ASM)

Expand Down
38 changes: 27 additions & 11 deletions src/DMA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,21 +256,37 @@ void DMA::Run9()
}*/
}

while (IterCount > 0 && !Stall)
if (IsGXFIFODMA)
{
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);
// Number of whole transfer units that fit between the current ARM9 timestamp
// and the ARM9 target timestamp.
u32 count = (NDS::ARM9Target - NDS::ARM9Timestamp) / (unitcycles << NDS::ARM9ClockShift);
// Clamp to [1, IterCount]: transfer at least one unit so the DMA makes
// progress, but never more than this iteration has left. The min must come
// last so that IterCount == 0 yields 0 and the "-= count" updates that follow
// cannot underflow IterCount/RemCount.
count = std::max<u32>(count, 1);
count = std::min<u32>(count, IterCount);

if (ConsoleType == 1)
DSi::ARM9Write32(CurDstAddr, DSi::ARM9Read32(CurSrcAddr));
else
NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr));
GPU3D::WriteBatchToGXFIFO((u32*)&NDS::MainRAM[CurSrcAddr & NDS::MainRAMMask], count);

CurSrcAddr += SrcAddrInc<<2;
CurDstAddr += DstAddrInc<<2;
IterCount--;
RemCount--;
NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift) * count;
IterCount -= count;
RemCount -= count;
CurSrcAddr += (count * SrcAddrInc) << 2;
}
else
{
// Generic path: copy one 32-bit unit per pass until the iteration is
// exhausted, the DMA stalls, or the ARM9 target timestamp is reached.
while (IterCount > 0 && !Stall)
{
    // Account for the cost of the unit about to be transferred.
    NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift);

    // ConsoleType == 1 goes through the DSi bus accessors, anything else
    // through the NDS ones.
    if (ConsoleType == 1)
        DSi::ARM9Write32(CurDstAddr, DSi::ARM9Read32(CurSrcAddr));
    else
        NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr));

    CurSrcAddr += SrcAddrInc<<2;
    CurDstAddr += DstAddrInc<<2;
    IterCount--;
    RemCount--;

    // Check the target only after the transfer: every unit whose cycles were
    // charged above must actually have been copied before leaving the loop.
    if (NDS::ARM9Timestamp >= NDS::ARM9Target) break;
}
}
}

Expand Down
24 changes: 14 additions & 10 deletions src/GPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ bool RunFIFO;

u16 DispStat[2], VMatch[2];

u8 Palette[2*1024];
u8 OAM[2*1024];

u8 VRAM_A[128*1024];
Expand Down Expand Up @@ -134,21 +133,23 @@ u8 VRAMFlat_BBG[128*1024];
u8 VRAMFlat_AOBJ[256*1024];
u8 VRAMFlat_BOBJ[128*1024];

u8 VRAMFlat_ABGExtPal[32*1024];
u8 VRAMFlat_BBGExtPal[32*1024];
u8 VRAMFlat_AOBJExtPal[8*1024];
u8 VRAMFlat_BOBJExtPal[8*1024];

u8 VRAMFlat_Texture[512*1024];
u8 VRAMFlat_TexPal[128*1024];

u32 OAMDirty;
u32 PaletteDirty;

u8 AllPaletteMemory[2*1024+BGExtPalSize*2+OBJExtPalSize*2];

bool Init()
{
#ifndef NEONSOFTGPU_ENABLED
GPU2D_A = new GPU2D_Soft(0);
GPU2D_B = new GPU2D_Soft(1);
#else
GPU2D_A = new GPU2D_NeonSoft(0);
GPU2D_B = new GPU2D_NeonSoft(1);
#endif
if (!GPU3D::Init()) return false;

FrontBuffer = 0;
Expand Down Expand Up @@ -192,12 +193,15 @@ void ResetVRAMCache()
memset(VRAMFlat_BBG, 0, sizeof(VRAMFlat_BBG));
memset(VRAMFlat_AOBJ, 0, sizeof(VRAMFlat_AOBJ));
memset(VRAMFlat_BOBJ, 0, sizeof(VRAMFlat_BOBJ));
memset(VRAMFlat_ABGExtPal, 0, sizeof(VRAMFlat_ABGExtPal));
memset(VRAMFlat_BBGExtPal, 0, sizeof(VRAMFlat_BBGExtPal));
memset(VRAMFlat_AOBJExtPal, 0, sizeof(VRAMFlat_AOBJExtPal));
memset(VRAMFlat_BOBJExtPal, 0, sizeof(VRAMFlat_BOBJExtPal));
memset(VRAMFlat_ABGExtPal, 0, BGExtPalSize);
memset(VRAMFlat_BBGExtPal, 0, BGExtPalSize);
memset(VRAMFlat_AOBJExtPal, 0, OBJExtPalSize);
memset(VRAMFlat_BOBJExtPal, 0, OBJExtPalSize);
memset(VRAMFlat_Texture, 0, sizeof(VRAMFlat_Texture));
memset(VRAMFlat_TexPal, 0, sizeof(VRAMFlat_TexPal));

OAMDirty = 0x3;
PaletteDirty = 0xF;
}

void Reset()
Expand Down
18 changes: 11 additions & 7 deletions src/GPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,17 @@ extern u16 DispStat[2];
extern u8 VRAMCNT[9];
extern u8 VRAMSTAT;

extern u8 Palette[2*1024];
// Size in bytes of each BG / OBJ extended-palette region carved out of
// AllPaletteMemory below.
const u32 BGExtPalSize = 32*1024;
const u32 OBJExtPalSize = 8*1024;

// Single backing array for all palette data: 2 KiB for Palette, followed by
// two BG extended-palette regions and two OBJ extended-palette regions.
extern u8 AllPaletteMemory[2*1024+BGExtPalSize*2+OBJExtPalSize*2];

// Fixed views into AllPaletteMemory. Each pointer is derived from the previous
// one, so the regions are contiguous in this order:
// Palette | ABGExtPal | BBGExtPal | AOBJExtPal | BOBJExtPal.
u8* const Palette = AllPaletteMemory;
u8* const VRAMFlat_ABGExtPal = AllPaletteMemory + 2*1024;
u8* const VRAMFlat_BBGExtPal = VRAMFlat_ABGExtPal + BGExtPalSize;
u8* const VRAMFlat_AOBJExtPal = VRAMFlat_BBGExtPal + BGExtPalSize;
u8* const VRAMFlat_BOBJExtPal = VRAMFlat_AOBJExtPal + OBJExtPalSize;

extern u8 OAM[2*1024];

extern u8 VRAM_A[128*1024];
Expand Down Expand Up @@ -120,12 +130,6 @@ extern u8 VRAMFlat_BBG[128*1024];
extern u8 VRAMFlat_AOBJ[256*1024];
extern u8 VRAMFlat_BOBJ[128*1024];

extern u8 VRAMFlat_ABGExtPal[32*1024];
extern u8 VRAMFlat_BBGExtPal[32*1024];

extern u8 VRAMFlat_AOBJExtPal[8*1024];
extern u8 VRAMFlat_BOBJExtPal[8*1024];

extern u8 VRAMFlat_Texture[512*1024];
extern u8 VRAMFlat_TexPal[128*1024];

Expand Down
12 changes: 6 additions & 6 deletions src/GPU2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,18 +655,18 @@ void GPU2D::CheckWindows(u32 line)
else if (line == Win1Coords[2]) Win1Active |= 0x1;
}

void GPU2D::CalculateWindowMask(u32 line)
void GPU2D::CalculateWindowMask(u32 line, u8* windowMask, u8* objWindow)
{
for (u32 i = 0; i < 256; i++)
WindowMask[i] = WinCnt[2]; // window outside
windowMask[i] = WinCnt[2]; // window outside

if (DispCnt & (1<<15))
{
// OBJ window
for (int i = 0; i < 256; i++)
{
if (OBJWindow[i])
WindowMask[i] = WinCnt[3];
if (objWindow[i])
windowMask[i] = WinCnt[3];
}
}

Expand All @@ -681,7 +681,7 @@ void GPU2D::CalculateWindowMask(u32 line)
if (i == x2) Win1Active &= ~0x2;
else if (i == x1) Win1Active |= 0x2;

if (Win1Active == 0x3) WindowMask[i] = WinCnt[1];
if (Win1Active == 0x3) windowMask[i] = WinCnt[1];
}
}

Expand All @@ -696,7 +696,7 @@ void GPU2D::CalculateWindowMask(u32 line)
if (i == x2) Win0Active &= ~0x2;
else if (i == x1) Win0Active |= 0x2;

if (Win0Active == 0x3) WindowMask[i] = WinCnt[0];
if (Win0Active == 0x3) windowMask[i] = WinCnt[0];
}
}
}
Expand Down
85 changes: 79 additions & 6 deletions src/GPU2D.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@

#include "types.h"
#include "Savestate.h"
#include "NonStupidBitfield.h"

class GPU2D
{
public:
GPU2D(u32 num);
virtual ~GPU2D() {}

void Reset();
virtual void Reset();

void DoSavestate(Savestate* file);

Expand Down Expand Up @@ -115,11 +116,8 @@ class GPU2D

u16 MasterBrightness;

u8 WindowMask[256] __attribute__((aligned (8)));
u8 OBJWindow[256] __attribute__((aligned (8)));

void UpdateMosaicCounters(u32 line);
void CalculateWindowMask(u32 line);
void CalculateWindowMask(u32 line, u8* windowMask, u8* objWindow);

virtual void MosaicXSizeChanged() = 0;
};
Expand Down Expand Up @@ -148,12 +146,15 @@ class GPU2D_Soft : public GPU2D
u32 OBJLine[256] __attribute__((aligned (8)));
u8 OBJIndex[256] __attribute__((aligned (8)));

u8 WindowMask[256] __attribute__((aligned (8)));
u8 OBJWindow[256] __attribute__((aligned (8)));

u32 NumSprites;

u8 MosaicTable[16][256];
u8* CurBGXMosaicTable;
u8* CurOBJXMosaicTable;

u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
u32 ColorBlend5(u32 val1, u32 val2);
u32 ColorBrightnessUp(u32 val, u32 factor);
Expand Down Expand Up @@ -185,4 +186,76 @@ class GPU2D_Soft : public GPU2D
void DoCapture(u32 line, u32 width);
};

#ifdef NEONSOFTGPU_ENABLED
class GPU2D_NeonSoft : public GPU2D
{
public:
GPU2D_NeonSoft(u32 num);
~GPU2D_NeonSoft() override {}

void Reset() override;

void SetRenderSettings(bool accel) override;

void DrawScanline(u32 line) override;
void DrawSprites(u32 line) override;

protected:
void MosaicXSizeChanged() {}

private:
u32 BGOBJLine[272*2] __attribute__((aligned (16)));
u32* _3DLine;

u32 OBJLine[272*2] __attribute__((aligned (16)));
u8 OBJIndex[256];

u8 WindowMask[272] __attribute__((aligned (16)));
u8 OBJWindow[272] __attribute__((aligned (16)));

u8 MosaicTable[16][256];
u8* CurBGXMosaicTable;
u8* CurOBJXMosaicTable;

u32 NumSprites[4];
u32 NumSpritesPerLayer[4];
u8 SpriteCache[4][128];

bool SkipRendering;
bool _3DSemiTransparencies;
bool SemiTransBitmapSprites;
bool SemiTransTileSprites;

template <bool Enable3DBlend, int SecondSrcBlend>
void ApplyColorEffect();

void PalettiseRange(u32 start);

void InterleaveSprites(u32 prio);

void DoCapture(u32 line, u32 width);

template<u32 bgmode>
void DrawScanlineBGMode(u32 line);
void DrawScanlineBGMode6(u32 line);
void DrawScanlineBGMode7(u32 line);
void DrawScanline_BGOBJ(u32 line);

void DrawBG_3D();
template <bool mosaic>
void DrawBG_Text(u32 line, u32 bgnum);
template <bool mosaic>
void DrawBG_Affine(u32 line, u32 bgnum);
template <bool mosaic>
void DrawBG_Extended(u32 line, u32 bgnum);
template <bool mosaic>
void DrawBG_Large(u32 line);

template <bool window>
void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
template <bool window>
void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
};
#endif

#endif
Loading

0 comments on commit d4b7e5c

Please sign in to comment.