From b75b3f69b783ea7d8919f58d2c86dfa7c9dc6a69 Mon Sep 17 00:00:00 2001
From: Nadia Holmquist Pedersen
Date: Wed, 27 Jan 2021 00:14:24 +0100
Subject: [PATCH 01/18] Don't save the window size to the config if in full
screen (#933)
---
src/frontend/qt_sdl/main.cpp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp
index 527a507085..a3b0249548 100644
--- a/src/frontend/qt_sdl/main.cpp
+++ b/src/frontend/qt_sdl/main.cpp
@@ -1474,8 +1474,11 @@ void MainWindow::resizeEvent(QResizeEvent* event)
int w = event->size().width();
int h = event->size().height();
- Config::WindowWidth = w;
- Config::WindowHeight = h;
+ if (mainWindow != nullptr && !mainWindow->isFullScreen())
+ {
+ Config::WindowWidth = w;
+ Config::WindowHeight = h;
+ }
// TODO: detect when the window gets maximized!
}
From a3f4aaf50314f6ab528afb8dbcef88cfb012e765 Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Fri, 29 Jan 2021 12:38:18 +0100
Subject: [PATCH 02/18] call glFlush only once that seems to at least get rid of
the flicker the weird issue that clears don't work is still there
---
src/GPU3D_OpenGL.cpp | 4 ----
src/GPU_OpenGL.cpp | 2 --
src/frontend/qt_sdl/main.cpp | 8 ++++++++
3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp
index 24c2751c97..164f29a5fc 100644
--- a/src/GPU3D_OpenGL.cpp
+++ b/src/GPU3D_OpenGL.cpp
@@ -1145,8 +1145,6 @@ void RenderSceneChunk(int y, int h)
}
}
- glFlush();
-
if (RenderDispCnt & 0x00A0) // fog/edge enabled
{
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
@@ -1204,8 +1202,6 @@ void RenderSceneChunk(int y, int h)
glDrawArrays(GL_TRIANGLES, 0, 2*3);
}
-
- glFlush();
}
}
diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp
index dd28bcdce2..59ced93a6d 100644
--- a/src/GPU_OpenGL.cpp
+++ b/src/GPU_OpenGL.cpp
@@ -226,8 +226,6 @@ void RenderFrame()
glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID);
glBindVertexArray(CompVertexArrayID);
glDrawArrays(GL_TRIANGLES, 0, 4*3);
-
- glFlush();
}
void BindOutputTexture()
diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp
index a3b0249548..240d52e66d 100644
--- a/src/frontend/qt_sdl/main.cpp
+++ b/src/frontend/qt_sdl/main.cpp
@@ -490,6 +490,14 @@ void EmuThread::run()
// emulate
u32 nlines = NDS::RunFrame();
+#ifdef OGLRENDERER_ENABLED
+ // this is hacky but this is the easiest way to call
+ // this function without dealling with a ton of
+ // macro mess
+ if (videoRenderer == 1)
+ epoxy_glFlush();
+#endif
+
#ifdef MELONCAP
MelonCap::Update();
#endif // MELONCAP
From f9e701a719413469be4295909bd020b48873fb25 Mon Sep 17 00:00:00 2001
From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com>
Date: Fri, 29 Jan 2021 16:05:51 +0000
Subject: [PATCH 03/18] Initialise cursor hiding timer before potential
deletion of ScreenPanelGL
---
src/frontend/qt_sdl/main.cpp | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp
index 240d52e66d..6766252d6a 100644
--- a/src/frontend/qt_sdl/main.cpp
+++ b/src/frontend/qt_sdl/main.cpp
@@ -1431,6 +1431,11 @@ void MainWindow::createScreenPanel()
{
panelGL = new ScreenPanelGL(this);
panelGL->show();
+
+ panel = panelGL;
+ panelGL->setMouseTracking(true);
+ mouseTimer = panelGL->setupMouseTimer();
+ connect(mouseTimer, &QTimer::timeout, [=] { if (Config::MouseHide) panelGL->setCursor(Qt::BlankCursor);});
if (!panelGL->isValid())
hasOGL = false;
@@ -1443,14 +1448,6 @@ void MainWindow::createScreenPanel()
if (!hasOGL)
delete panelGL;
-
- if (hasOGL)
- {
- panel = panelGL;
- panelGL->setMouseTracking(true);
- mouseTimer = panelGL->setupMouseTimer();
- connect(mouseTimer, &QTimer::timeout, [=] { if (Config::MouseHide) panelGL->setCursor(Qt::BlankCursor);});
- }
}
if (!hasOGL)
From 0d301c243469baf53344a7610a1c835597c6b3e8 Mon Sep 17 00:00:00 2001
From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com>
Date: Mon, 1 Feb 2021 17:49:37 +0000
Subject: [PATCH 04/18] Remove flatpak from main repo
melonDS is on flathub and the flatpak package is maintained on a separate repository.
---
flatpak/net.kuribo64.melonDS.yml | 29 -----------------------------
1 file changed, 29 deletions(-)
delete mode 100644 flatpak/net.kuribo64.melonDS.yml
diff --git a/flatpak/net.kuribo64.melonDS.yml b/flatpak/net.kuribo64.melonDS.yml
deleted file mode 100644
index e336990908..0000000000
--- a/flatpak/net.kuribo64.melonDS.yml
+++ /dev/null
@@ -1,29 +0,0 @@
----
-app-id: net.kuribo64.melonDS
-runtime: org.freedesktop.Platform
-runtime-version: '18.08'
-sdk: org.freedesktop.Sdk
-command: melonDS
-finish-args:
- - "--share=ipc"
- - "--socket=x11"
- - "--socket=pulseaudio"
- - "--share=network"
- - "--device=all"
- - "--filesystem=home"
-modules:
- - name: libpcap
- sources:
- - type: archive
- url: http://www.tcpdump.org/release/libpcap-1.9.0.tar.gz
- sha256: 2edb88808e5913fdaa8e9c1fcaf272e19b2485338742b5074b9fe44d68f37019
-
- - name: melonds
- buildsystem: cmake-ninja
- sources:
- - type: git
- url: https://github.com/Arisotura/melonDS.git
- branch: master
- post-install:
- - "desktop-file-install --dir=/app/share/applications net.kuribo64.melonDS.desktop"
- - "install -D icon/melon_256x256.png /app/share/icons/hicolor/256x256/apps/net.kuribo64.melonDS.png"
From b5e601bb88858b124b0cc41a4ee7eb6896dee8d3 Mon Sep 17 00:00:00 2001
From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com>
Date: Tue, 2 Feb 2021 13:29:51 +0000
Subject: [PATCH 05/18] Try to fix Ubuntu AArch64 CI (#979)
Also remove previous fixes, they were fixed upstream
---
.github/workflows/build-ubuntu-aarch64.yml | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/build-ubuntu-aarch64.yml b/.github/workflows/build-ubuntu-aarch64.yml
index 9186263c3f..227785feaa 100644
--- a/.github/workflows/build-ubuntu-aarch64.yml
+++ b/.github/workflows/build-ubuntu-aarch64.yml
@@ -20,11 +20,9 @@ jobs:
- name: Upgrade system
shell: bash
working-directory: ${{runner.workspace}}
- run: | #Fix grub installation error - https://github.com/actions/virtual-environments/issues/1605
+ run: |
sudo apt update
- sudo apt-get install grub-efi
- sudo update-grub
- sudo apt full-upgrade
+ sudo ACCEPT_EULA=Y apt full-upgrade
- name: Install dependencies
shell: bash
working-directory: ${{runner.workspace}}
From 40aae154cf77e0611057a05702f28d9cf17b08f4 Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Tue, 2 Feb 2021 15:31:32 +0100
Subject: [PATCH 06/18] prevent race condition around framebuffers
---
src/GPU.h | 2 +-
src/GPU_OpenGL.cpp | 67 +++++++++++++----------
src/frontend/qt_sdl/main.cpp | 101 +++++++++++++++++++++--------------
src/frontend/qt_sdl/main.h | 4 ++
4 files changed, 106 insertions(+), 68 deletions(-)
diff --git a/src/GPU.h b/src/GPU.h
index 3a254dfb18..2fc15f49de 100644
--- a/src/GPU.h
+++ b/src/GPU.h
@@ -563,7 +563,7 @@ void SetRenderSettings(RenderSettings& settings);
void Stop();
void RenderFrame();
-void BindOutputTexture();
+void BindOutputTexture(int buf);
}
#endif
diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp
index 59ced93a6d..e7ab1f704d 100644
--- a/src/GPU_OpenGL.cpp
+++ b/src/GPU_OpenGL.cpp
@@ -49,8 +49,8 @@ struct CompVertex
CompVertex CompVertices[2 * 3*2];
GLuint CompScreenInputTex;
-GLuint CompScreenOutputTex;
-GLuint CompScreenOutputFB;
+GLuint CompScreenOutputTex[2];
+GLuint CompScreenOutputFB[2];
bool Init()
@@ -118,7 +118,7 @@ bool Init()
glEnableVertexAttribArray(1); // texcoord
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(CompVertex), (void*)(offsetof(CompVertex, Texcoord)));
- glGenFramebuffers(1, &CompScreenOutputFB);
+ glGenFramebuffers(2, CompScreenOutputFB);
glGenTextures(1, &CompScreenInputTex);
glActiveTexture(GL_TEXTURE0);
@@ -129,12 +129,15 @@ bool Init()
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI, 256*3 + 1, 192*2, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, NULL);
- glGenTextures(1, &CompScreenOutputTex);
- glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glGenTextures(2, CompScreenOutputTex);
+ for (int i = 0; i < 2; i++)
+ {
+ glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[i]);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ }
glBindFramebuffer(GL_FRAMEBUFFER, 0);
@@ -143,9 +146,9 @@ bool Init()
void DeInit()
{
- glDeleteFramebuffers(1, &CompScreenOutputFB);
+ glDeleteFramebuffers(2, CompScreenOutputFB);
glDeleteTextures(1, &CompScreenInputTex);
- glDeleteTextures(1, &CompScreenOutputTex);
+ glDeleteTextures(2, CompScreenOutputTex);
glDeleteVertexArrays(1, &CompVertexArrayID);
glDeleteBuffers(1, &CompVertexBufferID);
@@ -167,30 +170,41 @@ void SetRenderSettings(RenderSettings& settings)
ScreenW = 256 * scale;
ScreenH = (384+2) * scale;
- glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
- // fill the padding
- u8 zeroPixels[ScreenW*2*scale*4];
- memset(zeroPixels, 0, sizeof(zeroPixels));
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192*scale, ScreenW, 2*scale, GL_RGBA, GL_UNSIGNED_BYTE, zeroPixels);
-
- GLenum fbassign[] = {GL_COLOR_ATTACHMENT0};
- glBindFramebuffer(GL_FRAMEBUFFER, CompScreenOutputFB);
- glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CompScreenOutputTex, 0);
- glDrawBuffers(1, fbassign);
+ for (int i = 0; i < 2; i++)
+ {
+ glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[i]);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+ // fill the padding
+ u8 zeroPixels[ScreenW*2*scale*4];
+ memset(zeroPixels, 0, sizeof(zeroPixels));
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192*scale, ScreenW, 2*scale, GL_RGBA, GL_UNSIGNED_BYTE, zeroPixels);
+
+ GLenum fbassign[] = {GL_COLOR_ATTACHMENT0};
+ glBindFramebuffer(GL_FRAMEBUFFER, CompScreenOutputFB[i]);
+ glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CompScreenOutputTex[i], 0);
+ glDrawBuffers(1, fbassign);
+ }
glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
void Stop()
{
- RenderFrame();
+ for (int i = 0; i < 2; i++)
+ {
+ // clear output buffer i; indexing by GPU::FrontBuffer cleared the same buffer twice
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[i]);
+
+ glClear(GL_COLOR_BUFFER_BIT);
+ }
}
void RenderFrame()
{
+ int frontbuf = GPU::FrontBuffer;
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[frontbuf]);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
@@ -208,7 +222,6 @@ void RenderFrame()
// TODO: support setting this midframe, if ever needed
glUniform1i(Comp3DXPosLoc[0], ((int)GPU3D::RenderXPos << 23) >> 23);
- int frontbuf = GPU::FrontBuffer;
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, CompScreenInputTex);
@@ -228,9 +241,9 @@ void RenderFrame()
glDrawArrays(GL_TRIANGLES, 0, 4*3);
}
-void BindOutputTexture()
+void BindOutputTexture(int buf)
{
- glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex);
+ glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[buf]);
}
}
diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp
index 6766252d6a..5aa4959ecb 100644
--- a/src/frontend/qt_sdl/main.cpp
+++ b/src/frontend/qt_sdl/main.cpp
@@ -490,13 +490,18 @@ void EmuThread::run()
// emulate
u32 nlines = NDS::RunFrame();
+ FrontBufferLock.lock();
#ifdef OGLRENDERER_ENABLED
- // this is hacky but this is the easiest way to call
- // this function without dealling with a ton of
- // macro mess
if (videoRenderer == 1)
- epoxy_glFlush();
+ {
+ // this is hacky but this is the easiest way to call
+ // this function without dealing with a ton of
+ // macro mess
+ epoxy_glFinish();
+ }
#endif
+ FrontBuffer = GPU::FrontBuffer;
+ FrontBufferLock.unlock();
#ifdef MELONCAP
MelonCap::Update();
@@ -824,11 +829,17 @@ void ScreenPanelNative::paintEvent(QPaintEvent* event)
// fill background
painter.fillRect(event->rect(), QColor::fromRgb(0, 0, 0));
- int frontbuf = GPU::FrontBuffer;
- if (!GPU::Framebuffer[frontbuf][0] || !GPU::Framebuffer[frontbuf][1]) return;
+ emuThread->FrontBufferLock.lock();
+ int frontbuf = emuThread->FrontBuffer;
+ if (!GPU::Framebuffer[frontbuf][0] || !GPU::Framebuffer[frontbuf][1])
+ {
+ emuThread->FrontBufferLock.unlock();
+ return;
+ }
memcpy(screen[0].scanLine(0), GPU::Framebuffer[frontbuf][0], 256*192*4);
memcpy(screen[1].scanLine(0), GPU::Framebuffer[frontbuf][1], 256*192*4);
+ emuThread->FrontBufferLock.unlock();
painter.setRenderHint(QPainter::SmoothPixmapTransform, Config::ScreenFilter!=0);
@@ -988,53 +999,63 @@ void ScreenPanelGL::paintGL()
glViewport(0, 0, w*factor, h*factor);
- screenShader->bind();
+ if (emuThread)
+ {
+ screenShader->bind();
- screenShader->setUniformValue("uScreenSize", (float)w*factor, (float)h*factor);
+ screenShader->setUniformValue("uScreenSize", (float)w*factor, (float)h*factor);
- int frontbuf = GPU::FrontBuffer;
- glActiveTexture(GL_TEXTURE0);
+ emuThread->FrontBufferLock.lock();
+ int frontbuf = emuThread->FrontBuffer;
+ glActiveTexture(GL_TEXTURE0);
-#ifdef OGLRENDERER_ENABLED
- if (GPU::Renderer != 0)
- {
- // hardware-accelerated render
- GPU::GLCompositor::BindOutputTexture();
- }
- else
-#endif
- {
- // regular render
- glBindTexture(GL_TEXTURE_2D, screenTexture);
-
- if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1])
+ #ifdef OGLRENDERER_ENABLED
+ if (GPU::Renderer != 0)
{
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA,
- GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]);
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192+2, 256, 192, GL_RGBA,
- GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]);
+ // hardware-accelerated render
+ GPU::GLCompositor::BindOutputTexture(frontbuf);
}
- }
+ else
+ #endif
+ {
+ // regular render
+ glBindTexture(GL_TEXTURE_2D, screenTexture);
- GLint filter = Config::ScreenFilter ? GL_LINEAR : GL_NEAREST;
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);
+ if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1])
+ {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA,
+ GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]);
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192+2, 256, 192, GL_RGBA,
+ GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]);
+ }
+ }
- glBindBuffer(GL_ARRAY_BUFFER, screenVertexBuffer);
- glBindVertexArray(screenVertexArray);
+ GLint filter = Config::ScreenFilter ? GL_LINEAR : GL_NEAREST;
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);
- GLint transloc = screenShader->uniformLocation("uTransform");
+ glBindBuffer(GL_ARRAY_BUFFER, screenVertexBuffer);
+ glBindVertexArray(screenVertexArray);
- for (int i = 0; i < numScreens; i++)
- {
- glUniformMatrix2x3fv(transloc, 1, GL_TRUE, screenMatrix[i]);
- glDrawArrays(GL_TRIANGLES, screenKind[i] == 0 ? 0 : 2*3, 2*3);
- }
+ GLint transloc = screenShader->uniformLocation("uTransform");
- screenShader->release();
+ for (int i = 0; i < numScreens; i++)
+ {
+ glUniformMatrix2x3fv(transloc, 1, GL_TRUE, screenMatrix[i]);
+ glDrawArrays(GL_TRIANGLES, screenKind[i] == 0 ? 0 : 2*3, 2*3);
+ }
+
+ screenShader->release();
+ }
OSD::Update(this);
OSD::DrawGL(this, w*factor, h*factor);
+
+ if (emuThread)
+ {
+ glFinish();
+ emuThread->FrontBufferLock.unlock();
+ }
}
void ScreenPanelGL::resizeEvent(QResizeEvent* event)
diff --git a/src/frontend/qt_sdl/main.h b/src/frontend/qt_sdl/main.h
index c226fbc07e..9bfcd0a71a 100644
--- a/src/frontend/qt_sdl/main.h
+++ b/src/frontend/qt_sdl/main.h
@@ -26,6 +26,7 @@
#include
#include
#include
+#include <QMutex>
#include
#include
@@ -59,6 +60,9 @@ class EmuThread : public QThread
bool emuIsRunning();
+ int FrontBuffer = 0;
+ QMutex FrontBufferLock;
+
signals:
void windowUpdate();
void windowTitleChange(QString title);
From 2e999ae1b8e88e41fce3b2289372b44344cc79b0 Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Tue, 2 Feb 2021 16:29:23 +0100
Subject: [PATCH 07/18] attempt at fixing #972
---
src/GPU_OpenGL.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp
index e7ab1f704d..8f2d5a138e 100644
--- a/src/GPU_OpenGL.cpp
+++ b/src/GPU_OpenGL.cpp
@@ -198,6 +198,8 @@ void Stop()
glClear(GL_COLOR_BUFFER_BIT);
}
+
+ glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
void RenderFrame()
From 7b9b8418cb91c98525a63fdd1c53089da0d0c758 Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Tue, 2 Feb 2021 20:37:28 +0100
Subject: [PATCH 08/18] fix #978
---
src/GPU2D_Soft.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp
index 53e7b73313..7cab67ad47 100644
--- a/src/GPU2D_Soft.cpp
+++ b/src/GPU2D_Soft.cpp
@@ -2126,7 +2126,7 @@ void GPU2D_Soft::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32
for (; xoff < xend;)
{
- color = objvram[pixelsaddr];
+ color = objvram[pixelsaddr & objvrammask];
pixelsaddr += pixelstride;
From 2502c8d212a9adb1910fb0ccd4ad72acb0231b39 Mon Sep 17 00:00:00 2001
From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com>
Date: Wed, 3 Feb 2021 16:14:53 +0000
Subject: [PATCH 09/18] Add NetBSD support (#985)
Note - This will require PaX MPROTECT to be disabled for melonDS by running:
paxctl +m melonDS
---
src/ARMJIT_Memory.cpp | 4 ++++
src/frontend/qt_sdl/CMakeLists.txt | 5 ++++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp
index 5de185bacf..2ff38f97b5 100644
--- a/src/ARMJIT_Memory.cpp
+++ b/src/ARMJIT_Memory.cpp
@@ -159,6 +159,8 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext)
desc.FaultPC = (u8*)context->uc_mcontext->__ss.__rip;
#elif defined(__FreeBSD__)
desc.FaultPC = (u8*)context->uc_mcontext.mc_rip;
+ #elif defined(__NetBSD__)
+ desc.FaultPC = (u8*)context->uc_mcontext.__gregs[_REG_RIP];
#else
desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP];
#endif
@@ -180,6 +182,8 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext)
context->uc_mcontext->__ss.__rip = (u64)desc.FaultPC;
#elif defined(__FreeBSD__)
context->uc_mcontext.mc_rip = (u64)desc.FaultPC;
+ #elif defined(__NetBSD__)
+ context->uc_mcontext.__gregs[_REG_RIP] = (u64)desc.FaultPC;
#else
context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC;
#endif
diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt
index 8f48390e68..f61e0caaca 100644
--- a/src/frontend/qt_sdl/CMakeLists.txt
+++ b/src/frontend/qt_sdl/CMakeLists.txt
@@ -92,7 +92,10 @@ endif()
if (UNIX)
option(PORTABLE "Make a portable build that looks for its configuration in the current directory" OFF)
- target_link_libraries(melonDS dl Qt5::Core Qt5::Gui Qt5::Widgets)
+ target_link_libraries(melonDS Qt5::Core Qt5::Gui Qt5::Widgets)
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ target_link_libraries(melonDS dl)
+ endif()
elseif (WIN32)
option(PORTABLE "Make a portable build that looks for its configuration in the current directory" ON)
configure_file("${CMAKE_SOURCE_DIR}/melon.rc.in" "${CMAKE_SOURCE_DIR}/melon.rc")
From 1112162e994638f2c0085accc6bba77d52cc27f9 Mon Sep 17 00:00:00 2001
From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com>
Date: Thu, 4 Feb 2021 10:10:49 +0000
Subject: [PATCH 10/18] Add build status badges
---
README.md | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/README.md b/README.md
index 4c7b2c4301..8cefa34cd0 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,11 @@
+
+
+
+
+
DS emulator, sorta
From 6256a42e0099cef9f567148083c55bbb798d8184 Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Tue, 9 Feb 2021 19:24:57 +0100
Subject: [PATCH 11/18] improve and fix NonStupidBitfield also get rid of some
UB fixes optimised lto clang build
---
src/GPU.cpp | 18 +++---
src/GPU2D.cpp | 2 +
src/NonStupidBitfield.h | 130 +++++++++++++++++++++++++---------------
3 files changed, 93 insertions(+), 57 deletions(-)
diff --git a/src/GPU.cpp b/src/GPU.cpp
index e31b2392bd..ab3a5f9360 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -1187,7 +1187,7 @@ NonStupidBitField VRAMTrackingSet(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
+ result.SetRange(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
banksToBeZeroed |= currentMappings[i];
Mapping[i] = currentMappings[i];
}
@@ -1209,19 +1209,19 @@ NonStupidBitField VRAMTrackingSet> 14)];
- ((u32*)result.Data)[i] |= dirty;
+ result.Data[i / 2] |= (u64)dirty << ((i&1)*32);
}
else if (MappingGranularity == 8*1024)
{
u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)];
- ((u16*)result.Data)[i] |= dirty;
+ result.Data[i / 4] |= (u64)dirty << ((i&3)*16);
}
else if (MappingGranularity == 128*1024)
{
- ((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0];
- ((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1];
- ((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2];
- ((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3];
+ result.Data[i * 4 + 0] |= VRAMDirty[num].Data[0];
+ result.Data[i * 4 + 1] |= VRAMDirty[num].Data[1];
+ result.Data[i * 4 + 2] |= VRAMDirty[num].Data[2];
+ result.Data[i * 4 + 3] |= VRAMDirty[num].Data[3];
}
else
{
@@ -1236,7 +1236,7 @@ NonStupidBitField VRAMTrackingSet& writtenFlags)
mapping &= ~(1 << num);
}
}
- memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data));
+ writtenFlags.Clear();
}
void SyncDirtyFlags()
diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp
index fa05e79548..cbe09d6c39 100644
--- a/src/GPU2D.cpp
+++ b/src/GPU2D.cpp
@@ -117,6 +117,7 @@ void GPU2D::Reset()
BGMosaicYMax = 0;
OBJMosaicY = 0;
OBJMosaicYMax = 0;
+ OBJMosaicYCount = 0;
BlendCnt = 0;
EVA = 16;
@@ -130,6 +131,7 @@ void GPU2D::Reset()
memset(DispFIFOBuffer, 0, 256*2);
CaptureCnt = 0;
+ CaptureLatch = false;
MasterBrightness = 0;
diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h
index 22e13a96f4..8e87c3c007 100644
--- a/src/NonStupidBitfield.h
+++ b/src/NonStupidBitfield.h
@@ -14,9 +14,8 @@
template <u32 Size>
struct NonStupidBitField
{
- static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8");
- static const u32 DataLength = Size / 8;
- u8 Data[DataLength];
+ static constexpr u32 DataLength = (Size + 0x3F) >> 6;
+ u64 Data[DataLength];
struct Ref
{
@@ -25,13 +24,13 @@ struct NonStupidBitField
operator bool()
{
- return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7));
+ return BitField.Data[Idx >> 6] & (1ULL << (Idx & 0x3F));
}
Ref& operator=(bool set)
{
- BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7));
- BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7));
+ BitField.Data[Idx >> 6] &= ~(1ULL << (Idx & 0x3F));
+ BitField.Data[Idx >> 6] |= ((u64)set << (Idx & 0x3F));
return *this;
}
};
@@ -43,27 +42,40 @@ struct NonStupidBitField
u32 BitIdx;
u64 RemainingBits;
- u32 operator*() { return DataIdx * 8 + BitIdx; }
+ u32 operator*() { return DataIdx * 64 + BitIdx; }
- bool operator==(const Iterator& other) { return other.DataIdx == DataIdx; }
- bool operator!=(const Iterator& other) { return other.DataIdx != DataIdx; }
+ bool operator==(const Iterator& other)
+ {
+ return other.DataIdx == DataIdx;
+ }
+ bool operator!=(const Iterator& other)
+ {
+ return other.DataIdx != DataIdx;
+ }
- template
void Next()
{
- if (DataIdx >= DataLength)
- return;
-
- while (RemainingBits == 0)
+ if (RemainingBits == 0)
{
- DataIdx += sizeof(T);
- if (DataIdx >= DataLength)
- return;
- RemainingBits = *(T*)&BitField.Data[DataIdx];
+ for (u32 i = DataIdx + 1; i < DataLength; i++)
+ {
+ if (BitField.Data[i])
+ {
+ DataIdx = i;
+ RemainingBits = BitField.Data[i];
+ goto done;
+ }
+ }
+ DataIdx = DataLength;
+ return;
+ done:;
}
BitIdx = __builtin_ctzll(RemainingBits);
RemainingBits &= ~(1ULL << BitIdx);
+ // BitIdx is relative to the current word, so compare the absolute bit index against Size
+ if ((Size & 0x3F) && DataIdx * 64 + BitIdx >= Size)
+ DataIdx = DataLength;
}
Iterator operator++(int)
@@ -75,40 +87,35 @@ struct NonStupidBitField
Iterator& operator++()
{
- if ((DataLength % 8) == 0)
- Next();
- else if ((DataLength % 4) == 0)
- Next();
- else if ((DataLength % 2) == 0)
- Next();
- else
- Next();
-
+ Next();
return *this;
}
};
- NonStupidBitField(u32 start, u32 size)
+ NonStupidBitField(u32 startBit, u32 bitsCount)
{
- memset(Data, 0, sizeof(Data));
+ Clear();
- if (size == 0)
+ if (bitsCount == 0)
return;
- u32 roundedStartBit = (start + 7) & ~7;
- u32 roundedEndBit = (start + size) & ~7;
- if (roundedStartBit != roundedEndBit)
- memset(Data + roundedStartBit / 8, 0xFF, (roundedEndBit - roundedStartBit) / 8);
-
- if (start & 0x7)
- Data[start >> 3] = 0xFF << (start & 0x7);
- if ((start + size) & 0x7)
- Data[(start + size) >> 3] = 0xFF >> ((start + size) & 0x7);
+ SetRange(startBit, bitsCount);
+ /*for (int i = 0; i < Size; i++)
+ {
+ bool state = (*this)[i];
+ if (state != (i >= startBit && i < startBit + bitsCount))
+ {
+ for (u32 j = 0; j < DataLength; j++)
+ printf("data %016lx\n", Data[j]);
+ printf("blarg %d %d %d %d\n", i, startBit, bitsCount, Size);
+ abort();
+ }
+ }*/
}
NonStupidBitField()
{
- memset(Data, 0, sizeof(Data));
+ Clear();
}
Iterator End()
@@ -117,14 +124,20 @@ struct NonStupidBitField
}
Iterator Begin()
{
- if ((DataLength % 8) == 0)
- return ++Iterator{*this, 0, 0, *(u64*)Data};
- else if ((DataLength % 4) == 0)
- return ++Iterator{*this, 0, 0, *(u32*)Data};
- else if ((DataLength % 2) == 0)
- return ++Iterator{*this, 0, 0, *(u16*)Data};
- else
- return ++Iterator{*this, 0, 0, *Data};
+ // Returns an iterator at the first set bit below Size, or End() if there is none.
+ for (u32 i = 0; i < DataLength; i++)
+ {
+ // __builtin_ctzll(0) is undefined, so only count zeros in non-empty words
+ u32 idx = Data[i] ? __builtin_ctzll(Data[i]) : 0;
+ if (Data[i] && idx + i * 64 < Size)
+ return {*this, i, idx, Data[i] & ~(1ULL << idx)};
+ }
+ return End();
+ }
+
+ void Clear()
+ {
+ memset(Data, 0, sizeof(Data));
}
Ref operator[](u32 idx)
@@ -132,6 +145,27 @@ struct NonStupidBitField
return Ref{*this, idx};
}
+ // Sets bits [startBit, startBit + bitsCount); other bits are left untouched.
+ // No bounds check is performed against Size.
+ void SetRange(u32 startBit, u32 bitsCount)
+ {
+ u32 startEntry = startBit >> 6;
+ u64 entriesCount = (((startBit + bitsCount + 0x3F) & ~0x3F) >> 6) - startEntry;
+
+ if (entriesCount > 1)
+ {
+ Data[startEntry] |= 0xFFFFFFFFFFFFFFFF << (startBit & 0x3F);
+ if ((startBit + bitsCount) & 0x3F)
+ Data[startEntry + entriesCount - 1] |= ~(0xFFFFFFFFFFFFFFFF << ((startBit + bitsCount) & 0x3F));
+ else
+ Data[startEntry + entriesCount - 1] = 0xFFFFFFFFFFFFFFFF;
+ for (u64 i = startEntry + 1; i < startEntry + entriesCount - 1; i++)
+ Data[i] = 0xFFFFFFFFFFFFFFFF;
+ }
+ else // single word; bitsCount == 64 is handled separately because 1ULL << 64 is undefined
+ Data[startEntry] |= (bitsCount >= 64 ? ~0ULL : (1ULL << bitsCount) - 1) << (startBit & 0x3F);
+ }
+
NonStupidBitField& operator|=(const NonStupidBitField& other)
{
for (u32 i = 0; i < DataLength; i++)
From e7ee3b7bc84e583fbfbe7c9896db53d3e808edcc Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Tue, 9 Feb 2021 22:19:44 +0100
Subject: [PATCH 12/18] wild shot into the dark
---
src/ARMJIT_Memory.cpp | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp
index 2ff38f97b5..5a011f3d52 100644
--- a/src/ARMJIT_Memory.cpp
+++ b/src/ARMJIT_Memory.cpp
@@ -711,16 +711,11 @@ void Init()
MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, MemoryTotalSize, NULL);
- MemoryBase = (u8*)VirtualAlloc(NULL, MemoryTotalSize, MEM_RESERVE, PAGE_READWRITE);
-
- FastMem9Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE);
- FastMem7Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE);
-
- // only free them after they have all been reserved
- // so they can't overlap
+ MemoryBase = (u8*)VirtualAlloc(NULL, AddrSpaceSize*4, MEM_RESERVE, PAGE_READWRITE);
VirtualFree(MemoryBase, 0, MEM_RELEASE);
- VirtualFree(FastMem9Start, 0, MEM_RELEASE);
- VirtualFree(FastMem7Start, 0, MEM_RELEASE);
+ FastMem9Start = MemoryBase;
+ FastMem7Start = MemoryBase + AddrSpaceSize;
+ MemoryBase = MemoryBase + AddrSpaceSize*2;
MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase);
From 891427c75c6c617bf61b2e7f2a3f0d79872f7f3c Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Tue, 9 Feb 2021 23:36:46 +0100
Subject: [PATCH 13/18] fix #994
---
src/ARMJIT_Memory.cpp | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp
index 5a011f3d52..063437e55f 100644
--- a/src/ARMJIT_Memory.cpp
+++ b/src/ARMJIT_Memory.cpp
@@ -713,9 +713,15 @@ void Init()
MemoryBase = (u8*)VirtualAlloc(NULL, AddrSpaceSize*4, MEM_RESERVE, PAGE_READWRITE);
VirtualFree(MemoryBase, 0, MEM_RELEASE);
- FastMem9Start = MemoryBase;
- FastMem7Start = MemoryBase + AddrSpaceSize;
- MemoryBase = MemoryBase + AddrSpaceSize*2;
+ // this is incredibly hacky
+ // but someone else is trying to go into our address space!
+ // Windows will very likely give them virtual memory starting at the same address
+ // as it is giving us now.
+ // That's why we don't use this address, but instead 4gb inwards
+ // I know this is terrible
+ FastMem9Start = MemoryBase + AddrSpaceSize;
+ FastMem7Start = MemoryBase + AddrSpaceSize*2;
+ MemoryBase = MemoryBase + AddrSpaceSize*3;
MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase);
From a7029aebae2d09c2dd666a5832a90e227305bab1 Mon Sep 17 00:00:00 2001
From: Wunk
Date: Tue, 9 Feb 2021 14:38:51 -0800
Subject: [PATCH 14/18] Allow for more modular renderer backends (#990)
* Draft GPU3D renderer modularization
* Update sources C++ standard to C++17
The top-level `CMakeLists.txt` is already using the C++17 standard.
* Move GLCompositor into class type
Some other misc fixes to push towards better modularity
* Make renderer-implementation types move-only
These types are going to be holding onto handles
of GPU-side resources and shouldn't ever be copied around.
* Fix OSX: Remove 'register' storage class specifier
`register` has been removed in C++17...
But this keyword hasn't done anything in years anyways.
OSX builds consider this "warning" an error and it
stops the whole build.
* Add RestartFrame to Renderer3D interface
* Move Accelerated property to Renderer3D interface
There are points in the code base where we do:
`renderer != 0` to know if we are feeding
an openGL renderer. Rather than that we can instead just have this be
a property of the renderer itself.
With this pattern a renderer can just say how it wants its data to come
in rather than have everyone know that they're talking to an OpenGL
renderer.
* Remove Accelerated flag from GPU
* Move 2D_Soft interface in separate header
Also make the current 2D engine an "owned" unique_ptr.
* Update alignment attribute to standard alignas
Uses standardized `alignas` rather than compiler-specific
attributes.
https://en.cppreference.com/w/cpp/language/alignas
* Fix Clang: alignas specifier
Alignment must be specified before the array to align the entire array.
https://en.cppreference.com/w/cpp/language/alignas
* Converted Renderer3D Accelerated to variable
This flag is checked a lot during scanline rasterization. So rather
than having an expensive vtable-lookup call during mainline rendering
code, it is now a public constant bool type that is written to only once
during Renderer3D initialization.
---
src/CMakeLists.txt | 2 +-
src/GPU.cpp | 101 ++++---
src/GPU.h | 32 +--
src/GPU2D.h | 69 +----
src/GPU2D_Soft.cpp | 39 ++-
src/GPU2D_Soft.h | 79 ++++++
src/GPU3D.cpp | 22 +-
src/GPU3D.h | 55 ++--
src/GPU3D_OpenGL.cpp | 143 ++--------
src/GPU3D_OpenGL.h | 152 ++++++++++
src/GPU3D_Soft.cpp | 531 +++--------------------------------
src/GPU3D_Soft.h | 516 ++++++++++++++++++++++++++++++++++
src/GPU_OpenGL.cpp | 48 +---
src/GPU_OpenGL.h | 68 +++++
src/NDS.cpp | 2 +-
src/frontend/qt_sdl/main.cpp | 12 +-
16 files changed, 1037 insertions(+), 834 deletions(-)
create mode 100644 src/GPU2D_Soft.h
create mode 100644 src/GPU3D_OpenGL.h
create mode 100644 src/GPU3D_Soft.h
create mode 100644 src/GPU_OpenGL.h
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3bcecbcd55..dc32b2aa0d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,6 +1,6 @@
project(core)
-set (CMAKE_CXX_STANDARD 14)
+set (CMAKE_CXX_STANDARD 17)
add_library(core STATIC
ARCodeFile.cpp
diff --git a/src/GPU.cpp b/src/GPU.cpp
index ab3a5f9360..d5465bb776 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -21,6 +21,7 @@
#include "NDS.h"
#include "GPU.h"
+#include "GPU2D_Soft.h"
namespace GPU
{
@@ -79,11 +80,10 @@ u8* VRAMPtr_BOBJ[0x8];
int FrontBuffer;
u32* Framebuffer[2][2];
-int Renderer;
-bool Accelerated;
+int Renderer = 0;
-GPU2D* GPU2D_A;
-GPU2D* GPU2D_B;
+std::unique_ptr GPU2D_A = {};
+std::unique_ptr GPU2D_B = {};
/*
VRAM invalidation tracking
@@ -145,25 +145,28 @@ u8 VRAMFlat_TexPal[128*1024];
u32 OAMDirty;
u32 PaletteDirty;
+#ifdef OGLRENDERER_ENABLED
+std::unique_ptr CurGLCompositor = {};
+#endif
+
bool Init()
{
- GPU2D_A = new GPU2D_Soft(0);
- GPU2D_B = new GPU2D_Soft(1);
+ GPU2D_A = std::make_unique(0);
+ GPU2D_B = std::make_unique(1);
if (!GPU3D::Init()) return false;
FrontBuffer = 0;
Framebuffer[0][0] = NULL; Framebuffer[0][1] = NULL;
Framebuffer[1][0] = NULL; Framebuffer[1][1] = NULL;
Renderer = 0;
- Accelerated = false;
return true;
}
void DeInit()
{
- delete GPU2D_A;
- delete GPU2D_B;
+ GPU2D_A.reset();
+ GPU2D_B.reset();
GPU3D::DeInit();
if (Framebuffer[0][0]) delete[] Framebuffer[0][0];
@@ -250,9 +253,12 @@ void Reset()
memset(VRAMPtr_BBG, 0, sizeof(VRAMPtr_BBG));
memset(VRAMPtr_BOBJ, 0, sizeof(VRAMPtr_BOBJ));
- int fbsize;
- if (Accelerated) fbsize = (256*3 + 1) * 192;
- else fbsize = 256 * 192;
+ size_t fbsize;
+ if (GPU3D::CurrentRenderer->Accelerated)
+ fbsize = (256*3 + 1) * 192;
+ else
+ fbsize = 256 * 192;
+
for (int i = 0; i < fbsize; i++)
{
Framebuffer[0][0][i] = 0xFFFFFFFF;
@@ -283,17 +289,22 @@ void Reset()
void Stop()
{
int fbsize;
- if (Accelerated) fbsize = (256*3 + 1) * 192;
- else fbsize = 256 * 192;
+ if (GPU3D::CurrentRenderer->Accelerated)
+ fbsize = (256*3 + 1) * 192;
+ else
+ fbsize = 256 * 192;
+
memset(Framebuffer[0][0], 0, fbsize*4);
memset(Framebuffer[0][1], 0, fbsize*4);
memset(Framebuffer[1][0], 0, fbsize*4);
memset(Framebuffer[1][1], 0, fbsize*4);
#ifdef OGLRENDERER_ENABLED
- if (Accelerated)
- GLCompositor::Stop();
-#endif
+ // This needs a better way to know that we're
+ // using the OpenGL renderer specifically
+ if (GPU3D::CurrentRenderer->Accelerated)
+ CurGLCompositor->Stop();
+#endif
}
void DoSavestate(Savestate* file)
@@ -382,37 +393,42 @@ void InitRenderer(int renderer)
#ifdef OGLRENDERER_ENABLED
if (renderer == 1)
{
- if (!GLCompositor::Init())
+ CurGLCompositor = std::make_unique();
+ // Create OpenGL renderer
+ if (!CurGLCompositor->Init())
{
+ // Fallback on software renderer
renderer = 0;
+ GPU3D::CurrentRenderer = std::make_unique();
+ GPU3D::CurrentRenderer->Init();
}
- else if (!GPU3D::GLRenderer::Init())
+ GPU3D::CurrentRenderer = std::make_unique();
+ if (!GPU3D::CurrentRenderer->Init())
{
- GLCompositor::DeInit();
+ // Fallback on software renderer
+ CurGLCompositor->DeInit();
+ CurGLCompositor.reset();
renderer = 0;
+ GPU3D::CurrentRenderer = std::make_unique();
}
}
else
#endif
{
- GPU3D::SoftRenderer::Init();
+ GPU3D::CurrentRenderer = std::make_unique();
+ GPU3D::CurrentRenderer->Init();
}
Renderer = renderer;
- Accelerated = renderer != 0;
}
void DeInitRenderer()
{
- if (Renderer == 0)
- {
- GPU3D::SoftRenderer::DeInit();
- }
+ GPU3D::CurrentRenderer->DeInit();
#ifdef OGLRENDERER_ENABLED
- else
+ if (Renderer == 1)
{
- GPU3D::GLRenderer::DeInit();
- GLCompositor::DeInit();
+ CurGLCompositor->DeInit();
}
#endif
}
@@ -421,13 +437,13 @@ void ResetRenderer()
{
if (Renderer == 0)
{
- GPU3D::SoftRenderer::Reset();
+ GPU3D::CurrentRenderer->Reset();
}
#ifdef OGLRENDERER_ENABLED
else
{
- GLCompositor::Reset();
- GPU3D::GLRenderer::Reset();
+ CurGLCompositor->Reset();
+ GPU3D::CurrentRenderer->Reset();
}
#endif
}
@@ -440,10 +456,12 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
InitRenderer(renderer);
}
- bool accel = Accelerated;
int fbsize;
- if (accel) fbsize = (256*3 + 1) * 192;
- else fbsize = 256 * 192;
+ if (GPU3D::CurrentRenderer->Accelerated)
+ fbsize = (256*3 + 1) * 192;
+ else
+ fbsize = 256 * 192;
+
if (Framebuffer[0][0]) { delete[] Framebuffer[0][0]; Framebuffer[0][0] = nullptr; }
if (Framebuffer[1][0]) { delete[] Framebuffer[1][0]; Framebuffer[1][0] = nullptr; }
if (Framebuffer[0][1]) { delete[] Framebuffer[0][1]; Framebuffer[0][1] = nullptr; }
@@ -461,18 +479,15 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
AssignFramebuffers();
- GPU2D_A->SetRenderSettings(accel);
- GPU2D_B->SetRenderSettings(accel);
-
if (Renderer == 0)
{
- GPU3D::SoftRenderer::SetRenderSettings(settings);
+ GPU3D::CurrentRenderer->SetRenderSettings(settings);
}
#ifdef OGLRENDERER_ENABLED
else
{
- GLCompositor::SetRenderSettings(settings);
- GPU3D::GLRenderer::SetRenderSettings(settings);
+ CurGLCompositor->SetRenderSettings(settings);
+ GPU3D::CurrentRenderer->SetRenderSettings(settings);
}
#endif
}
@@ -1149,7 +1164,9 @@ void StartScanline(u32 line)
GPU3D::VBlank();
#ifdef OGLRENDERER_ENABLED
- if (Accelerated) GLCompositor::RenderFrame();
+ // Need a better way to identify the openGL renderer in particular
+ if (GPU3D::CurrentRenderer->Accelerated)
+ CurGLCompositor->RenderFrame();
#endif
}
}
diff --git a/src/GPU.h b/src/GPU.h
index 2fc15f49de..1e24051de5 100644
--- a/src/GPU.h
+++ b/src/GPU.h
@@ -19,9 +19,15 @@
#ifndef GPU_H
#define GPU_H
+#include
+
#include "GPU2D.h"
#include "NonStupidBitfield.h"
+#ifdef OGLRENDERER_ENABLED
+#include "GPU_OpenGL.h"
+#endif
+
namespace GPU
{
@@ -69,8 +75,8 @@ extern u8* VRAMPtr_BOBJ[0x8];
extern int FrontBuffer;
extern u32* Framebuffer[2][2];
-extern GPU2D* GPU2D_A;
-extern GPU2D* GPU2D_B;
+extern std::unique_ptr GPU2D_A;
+extern std::unique_ptr GPU2D_B;
extern int Renderer;
@@ -149,6 +155,10 @@ void SyncDirtyFlags();
extern u32 OAMDirty;
extern u32 PaletteDirty;
+#ifdef OGLRENDERER_ENABLED
+extern std::unique_ptr CurGLCompositor;
+#endif
+
struct RenderSettings
{
bool Soft_Threaded;
@@ -550,24 +560,6 @@ void DisplayFIFO(u32 x);
void SetDispStat(u32 cpu, u16 val);
void SetVCount(u16 val);
-
-#ifdef OGLRENDERER_ENABLED
-namespace GLCompositor
-{
-
-bool Init();
-void DeInit();
-void Reset();
-
-void SetRenderSettings(RenderSettings& settings);
-
-void Stop();
-void RenderFrame();
-void BindOutputTexture(int buf);
-
-}
-#endif
-
}
#include "GPU3D.h"
diff --git a/src/GPU2D.h b/src/GPU2D.h
index 0f59ae36b3..e9ce8e1e8b 100644
--- a/src/GPU2D.h
+++ b/src/GPU2D.h
@@ -28,13 +28,15 @@ class GPU2D
GPU2D(u32 num);
virtual ~GPU2D() {}
+ GPU2D(const GPU2D&) = delete;
+ GPU2D& operator=(const GPU2D&) = delete;
+
void Reset();
void DoSavestate(Savestate* file);
void SetEnabled(bool enable) { Enabled = enable; }
void SetFramebuffer(u32* buf);
- virtual void SetRenderSettings(bool accel) = 0;
u8 Read8(u32 addr);
u16 Read16(u32 addr);
@@ -115,8 +117,8 @@ class GPU2D
u16 MasterBrightness;
- u8 WindowMask[256] __attribute__((aligned (8)));
- u8 OBJWindow[256] __attribute__((aligned (8)));
+ alignas(8) u8 WindowMask[256];
+ alignas(8) u8 OBJWindow[256];
void UpdateMosaicCounters(u32 line);
void CalculateWindowMask(u32 line);
@@ -124,65 +126,4 @@ class GPU2D
virtual void MosaicXSizeChanged() = 0;
};
-class GPU2D_Soft : public GPU2D
-{
-public:
- GPU2D_Soft(u32 num);
- ~GPU2D_Soft() override {}
-
- void SetRenderSettings(bool accel) override;
-
- void DrawScanline(u32 line) override;
- void DrawSprites(u32 line) override;
- void VBlankEnd() override;
-
-protected:
- void MosaicXSizeChanged() override;
-
-private:
- bool Accelerated;
-
- u32 BGOBJLine[256*3] __attribute__((aligned (8)));
- u32* _3DLine;
-
- u32 OBJLine[256] __attribute__((aligned (8)));
- u8 OBJIndex[256] __attribute__((aligned (8)));
-
- u32 NumSprites;
-
- u8 MosaicTable[16][256];
- u8* CurBGXMosaicTable;
- u8* CurOBJXMosaicTable;
-
- u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
- u32 ColorBlend5(u32 val1, u32 val2);
- u32 ColorBrightnessUp(u32 val, u32 factor);
- u32 ColorBrightnessDown(u32 val, u32 factor);
- u32 ColorComposite(int i, u32 val1, u32 val2);
-
- template void DrawScanlineBGMode(u32 line);
- void DrawScanlineBGMode6(u32 line);
- void DrawScanlineBGMode7(u32 line);
- void DrawScanline_BGOBJ(u32 line);
-
- static void DrawPixel_Normal(u32* dst, u16 color, u32 flag);
- static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
-
- typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag);
-
- void DrawBG_3D();
- template void DrawBG_Text(u32 line, u32 bgnum);
- template void DrawBG_Affine(u32 line, u32 bgnum);
- template void DrawBG_Extended(u32 line, u32 bgnum);
- template void DrawBG_Large(u32 line);
-
- void ApplySpriteMosaicX();
- template
- void InterleaveSprites(u32 prio);
- template void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
- template void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
-
- void DoCapture(u32 line, u32 width);
-};
-
#endif
diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp
index 7cab67ad47..e455b7ca1c 100644
--- a/src/GPU2D_Soft.cpp
+++ b/src/GPU2D_Soft.cpp
@@ -1,4 +1,4 @@
-#include "GPU2D.h"
+#include "GPU2D_Soft.h"
#include "GPU.h"
GPU2D_Soft::GPU2D_Soft(u32 num)
@@ -15,11 +15,6 @@ GPU2D_Soft::GPU2D_Soft(u32 num)
}
}
-void GPU2D_Soft::SetRenderSettings(bool accel)
-{
- Accelerated = accel;
-}
-
u32 GPU2D_Soft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb)
{
u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4;
@@ -152,7 +147,7 @@ u32 GPU2D_Soft::ColorComposite(int i, u32 val1, u32 val2)
void GPU2D_Soft::DrawScanline(u32 line)
{
- int stride = Accelerated ? (256*3 + 1) : 256;
+ int stride = GPU3D::CurrentRenderer->Accelerated ? (256*3 + 1) : 256;
u32* dst = &Framebuffer[stride * line];
int n3dline = line;
@@ -192,7 +187,7 @@ void GPU2D_Soft::DrawScanline(u32 line)
if (Num == 0)
{
- if (!Accelerated)
+ if (!GPU3D::CurrentRenderer->Accelerated)
_3DLine = GPU3D::GetLine(n3dline);
else if (CaptureLatch && (((CaptureCnt >> 29) & 0x3) != 1))
{
@@ -206,7 +201,7 @@ void GPU2D_Soft::DrawScanline(u32 line)
for (int i = 0; i < 256; i++)
dst[i] = 0xFFFFFFFF;
- if (Accelerated)
+ if (GPU3D::CurrentRenderer->Accelerated)
{
dst[256*3] = 0;
}
@@ -296,7 +291,7 @@ void GPU2D_Soft::DrawScanline(u32 line)
DoCapture(line, capwidth);
}
- if (Accelerated)
+ if (GPU3D::CurrentRenderer->Accelerated)
{
dst[256*3] = MasterBrightness | (DispCnt & 0x30000);
return;
@@ -350,11 +345,11 @@ void GPU2D_Soft::VBlankEnd()
GPU2D::VBlankEnd();
#ifdef OGLRENDERER_ENABLED
- if (Accelerated)
+ if (GPU3D::CurrentRenderer->Accelerated)
{
if ((Num == 0) && (CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1))
{
- GPU3D::GLRenderer::PrepareCaptureFrame();
+ reinterpret_cast(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame();
}
}
#endif
@@ -372,7 +367,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width)
u16* dst = (u16*)GPU::VRAM[dstvram];
u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width);
- // TODO: handle 3D in accelerated mode!!
+ // TODO: handle 3D in accelerated mode!!
u32* srcA;
if (CaptureCnt & (1<<24))
@@ -382,9 +377,9 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width)
else
{
srcA = BGOBJLine;
- if (Accelerated)
+ if (GPU3D::CurrentRenderer->Accelerated)
{
- // in accelerated mode, compositing is normally done on the GPU
+ // in accelerated mode, compositing is normally done on the GPU
// but when doing display capture, we do need the composited output
// so we do it here
@@ -586,12 +581,12 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width)
{ \
if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \
{ \
- if (Accelerated) DrawBG_##type(line, num); \
+ if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type(line, num); \
else DrawBG_##type(line, num); \
} \
else \
{ \
- if (Accelerated) DrawBG_##type(line, num); \
+ if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type(line, num); \
else DrawBG_##type(line, num); \
} \
} while (false)
@@ -601,18 +596,18 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width)
{ \
if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \
{ \
- if (Accelerated) DrawBG_Large(line); \
+ if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large(line); \
else DrawBG_Large(line); \
} \
else \
{ \
- if (Accelerated) DrawBG_Large(line); \
+ if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large(line); \
else DrawBG_Large(line); \
} \
} while (false)
#define DoInterleaveSprites(prio) \
- if (Accelerated) InterleaveSprites(prio); else InterleaveSprites(prio);
+ if (GPU3D::CurrentRenderer->Accelerated) InterleaveSprites(prio); else InterleaveSprites(prio);
template
void GPU2D_Soft::DrawScanlineBGMode(u32 line)
@@ -773,7 +768,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line)
// color special effects
// can likely be optimized
- if (!Accelerated)
+ if (!GPU3D::CurrentRenderer->Accelerated)
{
for (int i = 0; i < 256; i++)
{
@@ -919,7 +914,7 @@ void GPU2D_Soft::DrawBG_3D()
{
int i = 0;
- if (Accelerated)
+ if (GPU3D::CurrentRenderer->Accelerated)
{
for (i = 0; i < 256; i++)
{
diff --git a/src/GPU2D_Soft.h b/src/GPU2D_Soft.h
new file mode 100644
index 0000000000..754f08a781
--- /dev/null
+++ b/src/GPU2D_Soft.h
@@ -0,0 +1,79 @@
+/*
+ Copyright 2016-2020 Arisotura
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#pragma once
+
+#include "GPU2D.h"
+
+class GPU2D_Soft : public GPU2D
+{
+public:
+ GPU2D_Soft(u32 num);
+ ~GPU2D_Soft() override {}
+
+ void DrawScanline(u32 line) override;
+ void DrawSprites(u32 line) override;
+ void VBlankEnd() override;
+
+protected:
+ void MosaicXSizeChanged() override;
+
+private:
+
+ alignas(8) u32 BGOBJLine[256*3];
+ u32* _3DLine;
+
+ alignas(8) u32 OBJLine[256];
+ alignas(8) u8 OBJIndex[256];
+
+ u32 NumSprites;
+
+ u8 MosaicTable[16][256];
+ u8* CurBGXMosaicTable;
+ u8* CurOBJXMosaicTable;
+
+ u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
+ u32 ColorBlend5(u32 val1, u32 val2);
+ u32 ColorBrightnessUp(u32 val, u32 factor);
+ u32 ColorBrightnessDown(u32 val, u32 factor);
+ u32 ColorComposite(int i, u32 val1, u32 val2);
+
+ template void DrawScanlineBGMode(u32 line);
+ void DrawScanlineBGMode6(u32 line);
+ void DrawScanlineBGMode7(u32 line);
+ void DrawScanline_BGOBJ(u32 line);
+
+ static void DrawPixel_Normal(u32* dst, u16 color, u32 flag);
+ static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
+
+ typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag);
+
+ void DrawBG_3D();
+ template void DrawBG_Text(u32 line, u32 bgnum);
+ template void DrawBG_Affine(u32 line, u32 bgnum);
+ template void DrawBG_Extended(u32 line, u32 bgnum);
+ template void DrawBG_Large(u32 line);
+
+ void ApplySpriteMosaicX();
+ template
+ void InterleaveSprites(u32 prio);
+ template void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
+ template void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
+
+ void DoCapture(u32 line, u32 width);
+};
\ No newline at end of file
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 9b418300bf..c933c829e0 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -273,7 +273,7 @@ u32 RenderNumPolygons;
u32 FlushRequest;
u32 FlushAttributes;
-
+std::unique_ptr CurrentRenderer = {};
bool Init()
{
@@ -2497,12 +2497,12 @@ void CheckFIFODMA()
void VCount144()
{
- if (GPU::Renderer == 0) SoftRenderer::VCount144();
+ CurrentRenderer->VCount144();
}
void RestartFrame()
{
- if (GPU::Renderer == 0) SoftRenderer::SetupRenderThread();
+ CurrentRenderer->RestartFrame();
}
@@ -2597,10 +2597,7 @@ void VBlank()
void VCount215()
{
- if (GPU::Renderer == 0) SoftRenderer::RenderFrame();
-#ifdef OGLRENDERER_ENABLED
- else GLRenderer::RenderFrame();
-#endif
+ CurrentRenderer->RenderFrame();
}
void SetRenderXPos(u16 xpos)
@@ -2614,12 +2611,7 @@ u32 ScrolledLine[256];
u32* GetLine(int line)
{
- u32* rawline = NULL;
-
- if (GPU::Renderer == 0) rawline = SoftRenderer::GetLine(line);
-#ifdef OGLRENDERER_ENABLED
- else rawline = GLRenderer::GetLine(line);
-#endif
+ u32* rawline = CurrentRenderer->GetLine(line);
if (RenderXPos == 0) return rawline;
@@ -3055,5 +3047,9 @@ void Write32(u32 addr, u32 val)
printf("unknown GPU3D write32 %08X %08X\n", addr, val);
}
+Renderer3D::Renderer3D(bool Accelerated)
+: Accelerated(Accelerated)
+{ }
+
}
diff --git a/src/GPU3D.h b/src/GPU3D.h
index e4629b04ac..1aba0bdd0e 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -20,6 +20,9 @@
#define GPU3D_H
#include
+#include
+
+#include "GPU.h"
#include "Savestate.h"
namespace GPU3D
@@ -96,8 +99,6 @@ extern u32 RenderNumPolygons;
extern u64 Timestamp;
-extern int Renderer;
-
bool Init();
void DeInit();
void Reset();
@@ -131,40 +132,42 @@ void Write8(u32 addr, u8 val);
void Write16(u32 addr, u16 val);
void Write32(u32 addr, u32 val);
-namespace SoftRenderer
+class Renderer3D
{
+public:
+ Renderer3D(bool Accelerated);
+ virtual ~Renderer3D() {};
-bool Init();
-void DeInit();
-void Reset();
+ Renderer3D(const Renderer3D&) = delete;
+ Renderer3D& operator=(const Renderer3D&) = delete;
-void SetRenderSettings(GPU::RenderSettings& settings);
-void SetupRenderThread();
+ virtual bool Init() = 0;
+ virtual void DeInit() = 0;
+ virtual void Reset() = 0;
-void VCount144();
-void RenderFrame();
-u32* GetLine(int line);
+ // This "Accelerated" flag currently communicates if the framebuffer should
+ // be allocated differently and other little misc handlers. Ideally there
+ // are more detailed "traits" that we can ask of the Renderer3D type
+ const bool Accelerated;
-}
+ virtual void SetRenderSettings(GPU::RenderSettings& settings) = 0;
-#ifdef OGLRENDERER_ENABLED
-namespace GLRenderer
-{
+ virtual void VCount144() {};
-bool Init();
-void DeInit();
-void Reset();
-
-void SetRenderSettings(GPU::RenderSettings& settings);
+ virtual void RenderFrame() = 0;
+ virtual void RestartFrame() {};
+ virtual u32* GetLine(int line) = 0;
+};
-void RenderFrame();
-void PrepareCaptureFrame();
-u32* GetLine(int line);
-void SetupAccelFrame();
+extern int Renderer;
+extern std::unique_ptr CurrentRenderer;
}
-#endif
-}
+#include "GPU3D_Soft.h"
+
+#ifdef OGLRENDERER_ENABLED
+#include "GPU3D_OpenGL.h"
+#endif
#endif
diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp
index 164f29a5fc..93c1523dab 100644
--- a/src/GPU3D_OpenGL.cpp
+++ b/src/GPU3D_OpenGL.cpp
@@ -16,118 +16,19 @@
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
+#include "GPU3D_OpenGL.h"
+
#include
#include
#include "NDS.h"
#include "GPU.h"
#include "Config.h"
-#include "OpenGLSupport.h"
#include "GPU3D_OpenGL_shaders.h"
namespace GPU3D
{
-namespace GLRenderer
-{
-
-using namespace OpenGL;
-
-// GL version requirements
-// * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS)
-// * UBO: 3.1
-
-
-enum
-{
- RenderFlag_WBuffer = 0x01,
- RenderFlag_Trans = 0x02,
- RenderFlag_ShadowMask = 0x04,
- RenderFlag_Edge = 0x08,
-};
-
-
-GLuint ClearShaderPlain[3];
-
-GLuint RenderShader[16][3];
-GLuint CurShaderID = -1;
-
-GLuint FinalPassEdgeShader[3];
-GLuint FinalPassFogShader[3];
-
-// std140 compliant structure
-struct
-{
- float uScreenSize[2]; // vec2 0 / 2
- u32 uDispCnt; // int 2 / 1
- u32 __pad0;
- float uToonColors[32][4]; // vec4[32] 4 / 128
- float uEdgeColors[8][4]; // vec4[8] 132 / 32
- float uFogColor[4]; // vec4 164 / 4
- float uFogDensity[34][4]; // float[34] 168 / 136
- u32 uFogOffset; // int 304 / 1
- u32 uFogShift; // int 305 / 1
- u32 _pad1[2]; // int 306 / 2
-} ShaderConfig;
-
-GLuint ShaderConfigUBO;
-
-struct RendererPolygon
-{
- Polygon* PolyData;
-
- u32 NumIndices;
- u32 IndicesOffset;
- GLuint PrimType;
-
- u32 NumEdgeIndices;
- u32 EdgeIndicesOffset;
- u32 RenderKey;
-};
-
-RendererPolygon PolygonList[2048];
-int NumFinalPolys, NumOpaqueFinalPolys;
-
-GLuint ClearVertexBufferID, ClearVertexArrayID;
-GLint ClearUniformLoc[4];
-
-// vertex buffer
-// * XYZW: 4x16bit
-// * RGBA: 4x8bit
-// * ST: 2x16bit
-// * polygon data: 3x32bit (polygon/texture attributes)
-//
-// polygon attributes:
-// * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR
-// * bit16-20: Z shift
-// * bit8: front-facing (?)
-// * bit9: W-buffering (?)
-
-GLuint VertexBufferID;
-u32 VertexBuffer[10240 * 7];
-u32 NumVertices;
-
-GLuint VertexArrayID;
-GLuint IndexBufferID;
-u16 IndexBuffer[2048 * 40];
-u32 NumIndices, NumEdgeIndices;
-
-const u32 EdgeIndicesOffset = 2048 * 30;
-
-GLuint TexMemID;
-GLuint TexPalMemID;
-
-int ScaleFactor;
-bool BetterPolygons;
-int ScreenW, ScreenH;
-
-GLuint FramebufferTex[8];
-int FrontBuffer;
-GLuint FramebufferID[4], PixelbufferID;
-u32 Framebuffer[256*192];
-
-
-
-bool BuildRenderShader(u32 flags, const char* vs, const char* fs)
+bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs)
{
char shadername[32];
sprintf(shadername, "RenderShader%02X", flags);
@@ -180,7 +81,7 @@ bool BuildRenderShader(u32 flags, const char* vs, const char* fs)
return true;
}
-void UseRenderShader(u32 flags)
+void GLRenderer::UseRenderShader(u32 flags)
{
if (CurShaderID == flags) return;
glUseProgram(RenderShader[flags][2]);
@@ -196,7 +97,12 @@ void SetupDefaultTexParams(GLuint tex)
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
}
-bool Init()
+GLRenderer::GLRenderer()
+ : Renderer3D(true)
+{
+}
+
+bool GLRenderer::Init()
{
GLint uni_id;
@@ -382,7 +288,7 @@ bool Init()
return true;
}
-void DeInit()
+void GLRenderer::DeInit()
{
glDeleteTextures(1, &TexMemID);
glDeleteTextures(1, &TexPalMemID);
@@ -404,11 +310,11 @@ void DeInit()
}
}
-void Reset()
+void GLRenderer::Reset()
{
}
-void SetRenderSettings(GPU::RenderSettings& settings)
+void GLRenderer::SetRenderSettings(GPU::RenderSettings& settings)
{
int scale = settings.GL_ScaleFactor;
@@ -462,7 +368,7 @@ void SetRenderSettings(GPU::RenderSettings& settings)
}
-void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
+void GLRenderer::SetupPolygon(GLRenderer::RendererPolygon* rp, Polygon* polygon)
{
rp->PolyData = polygon;
@@ -508,7 +414,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
}
}
-u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr)
+u32* GLRenderer::SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr)
{
u32 z = poly->FinalZ[vid];
u32 w = poly->FinalW[vid];
@@ -569,7 +475,7 @@ u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr)
return vptr;
}
-void BuildPolygons(RendererPolygon* polygons, int npolys)
+void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys)
{
u32* vptr = &VertexBuffer[0];
u32 vidx = 0;
@@ -791,7 +697,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys)
NumEdgeIndices = eidx - EdgeIndicesOffset;
}
-int RenderSinglePolygon(int i)
+int GLRenderer::RenderSinglePolygon(int i)
{
RendererPolygon* rp = &PolygonList[i];
@@ -800,7 +706,7 @@ int RenderSinglePolygon(int i)
return 1;
}
-int RenderPolygonBatch(int i)
+int GLRenderer::RenderPolygonBatch(int i)
{
RendererPolygon* rp = &PolygonList[i];
GLuint primtype = rp->PrimType;
@@ -822,7 +728,7 @@ int RenderPolygonBatch(int i)
return numpolys;
}
-int RenderPolygonEdgeBatch(int i)
+int GLRenderer::RenderPolygonEdgeBatch(int i)
{
RendererPolygon* rp = &PolygonList[i];
u32 key = rp->RenderKey;
@@ -842,7 +748,7 @@ int RenderPolygonEdgeBatch(int i)
return numpolys;
}
-void RenderSceneChunk(int y, int h)
+void GLRenderer::RenderSceneChunk(int y, int h)
{
u32 flags = 0;
if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer;
@@ -1206,7 +1112,7 @@ void RenderSceneChunk(int y, int h)
}
-void RenderFrame()
+void GLRenderer::RenderFrame()
{
CurShaderID = -1;
@@ -1381,7 +1287,7 @@ void RenderFrame()
FrontBuffer = FrontBuffer ? 0 : 1;
}
-void PrepareCaptureFrame()
+void GLRenderer::PrepareCaptureFrame()
{
// TODO: make sure this picks the right buffer when doing antialiasing
int original_fb = FrontBuffer^1;
@@ -1396,7 +1302,7 @@ void PrepareCaptureFrame()
glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
}
-u32* GetLine(int line)
+u32* GLRenderer::GetLine(int line)
{
int stride = 256;
@@ -1419,10 +1325,9 @@ u32* GetLine(int line)
return &Framebuffer[stride * line];
}
-void SetupAccelFrame()
+void GLRenderer::SetupAccelFrame()
{
glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer]);
}
}
-}
diff --git a/src/GPU3D_OpenGL.h b/src/GPU3D_OpenGL.h
new file mode 100644
index 0000000000..73e295541e
--- /dev/null
+++ b/src/GPU3D_OpenGL.h
@@ -0,0 +1,152 @@
+/*
+ Copyright 2016-2020 Arisotura
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#pragma once
+
+#include "GPU3D.h"
+
+#include "OpenGLSupport.h"
+
+
+namespace GPU3D
+{
+class GLRenderer : public Renderer3D
+{
+public:
+ GLRenderer();
+ virtual ~GLRenderer() override {};
+ virtual bool Init() override;
+ virtual void DeInit() override;
+ virtual void Reset() override;
+
+ virtual void SetRenderSettings(GPU::RenderSettings& settings) override;
+
+ virtual void VCount144() override {};
+ virtual void RenderFrame() override;
+ virtual u32* GetLine(int line) override;
+
+ void SetupAccelFrame();
+ void PrepareCaptureFrame();
+private:
+
+ // GL version requirements
+ // * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS)
+ // * UBO: 3.1
+
+ struct RendererPolygon
+ {
+ Polygon* PolyData;
+
+ u32 NumIndices;
+ u32 IndicesOffset;
+ GLuint PrimType;
+
+ u32 NumEdgeIndices;
+ u32 EdgeIndicesOffset;
+
+ u32 RenderKey;
+ };
+
+ RendererPolygon PolygonList[2048];
+
+ bool BuildRenderShader(u32 flags, const char* vs, const char* fs);
+ void UseRenderShader(u32 flags);
+ void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
+ u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr);
+ void BuildPolygons(RendererPolygon* polygons, int npolys);
+ int RenderSinglePolygon(int i);
+ int RenderPolygonBatch(int i);
+ int RenderPolygonEdgeBatch(int i);
+ void RenderSceneChunk(int y, int h);
+
+ enum
+ {
+ RenderFlag_WBuffer = 0x01,
+ RenderFlag_Trans = 0x02,
+ RenderFlag_ShadowMask = 0x04,
+ RenderFlag_Edge = 0x08,
+ };
+
+
+ GLuint ClearShaderPlain[3];
+
+ GLuint RenderShader[16][3];
+ GLuint CurShaderID = -1;
+
+ GLuint FinalPassEdgeShader[3];
+ GLuint FinalPassFogShader[3];
+
+ // std140 compliant structure
+ struct
+ {
+ float uScreenSize[2]; // vec2 0 / 2
+ u32 uDispCnt; // int 2 / 1
+ u32 __pad0;
+ float uToonColors[32][4]; // vec4[32] 4 / 128
+ float uEdgeColors[8][4]; // vec4[8] 132 / 32
+ float uFogColor[4]; // vec4 164 / 4
+ float uFogDensity[34][4]; // float[34] 168 / 136
+ u32 uFogOffset; // int 304 / 1
+ u32 uFogShift; // int 305 / 1
+ u32 _pad1[2]; // int 306 / 2
+ } ShaderConfig;
+
+ GLuint ShaderConfigUBO;
+ int NumFinalPolys, NumOpaqueFinalPolys;
+
+ GLuint ClearVertexBufferID, ClearVertexArrayID;
+ GLint ClearUniformLoc[4];
+
+ // vertex buffer
+ // * XYZW: 4x16bit
+ // * RGBA: 4x8bit
+ // * ST: 2x16bit
+ // * polygon data: 3x32bit (polygon/texture attributes)
+ //
+ // polygon attributes:
+ // * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR
+ // * bit16-20: Z shift
+ // * bit8: front-facing (?)
+ // * bit9: W-buffering (?)
+
+ GLuint VertexBufferID;
+ u32 VertexBuffer[10240 * 7];
+ u32 NumVertices;
+
+ GLuint VertexArrayID;
+ GLuint IndexBufferID;
+ u16 IndexBuffer[2048 * 40];
+ u32 NumIndices, NumEdgeIndices;
+
+ const u32 EdgeIndicesOffset = 2048 * 30;
+
+ GLuint TexMemID;
+ GLuint TexPalMemID;
+
+ int ScaleFactor;
+ bool BetterPolygons;
+ int ScreenW, ScreenH;
+
+ GLuint FramebufferTex[8];
+ int FrontBuffer;
+ GLuint FramebufferID[4], PixelbufferID;
+ u32 Framebuffer[256*192];
+
+
+};
+}
\ No newline at end of file
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index 3d6ace6cc1..f6d27a0df5 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -16,82 +16,43 @@
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
+#include "GPU3D_Soft.h"
+
#include
#include
#include "NDS.h"
#include "GPU.h"
#include "Config.h"
-#include "Platform.h"
namespace GPU3D
{
-namespace SoftRenderer
-{
-
-// buffer dimensions are 258x194 to add a offscreen 1px border
-// which simplifies edge marking tests
-// buffer is duplicated to keep track of the two topmost pixels
-// TODO: check if the hardware can accidentally plot pixels
-// offscreen in that border
-
-const int ScanlineWidth = 258;
-const int NumScanlines = 194;
-const int BufferSize = ScanlineWidth * NumScanlines;
-const int FirstPixelOffset = ScanlineWidth + 1;
-
-u32 ColorBuffer[BufferSize * 2];
-u32 DepthBuffer[BufferSize * 2];
-u32 AttrBuffer[BufferSize * 2];
-
-// attribute buffer:
-// bit0-3: edge flags (left/right/top/bottom)
-// bit4: backfacing flag
-// bit8-12: antialiasing alpha
-// bit15: fog enable
-// bit16-21: polygon ID for translucent pixels
-// bit22: translucent flag
-// bit24-29: polygon ID for opaque pixels
-
-u8 StencilBuffer[256*2];
-bool PrevIsShadowMask;
-
-bool Enabled;
-
-bool FrameIdentical;
-
-// threading
-
-bool Threaded;
-Platform::Thread* RenderThread;
-bool RenderThreadRunning;
-bool RenderThreadRendering;
-Platform::Semaphore* Sema_RenderStart;
-Platform::Semaphore* Sema_RenderDone;
-Platform::Semaphore* Sema_ScanlineCount;
void RenderThreadFunc();
-void StopRenderThread()
+void SoftRenderer::StopRenderThread()
{
if (RenderThreadRunning)
{
RenderThreadRunning = false;
Platform::Semaphore_Post(Sema_RenderStart);
- Platform::Thread_Wait(RenderThread);
- Platform::Thread_Free(RenderThread);
+ // Platform::Thread_Wait(RenderThread);
+ // Platform::Thread_Free(RenderThread);
+ RenderThread.join();
+
}
}
-void SetupRenderThread()
+void SoftRenderer::SetupRenderThread()
{
if (Threaded)
{
if (!RenderThreadRunning)
{
RenderThreadRunning = true;
- RenderThread = Platform::Thread_Create(RenderThreadFunc);
+ //RenderThread = Platform::Thread_Create(RenderThreadFunc);
+ RenderThread = std::thread(&SoftRenderer::RenderThreadFunc, this);
}
// otherwise more than one frame can be queued up at once
@@ -113,7 +74,13 @@ void SetupRenderThread()
}
-bool Init()
+SoftRenderer::SoftRenderer()
+ : Renderer3D(false)
+{
+
+}
+
+bool SoftRenderer::Init()
{
Sema_RenderStart = Platform::Semaphore_Create();
Sema_RenderDone = Platform::Semaphore_Create();
@@ -126,7 +93,7 @@ bool Init()
return true;
}
-void DeInit()
+void SoftRenderer::DeInit()
{
StopRenderThread();
@@ -135,7 +102,7 @@ void DeInit()
Platform::Semaphore_Free(Sema_ScanlineCount);
}
-void Reset()
+void SoftRenderer::Reset()
{
memset(ColorBuffer, 0, BufferSize * 2 * 4);
memset(DepthBuffer, 0, BufferSize * 2 * 4);
@@ -146,428 +113,13 @@ void Reset()
SetupRenderThread();
}
-void SetRenderSettings(GPU::RenderSettings& settings)
+void SoftRenderer::SetRenderSettings(GPU::RenderSettings& settings)
{
Threaded = settings.Soft_Threaded;
SetupRenderThread();
}
-
-
-// Notes on the interpolator:
-//
-// This is a theory on how the DS hardware interpolates values. It matches hardware output
-// in the tests I did, but the hardware may be doing it differently. You never know.
-//
-// Assuming you want to perspective-correctly interpolate a variable named A across two points
-// in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly,
-// then divide A/W by 1/W to recover the correct A value.
-//
-// The DS GPU approximates interpolation by calculating a perspective-correct interpolation
-// between 0 and 1, then using the result as a factor to linearly interpolate the actual
-// vertex attributes. The factor has 9 bits of precision when interpolating along Y and
-// 8 bits along X.
-//
-// There's a special path for when the two W values are equal: it directly does linear
-// interpolation, avoiding precision loss from the aforementioned approximation.
-// Which is desirable when using the GPU to draw 2D graphics.
-
-template<int dir>
-class Interpolator
-{
-public:
- Interpolator() {}
- Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
- {
- Setup(x0, x1, w0, w1);
- }
-
- void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
- {
- this->x0 = x0;
- this->x1 = x1;
- this->xdiff = x1 - x0;
-
- // calculate reciprocals for linear mode and Z interpolation
- // TODO eventually: use a faster reciprocal function?
- if (this->xdiff != 0)
- this->xrecip = (1<<30) / this->xdiff;
- else
- this->xrecip = 0;
- this->xrecip_z = this->xrecip >> 8;
-
- // linear mode is used if both W values are equal and have
- // low-order bits cleared (0-6 along X, 1-6 along Y)
- u32 mask = dir ? 0x7E : 0x7F;
- if ((w0 == w1) && !(w0 & mask) && !(w1 & mask))
- this->linear = true;
- else
- this->linear = false;
-
- if (dir)
- {
- // along Y
-
- if ((w0 & 0x1) && !(w1 & 0x1))
- {
- this->w0n = w0 - 1;
- this->w0d = w0 + 1;
- this->w1d = w1;
- }
- else
- {
- this->w0n = w0 & 0xFFFE;
- this->w0d = w0 & 0xFFFE;
- this->w1d = w1 & 0xFFFE;
- }
-
- this->shift = 9;
- }
- else
- {
- // along X
-
- this->w0n = w0;
- this->w0d = w0;
- this->w1d = w1;
-
- this->shift = 8;
- }
- }
-
- void SetX(s32 x)
- {
- x -= x0;
- this->x = x;
- if (xdiff != 0 && !linear)
- {
- s64 num = ((s64)x * w0n) << shift;
- s32 den = (x * w0d) + ((xdiff-x) * w1d);
-
- // this seems to be a proper division on hardware :/
- // I haven't been able to find cases that produce imperfect output
- if (den == 0) yfactor = 0;
- else yfactor = (s32)(num / den);
- }
- }
-
- s32 Interpolate(s32 y0, s32 y1)
- {
- if (xdiff == 0 || y0 == y1) return y0;
-
- if (!linear)
- {
- // perspective-correct approx. interpolation
- if (y0 < y1)
- return y0 + (((y1-y0) * yfactor) >> shift);
- else
- return y1 + (((y0-y1) * ((1<<shift)-yfactor)) >> shift);
- }
- else
- {
- // linear interpolation
- // checkme: the rounding bias there (3<<24) is a guess
- if (y0 < y1)
- return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30);
- else
- return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30);
- }
- }
-
- s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer)
- {
- if (xdiff == 0 || z0 == z1) return z0;
-
- if (wbuffer)
- {
- // W-buffering: perspective-correct approx. interpolation
- if (z0 < z1)
- return z0 + (((s64)(z1-z0) * yfactor) >> shift);
- else
- return z1 + (((s64)(z0-z1) * ((1<<shift)-yfactor)) >> shift);
- }
- else
- {
- // Z-buffering: linear interpolation
- // still doesn't quite match hardware...
- s32 base, disp, factor;
-
- if (z0 < z1)
- {
- base = z0;
- disp = z1 - z0;
- factor = x;
- }
- else
- {
- base = z1;
- disp = z0 - z1,
- factor = xdiff - x;
- }
-
- if (dir)
- {
- int shift = 0;
- while (disp > 0x3FF)
- {
- disp >>= 1;
- shift++;
- }
-
- return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift);
- }
- else
- {
- disp >>= 9;
- return base + (((s64)disp * factor * xrecip_z) >> 13);
- }
- }
- }
-
-private:
- s32 x0, x1, xdiff, x;
-
- int shift;
- bool linear;
-
- s32 xrecip, xrecip_z;
- s32 w0n, w0d, w1d;
-
- u32 yfactor;
-};
-
-
-template<int side>
-class Slope
-{
-public:
- Slope() {}
-
- s32 SetupDummy(s32 x0)
- {
- if (side)
- {
- dx = -0x40000;
- x0--;
- }
- else
- {
- dx = 0;
- }
-
- this->x0 = x0;
- this->xmin = x0;
- this->xmax = x0;
-
- Increment = 0;
- XMajor = false;
-
- Interp.Setup(0, 0, 0, 0);
- Interp.SetX(0);
-
- xcov_incr = 0;
-
- return x0;
- }
-
- s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
- {
- this->x0 = x0;
- this->y = y;
-
- if (x1 > x0)
- {
- this->xmin = x0;
- this->xmax = x1-1;
- this->Negative = false;
- }
- else if (x1 < x0)
- {
- this->xmin = x1;
- this->xmax = x0-1;
- this->Negative = true;
- }
- else
- {
- this->xmin = x0;
- if (side) this->xmin--;
- this->xmax = this->xmin;
- this->Negative = false;
- }
-
- xlen = xmax+1 - xmin;
- ylen = y1 - y0;
-
- // slope increment has a 18-bit fractional part
- // note: for some reason, x/y isn't calculated directly,
- // instead, 1/y is calculated and then multiplied by x
- // TODO: this is still not perfect (see for example x=169 y=33)
- if (ylen == 0)
- Increment = 0;
- else if (ylen == xlen)
- Increment = 0x40000;
- else
- {
- s32 yrecip = (1<<18) / ylen;
- Increment = (x1-x0) * yrecip;
- if (Increment < 0) Increment = -Increment;
- }
-
- XMajor = (Increment > 0x40000);
-
- if (side)
- {
- // right
-
- if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000);
- else if (Increment != 0) dx = Negative ? 0x40000 : 0;
- else dx = -0x40000;
- }
- else
- {
- // left
-
- if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000;
- else if (Increment != 0) dx = Negative ? 0x40000 : 0;
- else dx = 0;
- }
-
- dx += (y - y0) * Increment;
-
- s32 x = XVal();
-
- if (XMajor)
- {
- if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme
- else Interp.Setup(x0, x1, w0, w1);
- Interp.SetX(x);
-
- // used for calculating AA coverage
- xcov_incr = (ylen << 10) / xlen;
- }
- else
- {
- Interp.Setup(y0, y1, w0, w1);
- Interp.SetX(y);
- }
-
- return x;
- }
-
- s32 Step()
- {
- dx += Increment;
- y++;
-
- s32 x = XVal();
- if (XMajor)
- {
- Interp.SetX(x);
- }
- else
- {
- Interp.SetX(y);
- }
- return x;
- }
-
- s32 XVal()
- {
- s32 ret;
- if (Negative) ret = x0 - (dx >> 18);
- else ret = x0 + (dx >> 18);
-
- if (ret < xmin) ret = xmin;
- else if (ret > xmax) ret = xmax;
- return ret;
- }
-
- void EdgeParams_XMajor(s32* length, s32* coverage)
- {
- if (side ^ Negative)
- *length = (dx >> 18) - ((dx-Increment) >> 18);
- else
- *length = ((dx+Increment) >> 18) - (dx >> 18);
-
- // for X-major edges, we return the coverage
- // for the first pixel, and the increment for
- // further pixels on the same scanline
- s32 startx = dx >> 18;
- if (Negative) startx = xlen - startx;
- if (side) startx = startx - *length + 1;
-
- s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen;
- *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF);
- }
-
- void EdgeParams_YMajor(s32* length, s32* coverage)
- {
- *length = 1;
-
- if (Increment == 0)
- {
- *coverage = 31;
- }
- else
- {
- s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4;
- if ((cov >> 5) != (dx >> 18)) cov = 31;
- cov &= 0x1F;
- if (!(side ^ Negative)) cov = 0x1F - cov;
-
- *coverage = cov;
- }
- }
-
- void EdgeParams(s32* length, s32* coverage)
- {
- if (XMajor)
- return EdgeParams_XMajor(length, coverage);
- else
- return EdgeParams_YMajor(length, coverage);
- }
-
- s32 Increment;
- bool Negative;
- bool XMajor;
- Interpolator<1> Interp;
-
-private:
- s32 x0, xmin, xmax;
- s32 xlen, ylen;
- s32 dx;
- s32 y;
-
- s32 xcov_incr;
- s32 ycoverage, ycov_incr;
-};
-
-struct RendererPolygon
-{
- Polygon* PolyData;
-
- Slope<0> SlopeL;
- Slope<1> SlopeR;
- s32 XL, XR;
- u32 CurVL, CurVR;
- u32 NextVL, NextVR;
-
-};
-
-RendererPolygon PolygonList[2048];
-
-template <typename T>
-inline T ReadVRAM_Texture(u32 addr)
-{
- return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF];
-}
-template <typename T>
-inline T ReadVRAM_TexPal(u32 addr)
-{
- return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF];
-}
-
-void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
+void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{
u32 vramaddr = (texparam & 0xFFFF) << 3;
@@ -873,7 +425,7 @@ u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha)
return srcR | (srcG << 8) | (srcB << 16) | (dstalpha << 24);
}
-u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
+u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
{
u8 r, g, b, a;
@@ -981,7 +533,7 @@ u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
return r | (g << 8) | (b << 16) | (a << 24);
}
-void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow)
+void SoftRenderer::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow)
{
u32 dstattr = AttrBuffer[pixeladdr];
u32 attr = (polyattr & 0xE0F0) | ((polyattr >> 8) & 0xFF0000) | (1<<22) | (dstattr & 0xFF001F0F);
@@ -1020,7 +572,7 @@ void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 sha
AttrBuffer[pixeladdr] = attr;
}
-void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y)
+void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y)
{
Polygon* polygon = rp->PolyData;
@@ -1047,7 +599,7 @@ void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y)
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
}
-void SetupPolygonRightEdge(RendererPolygon* rp, s32 y)
+void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y)
{
Polygon* polygon = rp->PolyData;
@@ -1074,7 +626,7 @@ void SetupPolygonRightEdge(RendererPolygon* rp, s32 y)
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
}
-void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
+void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon)
{
u32 nverts = polygon->NumVertices;
@@ -1127,7 +679,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
}
}
-void RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
+void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
{
Polygon* polygon = rp->PolyData;
@@ -1340,7 +892,7 @@ void RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
rp->XR = rp->SlopeR.Step();
}
-void RenderPolygonScanline(RendererPolygon* rp, s32 y)
+void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
{
Polygon* polygon = rp->PolyData;
@@ -1755,7 +1307,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
rp->XR = rp->SlopeR.Step();
}
-void RenderScanline(s32 y, int npolys)
+void SoftRenderer::RenderScanline(s32 y, int npolys)
{
for (int i = 0; i < npolys; i++)
{
@@ -1772,8 +1324,7 @@ void RenderScanline(s32 y, int npolys)
}
}
-
-u32 CalculateFogDensity(u32 pixeladdr)
+u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr)
{
u32 z = DepthBuffer[pixeladdr];
u32 densityid, densityfrac;
@@ -1812,7 +1363,7 @@ u32 CalculateFogDensity(u32 pixeladdr)
return density;
}
-void ScanlineFinalPass(s32 y)
+void SoftRenderer::ScanlineFinalPass(s32 y)
{
// to consider:
// clearing all polygon fog flags if the master flag isn't set?
@@ -1981,7 +1532,7 @@ void ScanlineFinalPass(s32 y)
}
}
-void ClearBuffers()
+void SoftRenderer::ClearBuffers()
{
u32 clearz = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
u32 polyid = RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID
@@ -2055,7 +1606,7 @@ void ClearBuffers()
u32 a = (RenderClearAttr1 >> 16) & 0x1F;
u32 color = r | (g << 8) | (b << 16) | (a << 24);
- polyid |= (RenderClearAttr1 & 0x8000);
+ polyid |= (RenderClearAttr1 & 0x8000);
for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth)
{
@@ -2070,7 +1621,7 @@ void ClearBuffers()
}
}
-void RenderPolygons(bool threaded, Polygon** polygons, int npolys)
+void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
{
int j = 0;
for (int i = 0; i < npolys; i++)
@@ -2096,13 +1647,13 @@ void RenderPolygons(bool threaded, Polygon** polygons, int npolys)
Platform::Semaphore_Post(Sema_ScanlineCount);
}
-void VCount144()
+void SoftRenderer::VCount144()
{
if (RenderThreadRunning)
Platform::Semaphore_Wait(Sema_RenderDone);
}
-void RenderFrame()
+void SoftRenderer::RenderFrame()
{
auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture);
auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal);
@@ -2123,7 +1674,12 @@ void RenderFrame()
}
}
-void RenderThreadFunc()
+void SoftRenderer::RestartFrame()
+{
+ SetupRenderThread();
+}
+
+void SoftRenderer::RenderThreadFunc()
{
for (;;)
{
@@ -2146,7 +1702,7 @@ void RenderThreadFunc()
}
}
-u32* GetLine(int line)
+u32* SoftRenderer::GetLine(int line)
{
if (RenderThreadRunning)
{
@@ -2158,4 +1714,3 @@ u32* GetLine(int line)
}
}
-}
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
new file mode 100644
index 0000000000..851b7c19b5
--- /dev/null
+++ b/src/GPU3D_Soft.h
@@ -0,0 +1,516 @@
+/*
+ Copyright 2016-2020 Arisotura
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#pragma once
+
+#include "GPU3D.h"
+#include "Platform.h"
+#include <thread>
+
+namespace GPU3D
+{
+class SoftRenderer : public Renderer3D
+{
+public:
+ SoftRenderer();
+ virtual ~SoftRenderer() override {};
+ virtual bool Init() override;
+ virtual void DeInit() override;
+ virtual void Reset() override;
+
+ virtual void SetRenderSettings(GPU::RenderSettings& settings) override;
+
+ virtual void VCount144() override;
+ virtual void RenderFrame() override;
+ virtual void RestartFrame() override;
+ virtual u32* GetLine(int line) override;
+
+ void SetupRenderThread();
+ void StopRenderThread();
+private:
+ // Notes on the interpolator:
+ //
+ // This is a theory on how the DS hardware interpolates values. It matches hardware output
+ // in the tests I did, but the hardware may be doing it differently. You never know.
+ //
+ // Assuming you want to perspective-correctly interpolate a variable named A across two points
+ // in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly,
+ // then divide A/W by 1/W to recover the correct A value.
+ //
+ // The DS GPU approximates interpolation by calculating a perspective-correct interpolation
+ // between 0 and 1, then using the result as a factor to linearly interpolate the actual
+ // vertex attributes. The factor has 9 bits of precision when interpolating along Y and
+ // 8 bits along X.
+ //
+ // There's a special path for when the two W values are equal: it directly does linear
+ // interpolation, avoiding precision loss from the aforementioned approximation.
+ // Which is desirable when using the GPU to draw 2D graphics.
+
+ template<int dir>
+ class Interpolator
+ {
+ public:
+ Interpolator() {}
+ Interpolator(s32 x0, s32 x1, s32 w0, s32 w1)
+ {
+ Setup(x0, x1, w0, w1);
+ }
+
+ void Setup(s32 x0, s32 x1, s32 w0, s32 w1)
+ {
+ this->x0 = x0;
+ this->x1 = x1;
+ this->xdiff = x1 - x0;
+
+ // calculate reciprocals for linear mode and Z interpolation
+ // TODO eventually: use a faster reciprocal function?
+ if (this->xdiff != 0)
+ this->xrecip = (1<<30) / this->xdiff;
+ else
+ this->xrecip = 0;
+ this->xrecip_z = this->xrecip >> 8;
+
+ // linear mode is used if both W values are equal and have
+ // low-order bits cleared (0-6 along X, 1-6 along Y)
+ u32 mask = dir ? 0x7E : 0x7F;
+ if ((w0 == w1) && !(w0 & mask) && !(w1 & mask))
+ this->linear = true;
+ else
+ this->linear = false;
+
+ if (dir)
+ {
+ // along Y
+
+ if ((w0 & 0x1) && !(w1 & 0x1))
+ {
+ this->w0n = w0 - 1;
+ this->w0d = w0 + 1;
+ this->w1d = w1;
+ }
+ else
+ {
+ this->w0n = w0 & 0xFFFE;
+ this->w0d = w0 & 0xFFFE;
+ this->w1d = w1 & 0xFFFE;
+ }
+
+ this->shift = 9;
+ }
+ else
+ {
+ // along X
+
+ this->w0n = w0;
+ this->w0d = w0;
+ this->w1d = w1;
+
+ this->shift = 8;
+ }
+ }
+
+ void SetX(s32 x)
+ {
+ x -= x0;
+ this->x = x;
+ if (xdiff != 0 && !linear)
+ {
+ s64 num = ((s64)x * w0n) << shift;
+ s32 den = (x * w0d) + ((xdiff-x) * w1d);
+
+ // this seems to be a proper division on hardware :/
+ // I haven't been able to find cases that produce imperfect output
+ if (den == 0) yfactor = 0;
+ else yfactor = (s32)(num / den);
+ }
+ }
+
+ s32 Interpolate(s32 y0, s32 y1)
+ {
+ if (xdiff == 0 || y0 == y1) return y0;
+
+ if (!linear)
+ {
+ // perspective-correct approx. interpolation
+ if (y0 < y1)
+ return y0 + (((y1-y0) * yfactor) >> shift);
+ else
+ return y1 + (((y0-y1) * ((1<<shift)-yfactor)) >> shift);
+ }
+ else
+ {
+ // linear interpolation
+ // checkme: the rounding bias there (3<<24) is a guess
+ if (y0 < y1)
+ return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30);
+ else
+ return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30);
+ }
+ }
+
+ s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer)
+ {
+ if (xdiff == 0 || z0 == z1) return z0;
+
+ if (wbuffer)
+ {
+ // W-buffering: perspective-correct approx. interpolation
+ if (z0 < z1)
+ return z0 + (((s64)(z1-z0) * yfactor) >> shift);
+ else
+ return z1 + (((s64)(z0-z1) * ((1<<shift)-yfactor)) >> shift);
+ }
+ else
+ {
+ // Z-buffering: linear interpolation
+ // still doesn't quite match hardware...
+ s32 base, disp, factor;
+
+ if (z0 < z1)
+ {
+ base = z0;
+ disp = z1 - z0;
+ factor = x;
+ }
+ else
+ {
+ base = z1;
+ disp = z0 - z1,
+ factor = xdiff - x;
+ }
+
+ if (dir)
+ {
+ int shift = 0;
+ while (disp > 0x3FF)
+ {
+ disp >>= 1;
+ shift++;
+ }
+
+ return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift);
+ }
+ else
+ {
+ disp >>= 9;
+ return base + (((s64)disp * factor * xrecip_z) >> 13);
+ }
+ }
+ }
+
+ private:
+ s32 x0, x1, xdiff, x;
+
+ int shift;
+ bool linear;
+
+ s32 xrecip, xrecip_z;
+ s32 w0n, w0d, w1d;
+
+ u32 yfactor;
+ };
+
+
+ template<int side>
+ class Slope
+ {
+ public:
+ Slope() {}
+
+ s32 SetupDummy(s32 x0)
+ {
+ if (side)
+ {
+ dx = -0x40000;
+ x0--;
+ }
+ else
+ {
+ dx = 0;
+ }
+
+ this->x0 = x0;
+ this->xmin = x0;
+ this->xmax = x0;
+
+ Increment = 0;
+ XMajor = false;
+
+ Interp.Setup(0, 0, 0, 0);
+ Interp.SetX(0);
+
+ xcov_incr = 0;
+
+ return x0;
+ }
+
+ s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
+ {
+ this->x0 = x0;
+ this->y = y;
+
+ if (x1 > x0)
+ {
+ this->xmin = x0;
+ this->xmax = x1-1;
+ this->Negative = false;
+ }
+ else if (x1 < x0)
+ {
+ this->xmin = x1;
+ this->xmax = x0-1;
+ this->Negative = true;
+ }
+ else
+ {
+ this->xmin = x0;
+ if (side) this->xmin--;
+ this->xmax = this->xmin;
+ this->Negative = false;
+ }
+
+ xlen = xmax+1 - xmin;
+ ylen = y1 - y0;
+
+ // slope increment has a 18-bit fractional part
+ // note: for some reason, x/y isn't calculated directly,
+ // instead, 1/y is calculated and then multiplied by x
+ // TODO: this is still not perfect (see for example x=169 y=33)
+ if (ylen == 0)
+ Increment = 0;
+ else if (ylen == xlen)
+ Increment = 0x40000;
+ else
+ {
+ s32 yrecip = (1<<18) / ylen;
+ Increment = (x1-x0) * yrecip;
+ if (Increment < 0) Increment = -Increment;
+ }
+
+ XMajor = (Increment > 0x40000);
+
+ if (side)
+ {
+ // right
+
+ if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000);
+ else if (Increment != 0) dx = Negative ? 0x40000 : 0;
+ else dx = -0x40000;
+ }
+ else
+ {
+ // left
+
+ if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000;
+ else if (Increment != 0) dx = Negative ? 0x40000 : 0;
+ else dx = 0;
+ }
+
+ dx += (y - y0) * Increment;
+
+ s32 x = XVal();
+
+ if (XMajor)
+ {
+ if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme
+ else Interp.Setup(x0, x1, w0, w1);
+ Interp.SetX(x);
+
+ // used for calculating AA coverage
+ xcov_incr = (ylen << 10) / xlen;
+ }
+ else
+ {
+ Interp.Setup(y0, y1, w0, w1);
+ Interp.SetX(y);
+ }
+
+ return x;
+ }
+
+ s32 Step()
+ {
+ dx += Increment;
+ y++;
+
+ s32 x = XVal();
+ if (XMajor)
+ {
+ Interp.SetX(x);
+ }
+ else
+ {
+ Interp.SetX(y);
+ }
+ return x;
+ }
+
+ s32 XVal()
+ {
+ s32 ret;
+ if (Negative) ret = x0 - (dx >> 18);
+ else ret = x0 + (dx >> 18);
+
+ if (ret < xmin) ret = xmin;
+ else if (ret > xmax) ret = xmax;
+ return ret;
+ }
+
+ void EdgeParams_XMajor(s32* length, s32* coverage)
+ {
+ if (side ^ Negative)
+ *length = (dx >> 18) - ((dx-Increment) >> 18);
+ else
+ *length = ((dx+Increment) >> 18) - (dx >> 18);
+
+ // for X-major edges, we return the coverage
+ // for the first pixel, and the increment for
+ // further pixels on the same scanline
+ s32 startx = dx >> 18;
+ if (Negative) startx = xlen - startx;
+ if (side) startx = startx - *length + 1;
+
+ s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen;
+ *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF);
+ }
+
+ void EdgeParams_YMajor(s32* length, s32* coverage)
+ {
+ *length = 1;
+
+ if (Increment == 0)
+ {
+ *coverage = 31;
+ }
+ else
+ {
+ s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4;
+ if ((cov >> 5) != (dx >> 18)) cov = 31;
+ cov &= 0x1F;
+ if (!(side ^ Negative)) cov = 0x1F - cov;
+
+ *coverage = cov;
+ }
+ }
+
+ void EdgeParams(s32* length, s32* coverage)
+ {
+ if (XMajor)
+ return EdgeParams_XMajor(length, coverage);
+ else
+ return EdgeParams_YMajor(length, coverage);
+ }
+
+ s32 Increment;
+ bool Negative;
+ bool XMajor;
+ Interpolator<1> Interp;
+
+ private:
+ s32 x0, xmin, xmax;
+ s32 xlen, ylen;
+ s32 dx;
+ s32 y;
+
+ s32 xcov_incr;
+ s32 ycoverage, ycov_incr;
+ };
+
+ template <typename T>
+ inline T ReadVRAM_Texture(u32 addr)
+ {
+ return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF];
+ }
+ template <typename T>
+ inline T ReadVRAM_TexPal(u32 addr)
+ {
+ return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF];
+ }
+
+ struct RendererPolygon
+ {
+ Polygon* PolyData;
+
+ Slope<0> SlopeL;
+ Slope<1> SlopeR;
+ s32 XL, XR;
+ u32 CurVL, CurVR;
+ u32 NextVL, NextVR;
+
+ };
+
+ RendererPolygon PolygonList[2048];
+ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha);
+ u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t);
+ void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
+ void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y);
+ void SetupPolygonRightEdge(RendererPolygon* rp, s32 y);
+ void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
+ void RenderShadowMaskScanline(RendererPolygon* rp, s32 y);
+ void RenderPolygonScanline(RendererPolygon* rp, s32 y);
+ void RenderScanline(s32 y, int npolys);
+ u32 CalculateFogDensity(u32 pixeladdr);
+ void ScanlineFinalPass(s32 y);
+ void ClearBuffers();
+ void RenderPolygons(bool threaded, Polygon** polygons, int npolys);
+
+ void RenderThreadFunc();
+
+ // buffer dimensions are 258x194 to add a offscreen 1px border
+ // which simplifies edge marking tests
+ // buffer is duplicated to keep track of the two topmost pixels
+ // TODO: check if the hardware can accidentally plot pixels
+ // offscreen in that border
+
+ static constexpr int ScanlineWidth = 258;
+ static constexpr int NumScanlines = 194;
+ static constexpr int BufferSize = ScanlineWidth * NumScanlines;
+ static constexpr int FirstPixelOffset = ScanlineWidth + 1;
+
+ u32 ColorBuffer[BufferSize * 2];
+ u32 DepthBuffer[BufferSize * 2];
+ u32 AttrBuffer[BufferSize * 2];
+
+ // attribute buffer:
+ // bit0-3: edge flags (left/right/top/bottom)
+ // bit4: backfacing flag
+ // bit8-12: antialiasing alpha
+ // bit15: fog enable
+ // bit16-21: polygon ID for translucent pixels
+ // bit22: translucent flag
+ // bit24-29: polygon ID for opaque pixels
+
+ u8 StencilBuffer[256*2];
+ bool PrevIsShadowMask;
+
+ bool Enabled;
+
+ bool FrameIdentical;
+
+ // threading
+
+ bool Threaded;
+ // Platform::Thread* RenderThread;
+ std::thread RenderThread;
+ bool RenderThreadRunning;
+ bool RenderThreadRendering;
+ Platform::Semaphore* Sema_RenderStart;
+ Platform::Semaphore* Sema_RenderDone;
+ Platform::Semaphore* Sema_ScanlineCount;
+};
+}
\ No newline at end of file
diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp
index 8f2d5a138e..c02d955e72 100644
--- a/src/GPU_OpenGL.cpp
+++ b/src/GPU_OpenGL.cpp
@@ -16,8 +16,11 @@
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
-#include
-#include
+#include "GPU_OpenGL.h"
+
+#include
+#include
+
#include "NDS.h"
#include "GPU.h"
#include "Config.h"
@@ -26,34 +29,10 @@
namespace GPU
{
-namespace GLCompositor
-{
using namespace OpenGL;
-int Scale;
-int ScreenH, ScreenW;
-
-GLuint CompShader[1][3];
-GLuint CompScaleLoc[1];
-GLuint Comp3DXPosLoc[1];
-
-GLuint CompVertexBufferID;
-GLuint CompVertexArrayID;
-
-struct CompVertex
-{
- float Position[2];
- float Texcoord[2];
-};
-CompVertex CompVertices[2 * 3*2];
-
-GLuint CompScreenInputTex;
-GLuint CompScreenOutputTex[2];
-GLuint CompScreenOutputFB[2];
-
-
-bool Init()
+bool GLCompositor::Init()
{
if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Nearest, CompShader[0], "CompositorShader"))
//if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Linear, CompShader[0], "CompositorShader"))
@@ -144,7 +123,7 @@ bool Init()
return true;
}
-void DeInit()
+void GLCompositor::DeInit()
{
glDeleteFramebuffers(2, CompScreenOutputFB);
glDeleteTextures(1, &CompScreenInputTex);
@@ -157,12 +136,12 @@ void DeInit()
OpenGL::DeleteShaderProgram(CompShader[i]);
}
-void Reset()
+void GLCompositor::Reset()
{
}
-void SetRenderSettings(RenderSettings& settings)
+void GLCompositor::SetRenderSettings(RenderSettings& settings)
{
int scale = settings.GL_ScaleFactor;
@@ -188,7 +167,7 @@ void SetRenderSettings(RenderSettings& settings)
glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
-void Stop()
+void GLCompositor::Stop()
{
for (int i = 0; i < 2; i++)
{
@@ -202,7 +181,7 @@ void Stop()
glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
-void RenderFrame()
+void GLCompositor::RenderFrame()
{
int frontbuf = GPU::FrontBuffer;
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
@@ -236,17 +215,16 @@ void RenderFrame()
}
glActiveTexture(GL_TEXTURE1);
- GPU3D::GLRenderer::SetupAccelFrame();
+ reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->SetupAccelFrame();
glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID);
glBindVertexArray(CompVertexArrayID);
glDrawArrays(GL_TRIANGLES, 0, 4*3);
}
-void BindOutputTexture(int buf)
+void GLCompositor::BindOutputTexture(int buf)
{
glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[buf]);
}
}
-}
diff --git a/src/GPU_OpenGL.h b/src/GPU_OpenGL.h
new file mode 100644
index 0000000000..1fcb08f712
--- /dev/null
+++ b/src/GPU_OpenGL.h
@@ -0,0 +1,68 @@
+/*
+ Copyright 2016-2020 Arisotura
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#pragma once
+
+#include "OpenGLSupport.h"
+
+namespace GPU
+{
+
+struct RenderSettings;
+
+class GLCompositor
+{
+public:
+ GLCompositor() = default;
+ GLCompositor(const GLCompositor&) = delete;
+ GLCompositor& operator=(const GLCompositor&) = delete;
+
+ bool Init();
+ void DeInit();
+ void Reset();
+
+ void SetRenderSettings(RenderSettings& settings);
+
+ void Stop();
+ void RenderFrame();
+ void BindOutputTexture(int buf);
+private:
+
+ int Scale;
+ int ScreenH, ScreenW;
+
+ GLuint CompShader[1][3];
+ GLuint CompScaleLoc[1];
+ GLuint Comp3DXPosLoc[1];
+
+ GLuint CompVertexBufferID;
+ GLuint CompVertexArrayID;
+
+ struct CompVertex
+ {
+ float Position[2];
+ float Texcoord[2];
+ };
+ CompVertex CompVertices[2 * 3*2];
+
+ GLuint CompScreenInputTex;
+ GLuint CompScreenOutputTex[2];
+ GLuint CompScreenOutputFB[2];
+};
+
+}
\ No newline at end of file
diff --git a/src/NDS.cpp b/src/NDS.cpp
index 7c0ecea1c6..6c41cb55b0 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -1564,7 +1564,7 @@ void RunTimer(u32 tid, s32 cycles)
void RunTimers(u32 cpu)
{
- register u32 timermask = TimerCheckMask[cpu];
+ u32 timermask = TimerCheckMask[cpu];
s32 cycles;
if (cpu == 0)
diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp
index 5aa4959ecb..460457c46e 100644
--- a/src/frontend/qt_sdl/main.cpp
+++ b/src/frontend/qt_sdl/main.cpp
@@ -1013,7 +1013,7 @@ void ScreenPanelGL::paintGL()
if (GPU::Renderer != 0)
{
// hardware-accelerated render
- GPU::GLCompositor::BindOutputTexture(frontbuf);
+ GPU::CurGLCompositor->BindOutputTexture(frontbuf);
}
else
#endif
@@ -2536,9 +2536,15 @@ int main(int argc, char** argv)
Config::Load();
-#define SANITIZE(var, min, max) { if (var < min) var = min; else if (var > max) var = max; }
+#define SANITIZE(var, min, max) { var = std::clamp(var, min, max); }
SANITIZE(Config::ConsoleType, 0, 1);
- SANITIZE(Config::_3DRenderer, 0, 1);
+ SANITIZE(Config::_3DRenderer,
+ 0,
+ 0 // Minimum, Software renderer
+ #ifdef OGLRENDERER_ENABLED
+ + 1 // OpenGL Renderer
+ #endif
+ );
SANITIZE(Config::ScreenVSyncInterval, 1, 20);
SANITIZE(Config::GL_ScaleFactor, 1, 16);
SANITIZE(Config::AudioVolume, 0, 256);
From d63f7977f83fb4bc48c633c3b1ecbfa23423370f Mon Sep 17 00:00:00 2001
From: gal20 <71563441+gal20@users.noreply.github.com>
Date: Wed, 10 Feb 2021 00:42:31 +0200
Subject: [PATCH 15/18] Remove code duplication in `onChangeScreenSize` (#968)
---
src/frontend/qt_sdl/main.cpp | 43 ++++--------------------------------
src/frontend/qt_sdl/main.h | 3 +--
2 files changed, 5 insertions(+), 41 deletions(-)
diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp
index 460457c46e..3e26489792 100644
--- a/src/frontend/qt_sdl/main.cpp
+++ b/src/frontend/qt_sdl/main.cpp
@@ -681,13 +681,13 @@ void ScreenHandler::screenSetupLayout(int w, int h)
numScreens = Frontend::GetScreenTransforms(screenMatrix[0], screenKind);
}
-QSize ScreenHandler::screenGetMinSize()
+QSize ScreenHandler::screenGetMinSize(int factor = 1)
{
bool isHori = (Config::ScreenRotation == 1 || Config::ScreenRotation == 3);
int gap = Config::ScreenGap;
- int w = 256;
- int h = 192;
+ int w = 256 * factor;
+ int h = 192 * factor;
if (Config::ScreenLayout == 0) // natural
{
@@ -2272,43 +2272,8 @@ void MainWindow::onChangeSavestateSRAMReloc(bool checked)
void MainWindow::onChangeScreenSize()
{
int factor = ((QAction*)sender())->data().toInt();
-
- bool isHori = (Config::ScreenRotation == 1 || Config::ScreenRotation == 3);
- int gap = Config::ScreenGap;
-
- int w = 256*factor;
- int h = 192*factor;
-
QSize diff = size() - panel->size();
-
- if (Config::ScreenLayout == 0) // natural
- {
- if (isHori)
- resize(QSize(h+gap+h, w) + diff);
- else
- resize(QSize(w, h+gap+h) + diff);
- }
- else if (Config::ScreenLayout == 1) // vertical
- {
- if (isHori)
- resize(QSize(h, w+gap+w) + diff);
- else
- resize(QSize(w, h+gap+h) + diff);
- }
- else if (Config::ScreenLayout == 2) // horizontal
- {
- if (isHori)
- resize(QSize(h+gap+h, w) + diff);
- else
- resize(QSize(w+gap+w, h) + diff);
- }
- else // hybrid
- {
- if (isHori)
- return resize(QSize(h+gap+h, 3*w +(4*gap) / 3) + diff);
- else
- return resize(QSize(3*w +(4*gap) / 3, h+gap+h) + diff);
- }
+ resize(dynamic_cast<ScreenHandler*>(panel)->screenGetMinSize(factor) + diff);
}
void MainWindow::onChangeScreenRotation(QAction* act)
diff --git a/src/frontend/qt_sdl/main.h b/src/frontend/qt_sdl/main.h
index 9bfcd0a71a..0009551a7b 100644
--- a/src/frontend/qt_sdl/main.h
+++ b/src/frontend/qt_sdl/main.h
@@ -100,12 +100,11 @@ class ScreenHandler
QTimer* setupMouseTimer();
void updateMouseTimer();
QTimer* mouseTimer;
+ QSize screenGetMinSize(int factor);
protected:
void screenSetupLayout(int w, int h);
- QSize screenGetMinSize();
-
void screenOnMousePress(QMouseEvent* event);
void screenOnMouseRelease(QMouseEvent* event);
void screenOnMouseMove(QMouseEvent* event);
From f05bc50d40cded130e188d165e6b310f2a72d58f Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Thu, 11 Feb 2021 16:00:36 +0100
Subject: [PATCH 16/18] use std::function in Thread_Create so we can revert
back to using it
---
src/GPU3D_Soft.cpp | 9 +++------
src/GPU3D_Soft.h | 3 +--
src/Platform.h | 4 +++-
src/frontend/qt_sdl/Platform.cpp | 2 +-
4 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index f6d27a0df5..de66b6b023 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -37,10 +37,8 @@ void SoftRenderer::StopRenderThread()
{
RenderThreadRunning = false;
Platform::Semaphore_Post(Sema_RenderStart);
- // Platform::Thread_Wait(RenderThread);
- // Platform::Thread_Free(RenderThread);
- RenderThread.join();
-
+ Platform::Thread_Wait(RenderThread);
+ Platform::Thread_Free(RenderThread);
}
}
@@ -51,8 +49,7 @@ void SoftRenderer::SetupRenderThread()
if (!RenderThreadRunning)
{
RenderThreadRunning = true;
- //RenderThread = Platform::Thread_Create(RenderThreadFunc);
- RenderThread = std::thread(&SoftRenderer::RenderThreadFunc, this);
+ RenderThread = Platform::Thread_Create(std::bind(&RenderThreadFunc, this));
}
// otherwise more than one frame can be queued up at once
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index 851b7c19b5..ee1977d34a 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -505,8 +505,7 @@ class SoftRenderer : public Renderer3D
// threading
bool Threaded;
- // Platform::Thread* RenderThread;
- std::thread RenderThread;
+ Platform::Thread* RenderThread;
bool RenderThreadRunning;
bool RenderThreadRendering;
Platform::Semaphore* Sema_RenderStart;
diff --git a/src/Platform.h b/src/Platform.h
index 42e1e24695..9542233a9d 100644
--- a/src/Platform.h
+++ b/src/Platform.h
@@ -21,6 +21,8 @@
#include "types.h"
+#include <functional>
+
namespace Platform
{
@@ -68,7 +70,7 @@ inline bool LocalFileExists(const char* name)
}
struct Thread;
-Thread* Thread_Create(void (*func)());
+Thread* Thread_Create(std::function<void()> func);
void Thread_Free(Thread* thread);
void Thread_Wait(Thread* thread);
diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp
index 7c4b553709..64013058dd 100644
--- a/src/frontend/qt_sdl/Platform.cpp
+++ b/src/frontend/qt_sdl/Platform.cpp
@@ -188,7 +188,7 @@ FILE* OpenLocalFile(const char* path, const char* mode)
return OpenFile(fullpath.toUtf8(), mode, mode[0] != 'w');
}
-Thread* Thread_Create(void (* func)())
+Thread* Thread_Create(std::function<void()> func)
{
QThread* t = QThread::create(func);
t->start();
From f1e0816c1af934f38baa8c3095d03512b462db67 Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Thu, 11 Feb 2021 18:38:52 +0100
Subject: [PATCH 17/18] detach and delete shaders directly after linking
---
src/OpenGLSupport.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/OpenGLSupport.cpp b/src/OpenGLSupport.cpp
index 01fa9d8635..a133c41305 100644
--- a/src/OpenGLSupport.cpp
+++ b/src/OpenGLSupport.cpp
@@ -87,6 +87,12 @@ bool LinkShaderProgram(GLuint* ids)
glLinkProgram(ids[2]);
+ glDetachShader(ids[2], ids[0]);
+ glDetachShader(ids[2], ids[1]);
+
+ glDeleteShader(ids[0]);
+ glDeleteShader(ids[1]);
+
glGetProgramiv(ids[2], GL_LINK_STATUS, &res);
if (res != GL_TRUE)
{
@@ -97,8 +103,6 @@ bool LinkShaderProgram(GLuint* ids)
printf("OpenGL: failed to link shader program: %s\n", log);
delete[] log;
- glDeleteShader(ids[0]);
- glDeleteShader(ids[1]);
glDeleteProgram(ids[2]);
return false;
@@ -109,8 +113,6 @@ bool LinkShaderProgram(GLuint* ids)
void DeleteShaderProgram(GLuint* ids)
{
- glDeleteShader(ids[0]);
- glDeleteShader(ids[1]);
glDeleteProgram(ids[2]);
}
From 295d60e4cb217f73c28e514f1e05127d6892e0ac Mon Sep 17 00:00:00 2001
From: RSDuck
Date: Thu, 11 Feb 2021 19:11:18 +0100
Subject: [PATCH 18/18] try to fix build when the compiler is stricter
---
src/GPU3D_Soft.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index de66b6b023..22f7f01c27 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -49,7 +49,7 @@ void SoftRenderer::SetupRenderThread()
if (!RenderThreadRunning)
{
RenderThreadRunning = true;
- RenderThread = Platform::Thread_Create(std::bind(&RenderThreadFunc, this));
+ RenderThread = Platform::Thread_Create(std::bind(&SoftRenderer::RenderThreadFunc, this));
}
// otherwise more than one frame can be queued up at once