From b75b3f69b783ea7d8919f58d2c86dfa7c9dc6a69 Mon Sep 17 00:00:00 2001 From: Nadia Holmquist Pedersen Date: Wed, 27 Jan 2021 00:14:24 +0100 Subject: [PATCH 01/18] Don't save the window size to the config if in full screen (#933) --- src/frontend/qt_sdl/main.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 527a507085..a3b0249548 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -1474,8 +1474,11 @@ void MainWindow::resizeEvent(QResizeEvent* event) int w = event->size().width(); int h = event->size().height(); - Config::WindowWidth = w; - Config::WindowHeight = h; + if (mainWindow != nullptr && !mainWindow->isFullScreen()) + { + Config::WindowWidth = w; + Config::WindowHeight = h; + } // TODO: detect when the window gets maximized! } From a3f4aaf50314f6ab528afb8dbcef88cfb012e765 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Fri, 29 Jan 2021 12:38:18 +0100 Subject: [PATCH 02/18] call glFlush only once that seems to atleast get rid of the flicker the weird issue that clears don't work is still there --- src/GPU3D_OpenGL.cpp | 4 ---- src/GPU_OpenGL.cpp | 2 -- src/frontend/qt_sdl/main.cpp | 8 ++++++++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 24c2751c97..164f29a5fc 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -1145,8 +1145,6 @@ void RenderSceneChunk(int y, int h) } } - glFlush(); - if (RenderDispCnt & 0x00A0) // fog/edge enabled { glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -1204,8 +1202,6 @@ void RenderSceneChunk(int y, int h) glDrawArrays(GL_TRIANGLES, 0, 2*3); } - - glFlush(); } } diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index dd28bcdce2..59ced93a6d 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -226,8 +226,6 @@ void RenderFrame() glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); glBindVertexArray(CompVertexArrayID); 
glDrawArrays(GL_TRIANGLES, 0, 4*3); - - glFlush(); } void BindOutputTexture() diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index a3b0249548..240d52e66d 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -490,6 +490,14 @@ void EmuThread::run() // emulate u32 nlines = NDS::RunFrame(); +#ifdef OGLRENDERER_ENABLED + // this is hacky but this is the easiest way to call + // this function without dealling with a ton of + // macro mess + if (videoRenderer == 1) + epoxy_glFlush(); +#endif + #ifdef MELONCAP MelonCap::Update(); #endif // MELONCAP From f9e701a719413469be4295909bd020b48873fb25 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Fri, 29 Jan 2021 16:05:51 +0000 Subject: [PATCH 03/18] Initialise cursor hiding timer before potential deletion of ScreenPanelGL --- src/frontend/qt_sdl/main.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 240d52e66d..6766252d6a 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -1431,6 +1431,11 @@ void MainWindow::createScreenPanel() { panelGL = new ScreenPanelGL(this); panelGL->show(); + + panel = panelGL; + panelGL->setMouseTracking(true); + mouseTimer = panelGL->setupMouseTimer(); + connect(mouseTimer, &QTimer::timeout, [=] { if (Config::MouseHide) panelGL->setCursor(Qt::BlankCursor);}); if (!panelGL->isValid()) hasOGL = false; @@ -1443,14 +1448,6 @@ void MainWindow::createScreenPanel() if (!hasOGL) delete panelGL; - - if (hasOGL) - { - panel = panelGL; - panelGL->setMouseTracking(true); - mouseTimer = panelGL->setupMouseTimer(); - connect(mouseTimer, &QTimer::timeout, [=] { if (Config::MouseHide) panelGL->setCursor(Qt::BlankCursor);}); - } } if (!hasOGL) From 0d301c243469baf53344a7610a1c835597c6b3e8 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: 
Mon, 1 Feb 2021 17:49:37 +0000 Subject: [PATCH 04/18] Remove flatpak from main repo melonDS is on flathub and the flatpak package is maintained on a seperate repository. --- flatpak/net.kuribo64.melonDS.yml | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 flatpak/net.kuribo64.melonDS.yml diff --git a/flatpak/net.kuribo64.melonDS.yml b/flatpak/net.kuribo64.melonDS.yml deleted file mode 100644 index e336990908..0000000000 --- a/flatpak/net.kuribo64.melonDS.yml +++ /dev/null @@ -1,29 +0,0 @@ ---- -app-id: net.kuribo64.melonDS -runtime: org.freedesktop.Platform -runtime-version: '18.08' -sdk: org.freedesktop.Sdk -command: melonDS -finish-args: - - "--share=ipc" - - "--socket=x11" - - "--socket=pulseaudio" - - "--share=network" - - "--device=all" - - "--filesystem=home" -modules: - - name: libpcap - sources: - - type: archive - url: http://www.tcpdump.org/release/libpcap-1.9.0.tar.gz - sha256: 2edb88808e5913fdaa8e9c1fcaf272e19b2485338742b5074b9fe44d68f37019 - - - name: melonds - buildsystem: cmake-ninja - sources: - - type: git - url: https://github.com/Arisotura/melonDS.git - branch: master - post-install: - - "desktop-file-install --dir=/app/share/applications net.kuribo64.melonDS.desktop" - - "install -D icon/melon_256x256.png /app/share/icons/hicolor/256x256/apps/net.kuribo64.melonDS.png" From b5e601bb88858b124b0cc41a4ee7eb6896dee8d3 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Tue, 2 Feb 2021 13:29:51 +0000 Subject: [PATCH 05/18] Try to fix Ubuntu AArch64 CI (#979) Also remove previous fixes, they were fixed upstream --- .github/workflows/build-ubuntu-aarch64.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-ubuntu-aarch64.yml b/.github/workflows/build-ubuntu-aarch64.yml index 9186263c3f..227785feaa 100644 --- a/.github/workflows/build-ubuntu-aarch64.yml +++ b/.github/workflows/build-ubuntu-aarch64.yml @@ -20,11 +20,9 @@ 
jobs: - name: Upgrade system shell: bash working-directory: ${{runner.workspace}} - run: | #Fix grub installation error - https://github.com/actions/virtual-environments/issues/1605 + run: | sudo apt update - sudo apt-get install grub-efi - sudo update-grub - sudo apt full-upgrade + sudo ACCEPT_EULA=Y apt full-upgrade - name: Install dependencies shell: bash working-directory: ${{runner.workspace}} From 40aae154cf77e0611057a05702f28d9cf17b08f4 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 2 Feb 2021 15:31:32 +0100 Subject: [PATCH 06/18] prevent race condition around framebuffers --- src/GPU.h | 2 +- src/GPU_OpenGL.cpp | 67 +++++++++++++---------- src/frontend/qt_sdl/main.cpp | 101 +++++++++++++++++++++-------------- src/frontend/qt_sdl/main.h | 4 ++ 4 files changed, 106 insertions(+), 68 deletions(-) diff --git a/src/GPU.h b/src/GPU.h index 3a254dfb18..2fc15f49de 100644 --- a/src/GPU.h +++ b/src/GPU.h @@ -563,7 +563,7 @@ void SetRenderSettings(RenderSettings& settings); void Stop(); void RenderFrame(); -void BindOutputTexture(); +void BindOutputTexture(int buf); } #endif diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index 59ced93a6d..e7ab1f704d 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -49,8 +49,8 @@ struct CompVertex CompVertex CompVertices[2 * 3*2]; GLuint CompScreenInputTex; -GLuint CompScreenOutputTex; -GLuint CompScreenOutputFB; +GLuint CompScreenOutputTex[2]; +GLuint CompScreenOutputFB[2]; bool Init() @@ -118,7 +118,7 @@ bool Init() glEnableVertexAttribArray(1); // texcoord glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(CompVertex), (void*)(offsetof(CompVertex, Texcoord))); - glGenFramebuffers(1, &CompScreenOutputFB); + glGenFramebuffers(2, CompScreenOutputFB); glGenTextures(1, &CompScreenInputTex); glActiveTexture(GL_TEXTURE0); @@ -129,12 +129,15 @@ bool Init() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI, 256*3 + 1, 192*2, 0, GL_RGBA_INTEGER, 
GL_UNSIGNED_BYTE, NULL); - glGenTextures(1, &CompScreenOutputTex); - glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glGenTextures(2, CompScreenOutputTex); + for (int i = 0; i < 2; i++) + { + glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[i]); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } glBindFramebuffer(GL_FRAMEBUFFER, 0); @@ -143,9 +146,9 @@ bool Init() void DeInit() { - glDeleteFramebuffers(1, &CompScreenOutputFB); + glDeleteFramebuffers(2, CompScreenOutputFB); glDeleteTextures(1, &CompScreenInputTex); - glDeleteTextures(1, &CompScreenOutputTex); + glDeleteTextures(2, CompScreenOutputTex); glDeleteVertexArrays(1, &CompVertexArrayID); glDeleteBuffers(1, &CompVertexBufferID); @@ -167,30 +170,41 @@ void SetRenderSettings(RenderSettings& settings) ScreenW = 256 * scale; ScreenH = (384+2) * scale; - glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - // fill the padding - u8 zeroPixels[ScreenW*2*scale*4]; - memset(zeroPixels, 0, sizeof(zeroPixels)); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192*scale, ScreenW, 2*scale, GL_RGBA, GL_UNSIGNED_BYTE, zeroPixels); - - GLenum fbassign[] = {GL_COLOR_ATTACHMENT0}; - glBindFramebuffer(GL_FRAMEBUFFER, CompScreenOutputFB); - glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CompScreenOutputTex, 0); - glDrawBuffers(1, fbassign); + for (int i = 0; i < 2; i++) + { + glBindTexture(GL_TEXTURE_2D, 
CompScreenOutputTex[i]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + // fill the padding + u8 zeroPixels[ScreenW*2*scale*4]; + memset(zeroPixels, 0, sizeof(zeroPixels)); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192*scale, ScreenW, 2*scale, GL_RGBA, GL_UNSIGNED_BYTE, zeroPixels); + + GLenum fbassign[] = {GL_COLOR_ATTACHMENT0}; + glBindFramebuffer(GL_FRAMEBUFFER, CompScreenOutputFB[i]); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CompScreenOutputTex[i], 0); + glDrawBuffers(1, fbassign); + } glBindFramebuffer(GL_FRAMEBUFFER, 0); } void Stop() { - RenderFrame(); + for (int i = 0; i < 2; i++) + { + int frontbuf = GPU::FrontBuffer; + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[frontbuf]); + + glClear(GL_COLOR_BUFFER_BIT); + } } void RenderFrame() { + int frontbuf = GPU::FrontBuffer; glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[frontbuf]); glDisable(GL_DEPTH_TEST); glDisable(GL_STENCIL_TEST); @@ -208,7 +222,6 @@ void RenderFrame() // TODO: support setting this midframe, if ever needed glUniform1i(Comp3DXPosLoc[0], ((int)GPU3D::RenderXPos << 23) >> 23); - int frontbuf = GPU::FrontBuffer; glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); @@ -228,9 +241,9 @@ void RenderFrame() glDrawArrays(GL_TRIANGLES, 0, 4*3); } -void BindOutputTexture() +void BindOutputTexture(int buf) { - glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex); + glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[buf]); } } diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 6766252d6a..5aa4959ecb 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -490,13 +490,18 @@ void EmuThread::run() // emulate u32 nlines = NDS::RunFrame(); + FrontBufferLock.lock(); #ifdef OGLRENDERER_ENABLED - 
// this is hacky but this is the easiest way to call - // this function without dealling with a ton of - // macro mess if (videoRenderer == 1) - epoxy_glFlush(); + { + // this is hacky but this is the easiest way to call + // this function without dealling with a ton of + // macro mess + epoxy_glFinish(); + } #endif + FrontBuffer = GPU::FrontBuffer; + FrontBufferLock.unlock(); #ifdef MELONCAP MelonCap::Update(); @@ -824,11 +829,17 @@ void ScreenPanelNative::paintEvent(QPaintEvent* event) // fill background painter.fillRect(event->rect(), QColor::fromRgb(0, 0, 0)); - int frontbuf = GPU::FrontBuffer; - if (!GPU::Framebuffer[frontbuf][0] || !GPU::Framebuffer[frontbuf][1]) return; + emuThread->FrontBufferLock.lock(); + int frontbuf = emuThread->FrontBuffer; + if (!GPU::Framebuffer[frontbuf][0] || !GPU::Framebuffer[frontbuf][1]) + { + emuThread->FrontBufferLock.unlock(); + return; + } memcpy(screen[0].scanLine(0), GPU::Framebuffer[frontbuf][0], 256*192*4); memcpy(screen[1].scanLine(0), GPU::Framebuffer[frontbuf][1], 256*192*4); + emuThread->FrontBufferLock.unlock(); painter.setRenderHint(QPainter::SmoothPixmapTransform, Config::ScreenFilter!=0); @@ -988,53 +999,63 @@ void ScreenPanelGL::paintGL() glViewport(0, 0, w*factor, h*factor); - screenShader->bind(); + if (emuThread) + { + screenShader->bind(); - screenShader->setUniformValue("uScreenSize", (float)w*factor, (float)h*factor); + screenShader->setUniformValue("uScreenSize", (float)w*factor, (float)h*factor); - int frontbuf = GPU::FrontBuffer; - glActiveTexture(GL_TEXTURE0); + emuThread->FrontBufferLock.lock(); + int frontbuf = emuThread->FrontBuffer; + glActiveTexture(GL_TEXTURE0); -#ifdef OGLRENDERER_ENABLED - if (GPU::Renderer != 0) - { - // hardware-accelerated render - GPU::GLCompositor::BindOutputTexture(); - } - else -#endif - { - // regular render - glBindTexture(GL_TEXTURE_2D, screenTexture); - - if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1]) + #ifdef OGLRENDERER_ENABLED + if 
(GPU::Renderer != 0) { - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA, - GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192+2, 256, 192, GL_RGBA, - GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + // hardware-accelerated render + GPU::GLCompositor::BindOutputTexture(frontbuf); } - } + else + #endif + { + // regular render + glBindTexture(GL_TEXTURE_2D, screenTexture); - GLint filter = Config::ScreenFilter ? GL_LINEAR : GL_NEAREST; - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); + if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1]) + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192+2, 256, 192, GL_RGBA, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + } + } - glBindBuffer(GL_ARRAY_BUFFER, screenVertexBuffer); - glBindVertexArray(screenVertexArray); + GLint filter = Config::ScreenFilter ? GL_LINEAR : GL_NEAREST; + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); - GLint transloc = screenShader->uniformLocation("uTransform"); + glBindBuffer(GL_ARRAY_BUFFER, screenVertexBuffer); + glBindVertexArray(screenVertexArray); - for (int i = 0; i < numScreens; i++) - { - glUniformMatrix2x3fv(transloc, 1, GL_TRUE, screenMatrix[i]); - glDrawArrays(GL_TRIANGLES, screenKind[i] == 0 ? 0 : 2*3, 2*3); - } + GLint transloc = screenShader->uniformLocation("uTransform"); - screenShader->release(); + for (int i = 0; i < numScreens; i++) + { + glUniformMatrix2x3fv(transloc, 1, GL_TRUE, screenMatrix[i]); + glDrawArrays(GL_TRIANGLES, screenKind[i] == 0 ? 
0 : 2*3, 2*3); + } + + screenShader->release(); + } OSD::Update(this); OSD::DrawGL(this, w*factor, h*factor); + + if (emuThread) + { + glFinish(); + emuThread->FrontBufferLock.unlock(); + } } void ScreenPanelGL::resizeEvent(QResizeEvent* event) diff --git a/src/frontend/qt_sdl/main.h b/src/frontend/qt_sdl/main.h index c226fbc07e..9bfcd0a71a 100644 --- a/src/frontend/qt_sdl/main.h +++ b/src/frontend/qt_sdl/main.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,9 @@ class EmuThread : public QThread bool emuIsRunning(); + int FrontBuffer = 0; + QMutex FrontBufferLock; + signals: void windowUpdate(); void windowTitleChange(QString title); From 2e999ae1b8e88e41fce3b2289372b44344cc79b0 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 2 Feb 2021 16:29:23 +0100 Subject: [PATCH 07/18] attempt at fixing #972 --- src/GPU_OpenGL.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index e7ab1f704d..8f2d5a138e 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -198,6 +198,8 @@ void Stop() glClear(GL_COLOR_BUFFER_BIT); } + + glBindFramebuffer(GL_FRAMEBUFFER, 0); } void RenderFrame() From 7b9b8418cb91c98525a63fdd1c53089da0d0c758 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 2 Feb 2021 20:37:28 +0100 Subject: [PATCH 08/18] fix #978 --- src/GPU2D_Soft.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index 53e7b73313..7cab67ad47 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -2126,7 +2126,7 @@ void GPU2D_Soft::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 for (; xoff < xend;) { - color = objvram[pixelsaddr]; + color = objvram[pixelsaddr & objvrammask]; pixelsaddr += pixelstride; From 2502c8d212a9adb1910fb0ccd4ad72acb0231b39 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 <68647953+WaluigiWare64@users.noreply.github.com> Date: Wed, 3 Feb 2021 16:14:53 +0000 Subject: [PATCH 09/18] Add NetBSD support (#985) 
Note - This will require PaX MPROTECT to be disabled for melonDS by running: paxctl +m melonDS --- src/ARMJIT_Memory.cpp | 4 ++++ src/frontend/qt_sdl/CMakeLists.txt | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 5de185bacf..2ff38f97b5 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -159,6 +159,8 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) desc.FaultPC = (u8*)context->uc_mcontext->__ss.__rip; #elif defined(__FreeBSD__) desc.FaultPC = (u8*)context->uc_mcontext.mc_rip; + #elif defined(__NetBSD__) + desc.FaultPC = (u8*)context->uc_mcontext.__gregs[_REG_RIP]; #else desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP]; #endif @@ -180,6 +182,8 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) context->uc_mcontext->__ss.__rip = (u64)desc.FaultPC; #elif defined(__FreeBSD__) context->uc_mcontext.mc_rip = (u64)desc.FaultPC; + #elif defined(__NetBSD__) + context->uc_mcontext.__gregs[_REG_RIP] = (u64)desc.FaultPC; #else context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC; #endif diff --git a/src/frontend/qt_sdl/CMakeLists.txt b/src/frontend/qt_sdl/CMakeLists.txt index 8f48390e68..f61e0caaca 100644 --- a/src/frontend/qt_sdl/CMakeLists.txt +++ b/src/frontend/qt_sdl/CMakeLists.txt @@ -92,7 +92,10 @@ endif() if (UNIX) option(PORTABLE "Make a portable build that looks for its configuration in the current directory" OFF) - target_link_libraries(melonDS dl Qt5::Core Qt5::Gui Qt5::Widgets) + target_link_libraries(melonDS Qt5::Core Qt5::Gui Qt5::Widgets) + if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_libraries(melonDS dl) + endif() elseif (WIN32) option(PORTABLE "Make a portable build that looks for its configuration in the current directory" ON) configure_file("${CMAKE_SOURCE_DIR}/melon.rc.in" "${CMAKE_SOURCE_DIR}/melon.rc") From 1112162e994638f2c0085accc6bba77d52cc27f9 Mon Sep 17 00:00:00 2001 From: WaluigiWare64 
<68647953+WaluigiWare64@users.noreply.github.com> Date: Thu, 4 Feb 2021 10:10:49 +0000 Subject: [PATCH 10/18] Add build status badges --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 4c7b2c4301..8cefa34cd0 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,11 @@ +
+ + + +

DS emulator, sorta From 6256a42e0099cef9f567148083c55bbb798d8184 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 9 Feb 2021 19:24:57 +0100 Subject: [PATCH 11/18] improve and fix NonStupidBitfield also get rid of some UB fixes optimised lto clang build --- src/GPU.cpp | 18 +++--- src/GPU2D.cpp | 2 + src/NonStupidBitfield.h | 130 +++++++++++++++++++++++++--------------- 3 files changed, 93 insertions(+), 57 deletions(-) diff --git a/src/GPU.cpp b/src/GPU.cpp index e31b2392bd..ab3a5f9360 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -1187,7 +1187,7 @@ NonStupidBitField VRAMTrackingSet(i*VRAMBitsPerMapping, VRAMBitsPerMapping); + result.SetRange(i*VRAMBitsPerMapping, VRAMBitsPerMapping); banksToBeZeroed |= currentMappings[i]; Mapping[i] = currentMappings[i]; } @@ -1209,19 +1209,19 @@ NonStupidBitField VRAMTrackingSet> 14)]; - ((u32*)result.Data)[i] |= dirty; + result.Data[i / 2] |= (u64)dirty << ((i&1)*32); } else if (MappingGranularity == 8*1024) { u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)]; - ((u16*)result.Data)[i] |= dirty; + result.Data[i / 4] |= (u64)dirty << ((i&3)*16); } else if (MappingGranularity == 128*1024) { - ((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0]; - ((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1]; - ((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2]; - ((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3]; + result.Data[i * 4 + 0] |= VRAMDirty[num].Data[0]; + result.Data[i * 4 + 1] |= VRAMDirty[num].Data[1]; + result.Data[i * 4 + 2] |= VRAMDirty[num].Data[2]; + result.Data[i * 4 + 3] |= VRAMDirty[num].Data[3]; } else { @@ -1236,7 +1236,7 @@ NonStupidBitField VRAMTrackingSet& writtenFlags) mapping &= ~(1 << num); } } - memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data)); + writtenFlags.Clear(); } void SyncDirtyFlags() diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index fa05e79548..cbe09d6c39 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -117,6 +117,7 @@ void 
GPU2D::Reset() BGMosaicYMax = 0; OBJMosaicY = 0; OBJMosaicYMax = 0; + OBJMosaicYCount = 0; BlendCnt = 0; EVA = 16; @@ -130,6 +131,7 @@ void GPU2D::Reset() memset(DispFIFOBuffer, 0, 256*2); CaptureCnt = 0; + CaptureLatch = false; MasterBrightness = 0; diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h index 22e13a96f4..8e87c3c007 100644 --- a/src/NonStupidBitfield.h +++ b/src/NonStupidBitfield.h @@ -14,9 +14,8 @@ template struct NonStupidBitField { - static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8"); - static const u32 DataLength = Size / 8; - u8 Data[DataLength]; + static constexpr u32 DataLength = (Size + 0x3F) >> 6; + u64 Data[DataLength]; struct Ref { @@ -25,13 +24,13 @@ struct NonStupidBitField operator bool() { - return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7)); + return BitField.Data[Idx >> 6] & (1ULL << (Idx & 0x3F)); } Ref& operator=(bool set) { - BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7)); - BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7)); + BitField.Data[Idx >> 6] &= ~(1ULL << (Idx & 0x3F)); + BitField.Data[Idx >> 6] |= ((u64)set << (Idx & 0x3F)); return *this; } }; @@ -43,27 +42,40 @@ struct NonStupidBitField u32 BitIdx; u64 RemainingBits; - u32 operator*() { return DataIdx * 8 + BitIdx; } + u32 operator*() { return DataIdx * 64 + BitIdx; } - bool operator==(const Iterator& other) { return other.DataIdx == DataIdx; } - bool operator!=(const Iterator& other) { return other.DataIdx != DataIdx; } + bool operator==(const Iterator& other) + { + return other.DataIdx == DataIdx; + } + bool operator!=(const Iterator& other) + { + return other.DataIdx != DataIdx; + } - template void Next() { - if (DataIdx >= DataLength) - return; - - while (RemainingBits == 0) + if (RemainingBits == 0) { - DataIdx += sizeof(T); - if (DataIdx >= DataLength) - return; - RemainingBits = *(T*)&BitField.Data[DataIdx]; + for (u32 i = DataIdx + 1; i < DataLength; i++) + { + if (BitField.Data[i]) + { + DataIdx = i; + RemainingBits = 
BitField.Data[i]; + goto done; + } + } + DataIdx = DataLength; + return; + done:; } BitIdx = __builtin_ctzll(RemainingBits); RemainingBits &= ~(1ULL << BitIdx); + + if ((Size & 0x3F) && BitIdx >= Size) + DataIdx = DataLength; } Iterator operator++(int) @@ -75,40 +87,35 @@ struct NonStupidBitField Iterator& operator++() { - if ((DataLength % 8) == 0) - Next(); - else if ((DataLength % 4) == 0) - Next(); - else if ((DataLength % 2) == 0) - Next(); - else - Next(); - + Next(); return *this; } }; - NonStupidBitField(u32 start, u32 size) + NonStupidBitField(u32 startBit, u32 bitsCount) { - memset(Data, 0, sizeof(Data)); + Clear(); - if (size == 0) + if (bitsCount == 0) return; - u32 roundedStartBit = (start + 7) & ~7; - u32 roundedEndBit = (start + size) & ~7; - if (roundedStartBit != roundedEndBit) - memset(Data + roundedStartBit / 8, 0xFF, (roundedEndBit - roundedStartBit) / 8); - - if (start & 0x7) - Data[start >> 3] = 0xFF << (start & 0x7); - if ((start + size) & 0x7) - Data[(start + size) >> 3] = 0xFF >> ((start + size) & 0x7); + SetRange(startBit, bitsCount); + /*for (int i = 0; i < Size; i++) + { + bool state = (*this)[i]; + if (state != (i >= startBit && i < startBit + bitsCount)) + { + for (u32 j = 0; j < DataLength; j++) + printf("data %016lx\n", Data[j]); + printf("blarg %d %d %d %d\n", i, startBit, bitsCount, Size); + abort(); + } + }*/ } NonStupidBitField() { - memset(Data, 0, sizeof(Data)); + Clear(); } Iterator End() @@ -117,14 +124,20 @@ struct NonStupidBitField } Iterator Begin() { - if ((DataLength % 8) == 0) - return ++Iterator{*this, 0, 0, *(u64*)Data}; - else if ((DataLength % 4) == 0) - return ++Iterator{*this, 0, 0, *(u32*)Data}; - else if ((DataLength % 2) == 0) - return ++Iterator{*this, 0, 0, *(u16*)Data}; - else - return ++Iterator{*this, 0, 0, *Data}; + for (u32 i = 0; i < DataLength; i++) + { + u32 idx = __builtin_ctzll(Data[i]); + if (Data[i] && idx + i * 64 < Size) + { + return {*this, i, idx, Data[i] & ~(1ULL << idx)}; + } + } + return 
End(); + } + + void Clear() + { + memset(Data, 0, sizeof(Data)); } Ref operator[](u32 idx) @@ -132,6 +145,27 @@ struct NonStupidBitField return Ref{*this, idx}; } + void SetRange(u32 startBit, u32 bitsCount) + { + u32 startEntry = startBit >> 6; + u64 entriesCount = ((startBit + bitsCount + 0x3F & ~0x3F) >> 6) - startEntry; + + if (entriesCount > 1) + { + Data[startEntry] |= 0xFFFFFFFFFFFFFFFF << (startBit & 0x3F); + if ((startBit + bitsCount) & 0x3F) + Data[startEntry + entriesCount - 1] |= ~(0xFFFFFFFFFFFFFFFF << ((startBit + bitsCount) & 0x3F)); + else + Data[startEntry + entriesCount - 1] = 0xFFFFFFFFFFFFFFFF; + for (int i = startEntry + 1; i < startEntry + entriesCount - 1; i++) + Data[i] = 0xFFFFFFFFFFFFFFFF; + } + else + { + Data[startEntry] |= ((1ULL << bitsCount) - 1) << (startBit & 0x3F); + } + } + NonStupidBitField& operator|=(const NonStupidBitField& other) { for (u32 i = 0; i < DataLength; i++) From e7ee3b7bc84e583fbfbe7c9896db53d3e808edcc Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 9 Feb 2021 22:19:44 +0100 Subject: [PATCH 12/18] wild shot into the dark --- src/ARMJIT_Memory.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 2ff38f97b5..5a011f3d52 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -711,16 +711,11 @@ void Init() MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, MemoryTotalSize, NULL); - MemoryBase = (u8*)VirtualAlloc(NULL, MemoryTotalSize, MEM_RESERVE, PAGE_READWRITE); - - FastMem9Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); - FastMem7Start = VirtualAlloc(NULL, AddrSpaceSize, MEM_RESERVE, PAGE_READWRITE); - - // only free them after they have all been reserved - // so they can't overlap + MemoryBase = (u8*)VirtualAlloc(NULL, AddrSpaceSize*4, MEM_RESERVE, PAGE_READWRITE); VirtualFree(MemoryBase, 0, MEM_RELEASE); - VirtualFree(FastMem9Start, 0, MEM_RELEASE); - 
VirtualFree(FastMem7Start, 0, MEM_RELEASE); + FastMem9Start = MemoryBase; + FastMem7Start = MemoryBase + AddrSpaceSize; + MemoryBase = MemoryBase + AddrSpaceSize*2; MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase); From 891427c75c6c617bf61b2e7f2a3f0d79872f7f3c Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 9 Feb 2021 23:36:46 +0100 Subject: [PATCH 13/18] fix #994 --- src/ARMJIT_Memory.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 5a011f3d52..063437e55f 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -713,9 +713,15 @@ void Init() MemoryBase = (u8*)VirtualAlloc(NULL, AddrSpaceSize*4, MEM_RESERVE, PAGE_READWRITE); VirtualFree(MemoryBase, 0, MEM_RELEASE); - FastMem9Start = MemoryBase; - FastMem7Start = MemoryBase + AddrSpaceSize; - MemoryBase = MemoryBase + AddrSpaceSize*2; + // this is incredible hacky + // but someone else is trying to go into our address space! + // Windows will very likely give them virtual memory starting at the same address + // as it is giving us now. + // That's why we don't use this address, but instead 4gb inwards + // I know this is terrible + FastMem9Start = MemoryBase + AddrSpaceSize; + FastMem7Start = MemoryBase + AddrSpaceSize*2; + MemoryBase = MemoryBase + AddrSpaceSize*3; MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase); From a7029aebae2d09c2dd666a5832a90e227305bab1 Mon Sep 17 00:00:00 2001 From: Wunk Date: Tue, 9 Feb 2021 14:38:51 -0800 Subject: [PATCH 14/18] Allow for a more modular renderer backends (#990) * Draft GPU3D renderer modularization * Update sources C++ standard to C++17 The top-level `CMakeLists.txt` is already using the C++17 standard. 
* Move GLCompositor into class type Some other misc fixes to push towards better modularity * Make renderer-implementation types move-only These types are going to be holding onto handles of GPU-side resources and shouldn't ever be copied around. * Fix OSX: Remove 'register' storage class specifier `register` has been removed in C++17... But this keyword hasn't done anything in years anyways. OSX builds consider this "warning" an error and it stops the whole build. * Add RestartFrame to Renderer3D interface * Move Accelerated property to Renderer3D interface There are points in the code base where we do: `renderer != 0` to know if we are feeding an openGL renderer. Rather than that we can instead just have this be a property of the renderer itself. With this pattern a renderer can just say how it wants its data to come in rather than have everyone know that they're talking to an OpenGL renderer. * Remove Accelerated flag from GPU * Move 2D_Soft interface in separate header Also make the current 2D engine an "owned" unique_ptr. * Update alignment attribute to standard alignas Uses standardized `alignas` rather than compiler-specific attributes. https://en.cppreference.com/w/cpp/language/alignas * Fix Clang: alignas specifier Alignment must be specified before the array to align the entire array. https://en.cppreference.com/w/cpp/language/alignas * Converted Renderer3D Accelerated to variable This flag is checked a lot during scanline rasterization. So rather than having an expensive vtable-lookup call during mainline rendering code, it is now a public constant bool type that is written to only once during Renderer3D initialization. 
--- src/CMakeLists.txt | 2 +- src/GPU.cpp | 101 ++++--- src/GPU.h | 32 +-- src/GPU2D.h | 69 +---- src/GPU2D_Soft.cpp | 39 ++- src/GPU2D_Soft.h | 79 ++++++ src/GPU3D.cpp | 22 +- src/GPU3D.h | 55 ++-- src/GPU3D_OpenGL.cpp | 143 ++-------- src/GPU3D_OpenGL.h | 152 ++++++++++ src/GPU3D_Soft.cpp | 531 +++-------------------------------- src/GPU3D_Soft.h | 516 ++++++++++++++++++++++++++++++++++ src/GPU_OpenGL.cpp | 48 +--- src/GPU_OpenGL.h | 68 +++++ src/NDS.cpp | 2 +- src/frontend/qt_sdl/main.cpp | 12 +- 16 files changed, 1037 insertions(+), 834 deletions(-) create mode 100644 src/GPU2D_Soft.h create mode 100644 src/GPU3D_OpenGL.h create mode 100644 src/GPU3D_Soft.h create mode 100644 src/GPU_OpenGL.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3bcecbcd55..dc32b2aa0d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ project(core) -set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_STANDARD 17) add_library(core STATIC ARCodeFile.cpp diff --git a/src/GPU.cpp b/src/GPU.cpp index ab3a5f9360..d5465bb776 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -21,6 +21,7 @@ #include "NDS.h" #include "GPU.h" +#include "GPU2D_Soft.h" namespace GPU { @@ -79,11 +80,10 @@ u8* VRAMPtr_BOBJ[0x8]; int FrontBuffer; u32* Framebuffer[2][2]; -int Renderer; -bool Accelerated; +int Renderer = 0; -GPU2D* GPU2D_A; -GPU2D* GPU2D_B; +std::unique_ptr GPU2D_A = {}; +std::unique_ptr GPU2D_B = {}; /* VRAM invalidation tracking @@ -145,25 +145,28 @@ u8 VRAMFlat_TexPal[128*1024]; u32 OAMDirty; u32 PaletteDirty; +#ifdef OGLRENDERER_ENABLED +std::unique_ptr CurGLCompositor = {}; +#endif + bool Init() { - GPU2D_A = new GPU2D_Soft(0); - GPU2D_B = new GPU2D_Soft(1); + GPU2D_A = std::make_unique(0); + GPU2D_B = std::make_unique(1); if (!GPU3D::Init()) return false; FrontBuffer = 0; Framebuffer[0][0] = NULL; Framebuffer[0][1] = NULL; Framebuffer[1][0] = NULL; Framebuffer[1][1] = NULL; Renderer = 0; - Accelerated = false; return true; } void DeInit() { - delete GPU2D_A; - delete 
GPU2D_B; + GPU2D_A.reset(); + GPU2D_B.reset(); GPU3D::DeInit(); if (Framebuffer[0][0]) delete[] Framebuffer[0][0]; @@ -250,9 +253,12 @@ void Reset() memset(VRAMPtr_BBG, 0, sizeof(VRAMPtr_BBG)); memset(VRAMPtr_BOBJ, 0, sizeof(VRAMPtr_BOBJ)); - int fbsize; - if (Accelerated) fbsize = (256*3 + 1) * 192; - else fbsize = 256 * 192; + size_t fbsize; + if (GPU3D::CurrentRenderer->Accelerated) + fbsize = (256*3 + 1) * 192; + else + fbsize = 256 * 192; + for (int i = 0; i < fbsize; i++) { Framebuffer[0][0][i] = 0xFFFFFFFF; @@ -283,17 +289,22 @@ void Reset() void Stop() { int fbsize; - if (Accelerated) fbsize = (256*3 + 1) * 192; - else fbsize = 256 * 192; + if (GPU3D::CurrentRenderer->Accelerated) + fbsize = (256*3 + 1) * 192; + else + fbsize = 256 * 192; + memset(Framebuffer[0][0], 0, fbsize*4); memset(Framebuffer[0][1], 0, fbsize*4); memset(Framebuffer[1][0], 0, fbsize*4); memset(Framebuffer[1][1], 0, fbsize*4); #ifdef OGLRENDERER_ENABLED - if (Accelerated) - GLCompositor::Stop(); -#endif + // This needs a better way to know that we're + // using the OpenGL renderer specifically + if (GPU3D::CurrentRenderer->Accelerated) + CurGLCompositor->Stop(); +#endif } void DoSavestate(Savestate* file) @@ -382,37 +393,42 @@ void InitRenderer(int renderer) #ifdef OGLRENDERER_ENABLED if (renderer == 1) { - if (!GLCompositor::Init()) + CurGLCompositor = std::make_unique(); + // Create opengl rendrerer + if (!CurGLCompositor->Init()) { + // Fallback on software renderer renderer = 0; + GPU3D::CurrentRenderer = std::make_unique(); + GPU3D::CurrentRenderer->Init(); } - else if (!GPU3D::GLRenderer::Init()) + GPU3D::CurrentRenderer = std::make_unique(); + if (!GPU3D::CurrentRenderer->Init()) { - GLCompositor::DeInit(); + // Fallback on software renderer + CurGLCompositor->DeInit(); + CurGLCompositor.reset(); renderer = 0; + GPU3D::CurrentRenderer = std::make_unique(); } } else #endif { - GPU3D::SoftRenderer::Init(); + GPU3D::CurrentRenderer = std::make_unique(); + 
GPU3D::CurrentRenderer->Init(); } Renderer = renderer; - Accelerated = renderer != 0; } void DeInitRenderer() { - if (Renderer == 0) - { - GPU3D::SoftRenderer::DeInit(); - } + GPU3D::CurrentRenderer->DeInit(); #ifdef OGLRENDERER_ENABLED - else + if (Renderer == 1) { - GPU3D::GLRenderer::DeInit(); - GLCompositor::DeInit(); + CurGLCompositor->DeInit(); } #endif } @@ -421,13 +437,13 @@ void ResetRenderer() { if (Renderer == 0) { - GPU3D::SoftRenderer::Reset(); + GPU3D::CurrentRenderer->Reset(); } #ifdef OGLRENDERER_ENABLED else { - GLCompositor::Reset(); - GPU3D::GLRenderer::Reset(); + CurGLCompositor->Reset(); + GPU3D::CurrentRenderer->Reset(); } #endif } @@ -440,10 +456,12 @@ void SetRenderSettings(int renderer, RenderSettings& settings) InitRenderer(renderer); } - bool accel = Accelerated; int fbsize; - if (accel) fbsize = (256*3 + 1) * 192; - else fbsize = 256 * 192; + if (GPU3D::CurrentRenderer->Accelerated) + fbsize = (256*3 + 1) * 192; + else + fbsize = 256 * 192; + if (Framebuffer[0][0]) { delete[] Framebuffer[0][0]; Framebuffer[0][0] = nullptr; } if (Framebuffer[1][0]) { delete[] Framebuffer[1][0]; Framebuffer[1][0] = nullptr; } if (Framebuffer[0][1]) { delete[] Framebuffer[0][1]; Framebuffer[0][1] = nullptr; } @@ -461,18 +479,15 @@ void SetRenderSettings(int renderer, RenderSettings& settings) AssignFramebuffers(); - GPU2D_A->SetRenderSettings(accel); - GPU2D_B->SetRenderSettings(accel); - if (Renderer == 0) { - GPU3D::SoftRenderer::SetRenderSettings(settings); + GPU3D::CurrentRenderer->SetRenderSettings(settings); } #ifdef OGLRENDERER_ENABLED else { - GLCompositor::SetRenderSettings(settings); - GPU3D::GLRenderer::SetRenderSettings(settings); + CurGLCompositor->SetRenderSettings(settings); + GPU3D::CurrentRenderer->SetRenderSettings(settings); } #endif } @@ -1149,7 +1164,9 @@ void StartScanline(u32 line) GPU3D::VBlank(); #ifdef OGLRENDERER_ENABLED - if (Accelerated) GLCompositor::RenderFrame(); + // Need a better way to identify the openGL renderer in 
particular + if (GPU3D::CurrentRenderer->Accelerated) + CurGLCompositor->RenderFrame(); #endif } } diff --git a/src/GPU.h b/src/GPU.h index 2fc15f49de..1e24051de5 100644 --- a/src/GPU.h +++ b/src/GPU.h @@ -19,9 +19,15 @@ #ifndef GPU_H #define GPU_H +#include + #include "GPU2D.h" #include "NonStupidBitfield.h" +#ifdef OGLRENDERER_ENABLED +#include "GPU_OpenGL.h" +#endif + namespace GPU { @@ -69,8 +75,8 @@ extern u8* VRAMPtr_BOBJ[0x8]; extern int FrontBuffer; extern u32* Framebuffer[2][2]; -extern GPU2D* GPU2D_A; -extern GPU2D* GPU2D_B; +extern std::unique_ptr GPU2D_A; +extern std::unique_ptr GPU2D_B; extern int Renderer; @@ -149,6 +155,10 @@ void SyncDirtyFlags(); extern u32 OAMDirty; extern u32 PaletteDirty; +#ifdef OGLRENDERER_ENABLED +extern std::unique_ptr CurGLCompositor; +#endif + struct RenderSettings { bool Soft_Threaded; @@ -550,24 +560,6 @@ void DisplayFIFO(u32 x); void SetDispStat(u32 cpu, u16 val); void SetVCount(u16 val); - -#ifdef OGLRENDERER_ENABLED -namespace GLCompositor -{ - -bool Init(); -void DeInit(); -void Reset(); - -void SetRenderSettings(RenderSettings& settings); - -void Stop(); -void RenderFrame(); -void BindOutputTexture(int buf); - -} -#endif - } #include "GPU3D.h" diff --git a/src/GPU2D.h b/src/GPU2D.h index 0f59ae36b3..e9ce8e1e8b 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -28,13 +28,15 @@ class GPU2D GPU2D(u32 num); virtual ~GPU2D() {} + GPU2D(const GPU2D&) = delete; + GPU2D& operator=(const GPU2D&) = delete; + void Reset(); void DoSavestate(Savestate* file); void SetEnabled(bool enable) { Enabled = enable; } void SetFramebuffer(u32* buf); - virtual void SetRenderSettings(bool accel) = 0; u8 Read8(u32 addr); u16 Read16(u32 addr); @@ -115,8 +117,8 @@ class GPU2D u16 MasterBrightness; - u8 WindowMask[256] __attribute__((aligned (8))); - u8 OBJWindow[256] __attribute__((aligned (8))); + alignas(8) u8 WindowMask[256]; + alignas(8) u8 OBJWindow[256]; void UpdateMosaicCounters(u32 line); void CalculateWindowMask(u32 line); @@ -124,65 
+126,4 @@ class GPU2D virtual void MosaicXSizeChanged() = 0; }; -class GPU2D_Soft : public GPU2D -{ -public: - GPU2D_Soft(u32 num); - ~GPU2D_Soft() override {} - - void SetRenderSettings(bool accel) override; - - void DrawScanline(u32 line) override; - void DrawSprites(u32 line) override; - void VBlankEnd() override; - -protected: - void MosaicXSizeChanged() override; - -private: - bool Accelerated; - - u32 BGOBJLine[256*3] __attribute__((aligned (8))); - u32* _3DLine; - - u32 OBJLine[256] __attribute__((aligned (8))); - u8 OBJIndex[256] __attribute__((aligned (8))); - - u32 NumSprites; - - u8 MosaicTable[16][256]; - u8* CurBGXMosaicTable; - u8* CurOBJXMosaicTable; - - u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); - u32 ColorBlend5(u32 val1, u32 val2); - u32 ColorBrightnessUp(u32 val, u32 factor); - u32 ColorBrightnessDown(u32 val, u32 factor); - u32 ColorComposite(int i, u32 val1, u32 val2); - - template void DrawScanlineBGMode(u32 line); - void DrawScanlineBGMode6(u32 line); - void DrawScanlineBGMode7(u32 line); - void DrawScanline_BGOBJ(u32 line); - - static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); - static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); - - typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag); - - void DrawBG_3D(); - template void DrawBG_Text(u32 line, u32 bgnum); - template void DrawBG_Affine(u32 line, u32 bgnum); - template void DrawBG_Extended(u32 line, u32 bgnum); - template void DrawBG_Large(u32 line); - - void ApplySpriteMosaicX(); - template - void InterleaveSprites(u32 prio); - template void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); - template void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos); - - void DoCapture(u32 line, u32 width); -}; - #endif diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index 7cab67ad47..e455b7ca1c 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -1,4 +1,4 @@ -#include "GPU2D.h" 
+#include "GPU2D_Soft.h" #include "GPU.h" GPU2D_Soft::GPU2D_Soft(u32 num) @@ -15,11 +15,6 @@ GPU2D_Soft::GPU2D_Soft(u32 num) } } -void GPU2D_Soft::SetRenderSettings(bool accel) -{ - Accelerated = accel; -} - u32 GPU2D_Soft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) { u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4; @@ -152,7 +147,7 @@ u32 GPU2D_Soft::ColorComposite(int i, u32 val1, u32 val2) void GPU2D_Soft::DrawScanline(u32 line) { - int stride = Accelerated ? (256*3 + 1) : 256; + int stride = GPU3D::CurrentRenderer->Accelerated ? (256*3 + 1) : 256; u32* dst = &Framebuffer[stride * line]; int n3dline = line; @@ -192,7 +187,7 @@ void GPU2D_Soft::DrawScanline(u32 line) if (Num == 0) { - if (!Accelerated) + if (!GPU3D::CurrentRenderer->Accelerated) _3DLine = GPU3D::GetLine(n3dline); else if (CaptureLatch && (((CaptureCnt >> 29) & 0x3) != 1)) { @@ -206,7 +201,7 @@ void GPU2D_Soft::DrawScanline(u32 line) for (int i = 0; i < 256; i++) dst[i] = 0xFFFFFFFF; - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { dst[256*3] = 0; } @@ -296,7 +291,7 @@ void GPU2D_Soft::DrawScanline(u32 line) DoCapture(line, capwidth); } - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { dst[256*3] = MasterBrightness | (DispCnt & 0x30000); return; @@ -350,11 +345,11 @@ void GPU2D_Soft::VBlankEnd() GPU2D::VBlankEnd(); #ifdef OGLRENDERER_ENABLED - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { if ((Num == 0) && (CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1)) { - GPU3D::GLRenderer::PrepareCaptureFrame(); + reinterpret_cast(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame(); } } #endif @@ -372,7 +367,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) u16* dst = (u16*)GPU::VRAM[dstvram]; u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); - // TODO: handle 3D in accelerated mode!! + // TODO: handle 3D in GPU3D::CurrentRenderer->Accelerated mode!! 
u32* srcA; if (CaptureCnt & (1<<24)) @@ -382,9 +377,9 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) else { srcA = BGOBJLine; - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { - // in accelerated mode, compositing is normally done on the GPU + // in GPU3D::CurrentRenderer->Accelerated mode, compositing is normally done on the GPU // but when doing display capture, we do need the composited output // so we do it here @@ -586,12 +581,12 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { \ if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \ { \ - if (Accelerated) DrawBG_##type(line, num); \ + if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type(line, num); \ else DrawBG_##type(line, num); \ } \ else \ { \ - if (Accelerated) DrawBG_##type(line, num); \ + if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type(line, num); \ else DrawBG_##type(line, num); \ } \ } while (false) @@ -601,18 +596,18 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { \ if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \ { \ - if (Accelerated) DrawBG_Large(line); \ + if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large(line); \ else DrawBG_Large(line); \ } \ else \ { \ - if (Accelerated) DrawBG_Large(line); \ + if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large(line); \ else DrawBG_Large(line); \ } \ } while (false) #define DoInterleaveSprites(prio) \ - if (Accelerated) InterleaveSprites(prio); else InterleaveSprites(prio); + if (GPU3D::CurrentRenderer->Accelerated) InterleaveSprites(prio); else InterleaveSprites(prio); template void GPU2D_Soft::DrawScanlineBGMode(u32 line) @@ -773,7 +768,7 @@ void GPU2D_Soft::DrawScanline_BGOBJ(u32 line) // color special effects // can likely be optimized - if (!Accelerated) + if (!GPU3D::CurrentRenderer->Accelerated) { for (int i = 0; i < 256; i++) { @@ -919,7 +914,7 @@ void GPU2D_Soft::DrawBG_3D() { int i = 0; - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { for (i = 0; i < 256; i++) { diff --git 
a/src/GPU2D_Soft.h b/src/GPU2D_Soft.h new file mode 100644 index 0000000000..754f08a781 --- /dev/null +++ b/src/GPU2D_Soft.h @@ -0,0 +1,79 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#pragma once + +#include "GPU2D.h" + +class GPU2D_Soft : public GPU2D +{ +public: + GPU2D_Soft(u32 num); + ~GPU2D_Soft() override {} + + void DrawScanline(u32 line) override; + void DrawSprites(u32 line) override; + void VBlankEnd() override; + +protected: + void MosaicXSizeChanged() override; + +private: + + alignas(8) u32 BGOBJLine[256*3]; + u32* _3DLine; + + alignas(8) u32 OBJLine[256]; + alignas(8) u8 OBJIndex[256]; + + u32 NumSprites; + + u8 MosaicTable[16][256]; + u8* CurBGXMosaicTable; + u8* CurOBJXMosaicTable; + + u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); + u32 ColorBlend5(u32 val1, u32 val2); + u32 ColorBrightnessUp(u32 val, u32 factor); + u32 ColorBrightnessDown(u32 val, u32 factor); + u32 ColorComposite(int i, u32 val1, u32 val2); + + template void DrawScanlineBGMode(u32 line); + void DrawScanlineBGMode6(u32 line); + void DrawScanlineBGMode7(u32 line); + void DrawScanline_BGOBJ(u32 line); + + static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); + static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); + + typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag); + + void DrawBG_3D(); + template void 
DrawBG_Text(u32 line, u32 bgnum); + template void DrawBG_Affine(u32 line, u32 bgnum); + template void DrawBG_Extended(u32 line, u32 bgnum); + template void DrawBG_Large(u32 line); + + void ApplySpriteMosaicX(); + template + void InterleaveSprites(u32 prio); + template void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); + template void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos); + + void DoCapture(u32 line, u32 width); +}; \ No newline at end of file diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 9b418300bf..c933c829e0 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -273,7 +273,7 @@ u32 RenderNumPolygons; u32 FlushRequest; u32 FlushAttributes; - +std::unique_ptr CurrentRenderer = {}; bool Init() { @@ -2497,12 +2497,12 @@ void CheckFIFODMA() void VCount144() { - if (GPU::Renderer == 0) SoftRenderer::VCount144(); + CurrentRenderer->VCount144(); } void RestartFrame() { - if (GPU::Renderer == 0) SoftRenderer::SetupRenderThread(); + CurrentRenderer->RestartFrame(); } @@ -2597,10 +2597,7 @@ void VBlank() void VCount215() { - if (GPU::Renderer == 0) SoftRenderer::RenderFrame(); -#ifdef OGLRENDERER_ENABLED - else GLRenderer::RenderFrame(); -#endif + CurrentRenderer->RenderFrame(); } void SetRenderXPos(u16 xpos) @@ -2614,12 +2611,7 @@ u32 ScrolledLine[256]; u32* GetLine(int line) { - u32* rawline = NULL; - - if (GPU::Renderer == 0) rawline = SoftRenderer::GetLine(line); -#ifdef OGLRENDERER_ENABLED - else rawline = GLRenderer::GetLine(line); -#endif + u32* rawline = CurrentRenderer->GetLine(line); if (RenderXPos == 0) return rawline; @@ -3055,5 +3047,9 @@ void Write32(u32 addr, u32 val) printf("unknown GPU3D write32 %08X %08X\n", addr, val); } +Renderer3D::Renderer3D(bool Accelerated) +: Accelerated(Accelerated) +{ } + } diff --git a/src/GPU3D.h b/src/GPU3D.h index e4629b04ac..1aba0bdd0e 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -20,6 +20,9 @@ #define GPU3D_H #include +#include + 
+#include "GPU.h" #include "Savestate.h" namespace GPU3D @@ -96,8 +99,6 @@ extern u32 RenderNumPolygons; extern u64 Timestamp; -extern int Renderer; - bool Init(); void DeInit(); void Reset(); @@ -131,40 +132,42 @@ void Write8(u32 addr, u8 val); void Write16(u32 addr, u16 val); void Write32(u32 addr, u32 val); -namespace SoftRenderer +class Renderer3D { +public: + Renderer3D(bool Accelerated); + virtual ~Renderer3D() {}; -bool Init(); -void DeInit(); -void Reset(); + Renderer3D(const Renderer3D&) = delete; + Renderer3D& operator=(const Renderer3D&) = delete; -void SetRenderSettings(GPU::RenderSettings& settings); -void SetupRenderThread(); + virtual bool Init() = 0; + virtual void DeInit() = 0; + virtual void Reset() = 0; -void VCount144(); -void RenderFrame(); -u32* GetLine(int line); + // This "Accelerated" flag currently communicates if the framebuffer should + // be allocated differently and other little misc handlers. Ideally there + // are more detailed "traits" that we can ask of the Renderer3D type + const bool Accelerated; -} + virtual void SetRenderSettings(GPU::RenderSettings& settings) = 0; -#ifdef OGLRENDERER_ENABLED -namespace GLRenderer -{ + virtual void VCount144() {}; -bool Init(); -void DeInit(); -void Reset(); - -void SetRenderSettings(GPU::RenderSettings& settings); + virtual void RenderFrame() = 0; + virtual void RestartFrame() {}; + virtual u32* GetLine(int line) = 0; +}; -void RenderFrame(); -void PrepareCaptureFrame(); -u32* GetLine(int line); -void SetupAccelFrame(); +extern int Renderer; +extern std::unique_ptr CurrentRenderer; } -#endif -} +#include "GPU3D_Soft.h" + +#ifdef OGLRENDERER_ENABLED +#include "GPU3D_OpenGL.h" +#endif #endif diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 164f29a5fc..93c1523dab 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -16,118 +16,19 @@ with melonDS. If not, see http://www.gnu.org/licenses/. 
*/ +#include "GPU3D_OpenGL.h" + #include #include #include "NDS.h" #include "GPU.h" #include "Config.h" -#include "OpenGLSupport.h" #include "GPU3D_OpenGL_shaders.h" namespace GPU3D { -namespace GLRenderer -{ - -using namespace OpenGL; - -// GL version requirements -// * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) -// * UBO: 3.1 - - -enum -{ - RenderFlag_WBuffer = 0x01, - RenderFlag_Trans = 0x02, - RenderFlag_ShadowMask = 0x04, - RenderFlag_Edge = 0x08, -}; - - -GLuint ClearShaderPlain[3]; - -GLuint RenderShader[16][3]; -GLuint CurShaderID = -1; - -GLuint FinalPassEdgeShader[3]; -GLuint FinalPassFogShader[3]; - -// std140 compliant structure -struct -{ - float uScreenSize[2]; // vec2 0 / 2 - u32 uDispCnt; // int 2 / 1 - u32 __pad0; - float uToonColors[32][4]; // vec4[32] 4 / 128 - float uEdgeColors[8][4]; // vec4[8] 132 / 32 - float uFogColor[4]; // vec4 164 / 4 - float uFogDensity[34][4]; // float[34] 168 / 136 - u32 uFogOffset; // int 304 / 1 - u32 uFogShift; // int 305 / 1 - u32 _pad1[2]; // int 306 / 2 -} ShaderConfig; - -GLuint ShaderConfigUBO; - -struct RendererPolygon -{ - Polygon* PolyData; - - u32 NumIndices; - u32 IndicesOffset; - GLuint PrimType; - - u32 NumEdgeIndices; - u32 EdgeIndicesOffset; - u32 RenderKey; -}; - -RendererPolygon PolygonList[2048]; -int NumFinalPolys, NumOpaqueFinalPolys; - -GLuint ClearVertexBufferID, ClearVertexArrayID; -GLint ClearUniformLoc[4]; - -// vertex buffer -// * XYZW: 4x16bit -// * RGBA: 4x8bit -// * ST: 2x16bit -// * polygon data: 3x32bit (polygon/texture attributes) -// -// polygon attributes: -// * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR -// * bit16-20: Z shift -// * bit8: front-facing (?) -// * bit9: W-buffering (?) 
- -GLuint VertexBufferID; -u32 VertexBuffer[10240 * 7]; -u32 NumVertices; - -GLuint VertexArrayID; -GLuint IndexBufferID; -u16 IndexBuffer[2048 * 40]; -u32 NumIndices, NumEdgeIndices; - -const u32 EdgeIndicesOffset = 2048 * 30; - -GLuint TexMemID; -GLuint TexPalMemID; - -int ScaleFactor; -bool BetterPolygons; -int ScreenW, ScreenH; - -GLuint FramebufferTex[8]; -int FrontBuffer; -GLuint FramebufferID[4], PixelbufferID; -u32 Framebuffer[256*192]; - - - -bool BuildRenderShader(u32 flags, const char* vs, const char* fs) +bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs) { char shadername[32]; sprintf(shadername, "RenderShader%02X", flags); @@ -180,7 +81,7 @@ bool BuildRenderShader(u32 flags, const char* vs, const char* fs) return true; } -void UseRenderShader(u32 flags) +void GLRenderer::UseRenderShader(u32 flags) { if (CurShaderID == flags) return; glUseProgram(RenderShader[flags][2]); @@ -196,7 +97,12 @@ void SetupDefaultTexParams(GLuint tex) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); } -bool Init() +GLRenderer::GLRenderer() + : Renderer3D(true) +{ +} + +bool GLRenderer::Init() { GLint uni_id; @@ -382,7 +288,7 @@ bool Init() return true; } -void DeInit() +void GLRenderer::DeInit() { glDeleteTextures(1, &TexMemID); glDeleteTextures(1, &TexPalMemID); @@ -404,11 +310,11 @@ void DeInit() } } -void Reset() +void GLRenderer::Reset() { } -void SetRenderSettings(GPU::RenderSettings& settings) +void GLRenderer::SetRenderSettings(GPU::RenderSettings& settings) { int scale = settings.GL_ScaleFactor; @@ -462,7 +368,7 @@ void SetRenderSettings(GPU::RenderSettings& settings) } -void SetupPolygon(RendererPolygon* rp, Polygon* polygon) +void GLRenderer::SetupPolygon(GLRenderer::RendererPolygon* rp, Polygon* polygon) { rp->PolyData = polygon; @@ -508,7 +414,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon) } } -u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr) +u32* 
GLRenderer::SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr) { u32 z = poly->FinalZ[vid]; u32 w = poly->FinalW[vid]; @@ -569,7 +475,7 @@ u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr) return vptr; } -void BuildPolygons(RendererPolygon* polygons, int npolys) +void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys) { u32* vptr = &VertexBuffer[0]; u32 vidx = 0; @@ -791,7 +697,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) NumEdgeIndices = eidx - EdgeIndicesOffset; } -int RenderSinglePolygon(int i) +int GLRenderer::RenderSinglePolygon(int i) { RendererPolygon* rp = &PolygonList[i]; @@ -800,7 +706,7 @@ int RenderSinglePolygon(int i) return 1; } -int RenderPolygonBatch(int i) +int GLRenderer::RenderPolygonBatch(int i) { RendererPolygon* rp = &PolygonList[i]; GLuint primtype = rp->PrimType; @@ -822,7 +728,7 @@ int RenderPolygonBatch(int i) return numpolys; } -int RenderPolygonEdgeBatch(int i) +int GLRenderer::RenderPolygonEdgeBatch(int i) { RendererPolygon* rp = &PolygonList[i]; u32 key = rp->RenderKey; @@ -842,7 +748,7 @@ int RenderPolygonEdgeBatch(int i) return numpolys; } -void RenderSceneChunk(int y, int h) +void GLRenderer::RenderSceneChunk(int y, int h) { u32 flags = 0; if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; @@ -1206,7 +1112,7 @@ void RenderSceneChunk(int y, int h) } -void RenderFrame() +void GLRenderer::RenderFrame() { CurShaderID = -1; @@ -1381,7 +1287,7 @@ void RenderFrame() FrontBuffer = FrontBuffer ? 
0 : 1; } -void PrepareCaptureFrame() +void GLRenderer::PrepareCaptureFrame() { // TODO: make sure this picks the right buffer when doing antialiasing int original_fb = FrontBuffer^1; @@ -1396,7 +1302,7 @@ void PrepareCaptureFrame() glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL); } -u32* GetLine(int line) +u32* GLRenderer::GetLine(int line) { int stride = 256; @@ -1419,10 +1325,9 @@ u32* GetLine(int line) return &Framebuffer[stride * line]; } -void SetupAccelFrame() +void GLRenderer::SetupAccelFrame() { glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer]); } } -} diff --git a/src/GPU3D_OpenGL.h b/src/GPU3D_OpenGL.h new file mode 100644 index 0000000000..73e295541e --- /dev/null +++ b/src/GPU3D_OpenGL.h @@ -0,0 +1,152 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#pragma once + +#include "GPU3D.h" + +#include "OpenGLSupport.h" + + +namespace GPU3D +{ +class GLRenderer : public Renderer3D +{ +public: + GLRenderer(); + virtual ~GLRenderer() override {}; + virtual bool Init() override; + virtual void DeInit() override; + virtual void Reset() override; + + virtual void SetRenderSettings(GPU::RenderSettings& settings) override; + + virtual void VCount144() override {}; + virtual void RenderFrame() override; + virtual u32* GetLine(int line) override; + + void SetupAccelFrame(); + void PrepareCaptureFrame(); +private: + + // GL version requirements + // * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) + // * UBO: 3.1 + + struct RendererPolygon + { + Polygon* PolyData; + + u32 NumIndices; + u32 IndicesOffset; + GLuint PrimType; + + u32 NumEdgeIndices; + u32 EdgeIndicesOffset; + + u32 RenderKey; + }; + + RendererPolygon PolygonList[2048]; + + bool BuildRenderShader(u32 flags, const char* vs, const char* fs); + void UseRenderShader(u32 flags); + void SetupPolygon(RendererPolygon* rp, Polygon* polygon); + u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr); + void BuildPolygons(RendererPolygon* polygons, int npolys); + int RenderSinglePolygon(int i); + int RenderPolygonBatch(int i); + int RenderPolygonEdgeBatch(int i); + void RenderSceneChunk(int y, int h); + + enum + { + RenderFlag_WBuffer = 0x01, + RenderFlag_Trans = 0x02, + RenderFlag_ShadowMask = 0x04, + RenderFlag_Edge = 0x08, + }; + + + GLuint ClearShaderPlain[3]; + + GLuint RenderShader[16][3]; + GLuint CurShaderID = -1; + + GLuint FinalPassEdgeShader[3]; + GLuint FinalPassFogShader[3]; + + // std140 compliant structure + struct + { + float uScreenSize[2]; // vec2 0 / 2 + u32 uDispCnt; // int 2 / 1 + u32 __pad0; + float uToonColors[32][4]; // vec4[32] 4 / 128 + float uEdgeColors[8][4]; // vec4[8] 132 / 32 + float uFogColor[4]; // vec4 164 / 4 + float uFogDensity[34][4]; // float[34] 168 / 136 + u32 uFogOffset; // int 304 / 1 + u32 uFogShift; // 
int 305 / 1 + u32 _pad1[2]; // int 306 / 2 + } ShaderConfig; + + GLuint ShaderConfigUBO; + int NumFinalPolys, NumOpaqueFinalPolys; + + GLuint ClearVertexBufferID, ClearVertexArrayID; + GLint ClearUniformLoc[4]; + + // vertex buffer + // * XYZW: 4x16bit + // * RGBA: 4x8bit + // * ST: 2x16bit + // * polygon data: 3x32bit (polygon/texture attributes) + // + // polygon attributes: + // * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR + // * bit16-20: Z shift + // * bit8: front-facing (?) + // * bit9: W-buffering (?) + + GLuint VertexBufferID; + u32 VertexBuffer[10240 * 7]; + u32 NumVertices; + + GLuint VertexArrayID; + GLuint IndexBufferID; + u16 IndexBuffer[2048 * 40]; + u32 NumIndices, NumEdgeIndices; + + const u32 EdgeIndicesOffset = 2048 * 30; + + GLuint TexMemID; + GLuint TexPalMemID; + + int ScaleFactor; + bool BetterPolygons; + int ScreenW, ScreenH; + + GLuint FramebufferTex[8]; + int FrontBuffer; + GLuint FramebufferID[4], PixelbufferID; + u32 Framebuffer[256*192]; + + +}; +} \ No newline at end of file diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 3d6ace6cc1..f6d27a0df5 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -16,82 +16,43 @@ with melonDS. If not, see http://www.gnu.org/licenses/. 
*/ +#include "GPU3D_Soft.h" + #include #include #include "NDS.h" #include "GPU.h" #include "Config.h" -#include "Platform.h" namespace GPU3D { -namespace SoftRenderer -{ - -// buffer dimensions are 258x194 to add a offscreen 1px border -// which simplifies edge marking tests -// buffer is duplicated to keep track of the two topmost pixels -// TODO: check if the hardware can accidentally plot pixels -// offscreen in that border - -const int ScanlineWidth = 258; -const int NumScanlines = 194; -const int BufferSize = ScanlineWidth * NumScanlines; -const int FirstPixelOffset = ScanlineWidth + 1; - -u32 ColorBuffer[BufferSize * 2]; -u32 DepthBuffer[BufferSize * 2]; -u32 AttrBuffer[BufferSize * 2]; - -// attribute buffer: -// bit0-3: edge flags (left/right/top/bottom) -// bit4: backfacing flag -// bit8-12: antialiasing alpha -// bit15: fog enable -// bit16-21: polygon ID for translucent pixels -// bit22: translucent flag -// bit24-29: polygon ID for opaque pixels - -u8 StencilBuffer[256*2]; -bool PrevIsShadowMask; - -bool Enabled; - -bool FrameIdentical; - -// threading - -bool Threaded; -Platform::Thread* RenderThread; -bool RenderThreadRunning; -bool RenderThreadRendering; -Platform::Semaphore* Sema_RenderStart; -Platform::Semaphore* Sema_RenderDone; -Platform::Semaphore* Sema_ScanlineCount; void RenderThreadFunc(); -void StopRenderThread() +void SoftRenderer::StopRenderThread() { if (RenderThreadRunning) { RenderThreadRunning = false; Platform::Semaphore_Post(Sema_RenderStart); - Platform::Thread_Wait(RenderThread); - Platform::Thread_Free(RenderThread); + // Platform::Thread_Wait(RenderThread); + // Platform::Thread_Free(RenderThread); + RenderThread.join(); + } } -void SetupRenderThread() +void SoftRenderer::SetupRenderThread() { if (Threaded) { if (!RenderThreadRunning) { RenderThreadRunning = true; - RenderThread = Platform::Thread_Create(RenderThreadFunc); + //RenderThread = Platform::Thread_Create(RenderThreadFunc); + RenderThread = 
std::thread(&SoftRenderer::RenderThreadFunc, this); } // otherwise more than one frame can be queued up at once @@ -113,7 +74,13 @@ void SetupRenderThread() } -bool Init() +SoftRenderer::SoftRenderer() + : Renderer3D(false) +{ + +} + +bool SoftRenderer::Init() { Sema_RenderStart = Platform::Semaphore_Create(); Sema_RenderDone = Platform::Semaphore_Create(); @@ -126,7 +93,7 @@ bool Init() return true; } -void DeInit() +void SoftRenderer::DeInit() { StopRenderThread(); @@ -135,7 +102,7 @@ void DeInit() Platform::Semaphore_Free(Sema_ScanlineCount); } -void Reset() +void SoftRenderer::Reset() { memset(ColorBuffer, 0, BufferSize * 2 * 4); memset(DepthBuffer, 0, BufferSize * 2 * 4); @@ -146,428 +113,13 @@ void Reset() SetupRenderThread(); } -void SetRenderSettings(GPU::RenderSettings& settings) +void SoftRenderer::SetRenderSettings(GPU::RenderSettings& settings) { Threaded = settings.Soft_Threaded; SetupRenderThread(); } - - -// Notes on the interpolator: -// -// This is a theory on how the DS hardware interpolates values. It matches hardware output -// in the tests I did, but the hardware may be doing it differently. You never know. -// -// Assuming you want to perspective-correctly interpolate a variable named A across two points -// in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly, -// then divide A/W by 1/W to recover the correct A value. -// -// The DS GPU approximates interpolation by calculating a perspective-correct interpolation -// between 0 and 1, then using the result as a factor to linearly interpolate the actual -// vertex attributes. The factor has 9 bits of precision when interpolating along Y and -// 8 bits along X. -// -// There's a special path for when the two W values are equal: it directly does linear -// interpolation, avoiding precision loss from the aforementioned approximation. -// Which is desirable when using the GPU to draw 2D graphics. 
- -template -class Interpolator -{ -public: - Interpolator() {} - Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) - { - Setup(x0, x1, w0, w1); - } - - void Setup(s32 x0, s32 x1, s32 w0, s32 w1) - { - this->x0 = x0; - this->x1 = x1; - this->xdiff = x1 - x0; - - // calculate reciprocals for linear mode and Z interpolation - // TODO eventually: use a faster reciprocal function? - if (this->xdiff != 0) - this->xrecip = (1<<30) / this->xdiff; - else - this->xrecip = 0; - this->xrecip_z = this->xrecip >> 8; - - // linear mode is used if both W values are equal and have - // low-order bits cleared (0-6 along X, 1-6 along Y) - u32 mask = dir ? 0x7E : 0x7F; - if ((w0 == w1) && !(w0 & mask) && !(w1 & mask)) - this->linear = true; - else - this->linear = false; - - if (dir) - { - // along Y - - if ((w0 & 0x1) && !(w1 & 0x1)) - { - this->w0n = w0 - 1; - this->w0d = w0 + 1; - this->w1d = w1; - } - else - { - this->w0n = w0 & 0xFFFE; - this->w0d = w0 & 0xFFFE; - this->w1d = w1 & 0xFFFE; - } - - this->shift = 9; - } - else - { - // along X - - this->w0n = w0; - this->w0d = w0; - this->w1d = w1; - - this->shift = 8; - } - } - - void SetX(s32 x) - { - x -= x0; - this->x = x; - if (xdiff != 0 && !linear) - { - s64 num = ((s64)x * w0n) << shift; - s32 den = (x * w0d) + ((xdiff-x) * w1d); - - // this seems to be a proper division on hardware :/ - // I haven't been able to find cases that produce imperfect output - if (den == 0) yfactor = 0; - else yfactor = (s32)(num / den); - } - } - - s32 Interpolate(s32 y0, s32 y1) - { - if (xdiff == 0 || y0 == y1) return y0; - - if (!linear) - { - // perspective-correct approx. 
interpolation - if (y0 < y1) - return y0 + (((y1-y0) * yfactor) >> shift); - else - return y1 + (((y0-y1) * ((1<> shift); - } - else - { - // linear interpolation - // checkme: the rounding bias there (3<<24) is a guess - if (y0 < y1) - return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30); - else - return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30); - } - } - - s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) - { - if (xdiff == 0 || z0 == z1) return z0; - - if (wbuffer) - { - // W-buffering: perspective-correct approx. interpolation - if (z0 < z1) - return z0 + (((s64)(z1-z0) * yfactor) >> shift); - else - return z1 + (((s64)(z0-z1) * ((1<> shift); - } - else - { - // Z-buffering: linear interpolation - // still doesn't quite match hardware... - s32 base, disp, factor; - - if (z0 < z1) - { - base = z0; - disp = z1 - z0; - factor = x; - } - else - { - base = z1; - disp = z0 - z1, - factor = xdiff - x; - } - - if (dir) - { - int shift = 0; - while (disp > 0x3FF) - { - disp >>= 1; - shift++; - } - - return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift); - } - else - { - disp >>= 9; - return base + (((s64)disp * factor * xrecip_z) >> 13); - } - } - } - -private: - s32 x0, x1, xdiff, x; - - int shift; - bool linear; - - s32 xrecip, xrecip_z; - s32 w0n, w0d, w1d; - - u32 yfactor; -}; - - -template -class Slope -{ -public: - Slope() {} - - s32 SetupDummy(s32 x0) - { - if (side) - { - dx = -0x40000; - x0--; - } - else - { - dx = 0; - } - - this->x0 = x0; - this->xmin = x0; - this->xmax = x0; - - Increment = 0; - XMajor = false; - - Interp.Setup(0, 0, 0, 0); - Interp.SetX(0); - - xcov_incr = 0; - - return x0; - } - - s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) - { - this->x0 = x0; - this->y = y; - - if (x1 > x0) - { - this->xmin = x0; - this->xmax = x1-1; - this->Negative = false; - } - else if (x1 < x0) - { - this->xmin = x1; - this->xmax = x0-1; - this->Negative = true; - } - else - { - this->xmin = x0; - if (side) 
this->xmin--; - this->xmax = this->xmin; - this->Negative = false; - } - - xlen = xmax+1 - xmin; - ylen = y1 - y0; - - // slope increment has a 18-bit fractional part - // note: for some reason, x/y isn't calculated directly, - // instead, 1/y is calculated and then multiplied by x - // TODO: this is still not perfect (see for example x=169 y=33) - if (ylen == 0) - Increment = 0; - else if (ylen == xlen) - Increment = 0x40000; - else - { - s32 yrecip = (1<<18) / ylen; - Increment = (x1-x0) * yrecip; - if (Increment < 0) Increment = -Increment; - } - - XMajor = (Increment > 0x40000); - - if (side) - { - // right - - if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000); - else if (Increment != 0) dx = Negative ? 0x40000 : 0; - else dx = -0x40000; - } - else - { - // left - - if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000; - else if (Increment != 0) dx = Negative ? 0x40000 : 0; - else dx = 0; - } - - dx += (y - y0) * Increment; - - s32 x = XVal(); - - if (XMajor) - { - if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme - else Interp.Setup(x0, x1, w0, w1); - Interp.SetX(x); - - // used for calculating AA coverage - xcov_incr = (ylen << 10) / xlen; - } - else - { - Interp.Setup(y0, y1, w0, w1); - Interp.SetX(y); - } - - return x; - } - - s32 Step() - { - dx += Increment; - y++; - - s32 x = XVal(); - if (XMajor) - { - Interp.SetX(x); - } - else - { - Interp.SetX(y); - } - return x; - } - - s32 XVal() - { - s32 ret; - if (Negative) ret = x0 - (dx >> 18); - else ret = x0 + (dx >> 18); - - if (ret < xmin) ret = xmin; - else if (ret > xmax) ret = xmax; - return ret; - } - - void EdgeParams_XMajor(s32* length, s32* coverage) - { - if (side ^ Negative) - *length = (dx >> 18) - ((dx-Increment) >> 18); - else - *length = ((dx+Increment) >> 18) - (dx >> 18); - - // for X-major edges, we return the coverage - // for the first pixel, and the increment for - // further pixels on the same scanline - s32 startx = dx >> 18; - if 
(Negative) startx = xlen - startx; - if (side) startx = startx - *length + 1; - - s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen; - *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF); - } - - void EdgeParams_YMajor(s32* length, s32* coverage) - { - *length = 1; - - if (Increment == 0) - { - *coverage = 31; - } - else - { - s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4; - if ((cov >> 5) != (dx >> 18)) cov = 31; - cov &= 0x1F; - if (!(side ^ Negative)) cov = 0x1F - cov; - - *coverage = cov; - } - } - - void EdgeParams(s32* length, s32* coverage) - { - if (XMajor) - return EdgeParams_XMajor(length, coverage); - else - return EdgeParams_YMajor(length, coverage); - } - - s32 Increment; - bool Negative; - bool XMajor; - Interpolator<1> Interp; - -private: - s32 x0, xmin, xmax; - s32 xlen, ylen; - s32 dx; - s32 y; - - s32 xcov_incr; - s32 ycoverage, ycov_incr; -}; - -struct RendererPolygon -{ - Polygon* PolyData; - - Slope<0> SlopeL; - Slope<1> SlopeR; - s32 XL, XR; - u32 CurVL, CurVR; - u32 NextVL, NextVR; - -}; - -RendererPolygon PolygonList[2048]; - -template -inline T ReadVRAM_Texture(u32 addr) -{ - return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; -} -template -inline T ReadVRAM_TexPal(u32 addr) -{ - return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; -} - -void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) +void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) { u32 vramaddr = (texparam & 0xFFFF) << 3; @@ -873,7 +425,7 @@ u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) return srcR | (srcG << 8) | (srcB << 16) | (dstalpha << 24); } -u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) +u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) { u8 r, g, b, a; @@ -981,7 +533,7 @@ u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) return r | (g << 8) | (b << 16) | (a << 24); } -void 
PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) +void SoftRenderer::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) { u32 dstattr = AttrBuffer[pixeladdr]; u32 attr = (polyattr & 0xE0F0) | ((polyattr >> 8) & 0xFF0000) | (1<<22) | (dstattr & 0xFF001F0F); @@ -1020,7 +572,7 @@ void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 sha AttrBuffer[pixeladdr] = attr; } -void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) +void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1047,7 +599,7 @@ void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); } -void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) +void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1074,7 +626,7 @@ void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); } -void SetupPolygon(RendererPolygon* rp, Polygon* polygon) +void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) { u32 nverts = polygon->NumVertices; @@ -1127,7 +679,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon) } } -void RenderShadowMaskScanline(RendererPolygon* rp, s32 y) +void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1340,7 +892,7 @@ void RenderShadowMaskScanline(RendererPolygon* rp, s32 y) rp->XR = rp->SlopeR.Step(); } -void RenderPolygonScanline(RendererPolygon* rp, s32 y) +void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1755,7 +1307,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y) rp->XR = rp->SlopeR.Step(); } -void RenderScanline(s32 y, int npolys) +void SoftRenderer::RenderScanline(s32 y, int npolys) { for (int i = 0; i < 
npolys; i++) { @@ -1772,8 +1324,7 @@ void RenderScanline(s32 y, int npolys) } } - -u32 CalculateFogDensity(u32 pixeladdr) +u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr) { u32 z = DepthBuffer[pixeladdr]; u32 densityid, densityfrac; @@ -1812,7 +1363,7 @@ u32 CalculateFogDensity(u32 pixeladdr) return density; } -void ScanlineFinalPass(s32 y) +void SoftRenderer::ScanlineFinalPass(s32 y) { // to consider: // clearing all polygon fog flags if the master flag isn't set? @@ -1981,7 +1532,7 @@ void ScanlineFinalPass(s32 y) } } -void ClearBuffers() +void SoftRenderer::ClearBuffers() { u32 clearz = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; u32 polyid = RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID @@ -2055,7 +1606,7 @@ void ClearBuffers() u32 a = (RenderClearAttr1 >> 16) & 0x1F; u32 color = r | (g << 8) | (b << 16) | (a << 24); - polyid |= (RenderClearAttr1 & 0x8000); + polyid |= (RenderClearAttr1 & 0x8000); for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { @@ -2070,7 +1621,7 @@ void ClearBuffers() } } -void RenderPolygons(bool threaded, Polygon** polygons, int npolys) +void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys) { int j = 0; for (int i = 0; i < npolys; i++) @@ -2096,13 +1647,13 @@ void RenderPolygons(bool threaded, Polygon** polygons, int npolys) Platform::Semaphore_Post(Sema_ScanlineCount); } -void VCount144() +void SoftRenderer::VCount144() { if (RenderThreadRunning) Platform::Semaphore_Wait(Sema_RenderDone); } -void RenderFrame() +void SoftRenderer::RenderFrame() { auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture); auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal); @@ -2123,7 +1674,12 @@ void RenderFrame() } } -void RenderThreadFunc() +void SoftRenderer::RestartFrame() +{ + SetupRenderThread(); +} + +void SoftRenderer::RenderThreadFunc() { for (;;) { @@ -2146,7 +1702,7 @@ void RenderThreadFunc() } } -u32* GetLine(int line) +u32* 
SoftRenderer::GetLine(int line) { if (RenderThreadRunning) { @@ -2158,4 +1714,3 @@ u32* GetLine(int line) } } -} diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h new file mode 100644 index 0000000000..851b7c19b5 --- /dev/null +++ b/src/GPU3D_Soft.h @@ -0,0 +1,516 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#pragma once + +#include "GPU3D.h" +#include "Platform.h" +#include + +namespace GPU3D +{ +class SoftRenderer : public Renderer3D +{ +public: + SoftRenderer(); + virtual ~SoftRenderer() override {}; + virtual bool Init() override; + virtual void DeInit() override; + virtual void Reset() override; + + virtual void SetRenderSettings(GPU::RenderSettings& settings) override; + + virtual void VCount144() override; + virtual void RenderFrame() override; + virtual void RestartFrame() override; + virtual u32* GetLine(int line) override; + + void SetupRenderThread(); + void StopRenderThread(); +private: + // Notes on the interpolator: + // + // This is a theory on how the DS hardware interpolates values. It matches hardware output + // in the tests I did, but the hardware may be doing it differently. You never know. 
+ // + // Assuming you want to perspective-correctly interpolate a variable named A across two points + // in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly, + // then divide A/W by 1/W to recover the correct A value. + // + // The DS GPU approximates interpolation by calculating a perspective-correct interpolation + // between 0 and 1, then using the result as a factor to linearly interpolate the actual + // vertex attributes. The factor has 9 bits of precision when interpolating along Y and + // 8 bits along X. + // + // There's a special path for when the two W values are equal: it directly does linear + // interpolation, avoiding precision loss from the aforementioned approximation. + // Which is desirable when using the GPU to draw 2D graphics. + + template + class Interpolator + { + public: + Interpolator() {} + Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) + { + Setup(x0, x1, w0, w1); + } + + void Setup(s32 x0, s32 x1, s32 w0, s32 w1) + { + this->x0 = x0; + this->x1 = x1; + this->xdiff = x1 - x0; + + // calculate reciprocals for linear mode and Z interpolation + // TODO eventually: use a faster reciprocal function? + if (this->xdiff != 0) + this->xrecip = (1<<30) / this->xdiff; + else + this->xrecip = 0; + this->xrecip_z = this->xrecip >> 8; + + // linear mode is used if both W values are equal and have + // low-order bits cleared (0-6 along X, 1-6 along Y) + u32 mask = dir ? 
0x7E : 0x7F; + if ((w0 == w1) && !(w0 & mask) && !(w1 & mask)) + this->linear = true; + else + this->linear = false; + + if (dir) + { + // along Y + + if ((w0 & 0x1) && !(w1 & 0x1)) + { + this->w0n = w0 - 1; + this->w0d = w0 + 1; + this->w1d = w1; + } + else + { + this->w0n = w0 & 0xFFFE; + this->w0d = w0 & 0xFFFE; + this->w1d = w1 & 0xFFFE; + } + + this->shift = 9; + } + else + { + // along X + + this->w0n = w0; + this->w0d = w0; + this->w1d = w1; + + this->shift = 8; + } + } + + void SetX(s32 x) + { + x -= x0; + this->x = x; + if (xdiff != 0 && !linear) + { + s64 num = ((s64)x * w0n) << shift; + s32 den = (x * w0d) + ((xdiff-x) * w1d); + + // this seems to be a proper division on hardware :/ + // I haven't been able to find cases that produce imperfect output + if (den == 0) yfactor = 0; + else yfactor = (s32)(num / den); + } + } + + s32 Interpolate(s32 y0, s32 y1) + { + if (xdiff == 0 || y0 == y1) return y0; + + if (!linear) + { + // perspective-correct approx. interpolation + if (y0 < y1) + return y0 + (((y1-y0) * yfactor) >> shift); + else + return y1 + (((y0-y1) * ((1<> shift); + } + else + { + // linear interpolation + // checkme: the rounding bias there (3<<24) is a guess + if (y0 < y1) + return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30); + else + return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30); + } + } + + s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) + { + if (xdiff == 0 || z0 == z1) return z0; + + if (wbuffer) + { + // W-buffering: perspective-correct approx. interpolation + if (z0 < z1) + return z0 + (((s64)(z1-z0) * yfactor) >> shift); + else + return z1 + (((s64)(z0-z1) * ((1<> shift); + } + else + { + // Z-buffering: linear interpolation + // still doesn't quite match hardware... 
+ s32 base, disp, factor; + + if (z0 < z1) + { + base = z0; + disp = z1 - z0; + factor = x; + } + else + { + base = z1; + disp = z0 - z1, + factor = xdiff - x; + } + + if (dir) + { + int shift = 0; + while (disp > 0x3FF) + { + disp >>= 1; + shift++; + } + + return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift); + } + else + { + disp >>= 9; + return base + (((s64)disp * factor * xrecip_z) >> 13); + } + } + } + + private: + s32 x0, x1, xdiff, x; + + int shift; + bool linear; + + s32 xrecip, xrecip_z; + s32 w0n, w0d, w1d; + + u32 yfactor; + }; + + + template + class Slope + { + public: + Slope() {} + + s32 SetupDummy(s32 x0) + { + if (side) + { + dx = -0x40000; + x0--; + } + else + { + dx = 0; + } + + this->x0 = x0; + this->xmin = x0; + this->xmax = x0; + + Increment = 0; + XMajor = false; + + Interp.Setup(0, 0, 0, 0); + Interp.SetX(0); + + xcov_incr = 0; + + return x0; + } + + s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) + { + this->x0 = x0; + this->y = y; + + if (x1 > x0) + { + this->xmin = x0; + this->xmax = x1-1; + this->Negative = false; + } + else if (x1 < x0) + { + this->xmin = x1; + this->xmax = x0-1; + this->Negative = true; + } + else + { + this->xmin = x0; + if (side) this->xmin--; + this->xmax = this->xmin; + this->Negative = false; + } + + xlen = xmax+1 - xmin; + ylen = y1 - y0; + + // slope increment has a 18-bit fractional part + // note: for some reason, x/y isn't calculated directly, + // instead, 1/y is calculated and then multiplied by x + // TODO: this is still not perfect (see for example x=169 y=33) + if (ylen == 0) + Increment = 0; + else if (ylen == xlen) + Increment = 0x40000; + else + { + s32 yrecip = (1<<18) / ylen; + Increment = (x1-x0) * yrecip; + if (Increment < 0) Increment = -Increment; + } + + XMajor = (Increment > 0x40000); + + if (side) + { + // right + + if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000); + else if (Increment != 0) dx = Negative ? 
0x40000 : 0; + else dx = -0x40000; + } + else + { + // left + + if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000; + else if (Increment != 0) dx = Negative ? 0x40000 : 0; + else dx = 0; + } + + dx += (y - y0) * Increment; + + s32 x = XVal(); + + if (XMajor) + { + if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme + else Interp.Setup(x0, x1, w0, w1); + Interp.SetX(x); + + // used for calculating AA coverage + xcov_incr = (ylen << 10) / xlen; + } + else + { + Interp.Setup(y0, y1, w0, w1); + Interp.SetX(y); + } + + return x; + } + + s32 Step() + { + dx += Increment; + y++; + + s32 x = XVal(); + if (XMajor) + { + Interp.SetX(x); + } + else + { + Interp.SetX(y); + } + return x; + } + + s32 XVal() + { + s32 ret; + if (Negative) ret = x0 - (dx >> 18); + else ret = x0 + (dx >> 18); + + if (ret < xmin) ret = xmin; + else if (ret > xmax) ret = xmax; + return ret; + } + + void EdgeParams_XMajor(s32* length, s32* coverage) + { + if (side ^ Negative) + *length = (dx >> 18) - ((dx-Increment) >> 18); + else + *length = ((dx+Increment) >> 18) - (dx >> 18); + + // for X-major edges, we return the coverage + // for the first pixel, and the increment for + // further pixels on the same scanline + s32 startx = dx >> 18; + if (Negative) startx = xlen - startx; + if (side) startx = startx - *length + 1; + + s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen; + *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF); + } + + void EdgeParams_YMajor(s32* length, s32* coverage) + { + *length = 1; + + if (Increment == 0) + { + *coverage = 31; + } + else + { + s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4; + if ((cov >> 5) != (dx >> 18)) cov = 31; + cov &= 0x1F; + if (!(side ^ Negative)) cov = 0x1F - cov; + + *coverage = cov; + } + } + + void EdgeParams(s32* length, s32* coverage) + { + if (XMajor) + return EdgeParams_XMajor(length, coverage); + else + return EdgeParams_YMajor(length, coverage); + } + + s32 Increment; + bool Negative; + bool 
XMajor; + Interpolator<1> Interp; + + private: + s32 x0, xmin, xmax; + s32 xlen, ylen; + s32 dx; + s32 y; + + s32 xcov_incr; + s32 ycoverage, ycov_incr; + }; + + template + inline T ReadVRAM_Texture(u32 addr) + { + return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; + } + template + inline T ReadVRAM_TexPal(u32 addr) + { + return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; + } + + struct RendererPolygon + { + Polygon* PolyData; + + Slope<0> SlopeL; + Slope<1> SlopeR; + s32 XL, XR; + u32 CurVL, CurVR; + u32 NextVL, NextVR; + + }; + + RendererPolygon PolygonList[2048]; + void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha); + u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t); + void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); + void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y); + void SetupPolygonRightEdge(RendererPolygon* rp, s32 y); + void SetupPolygon(RendererPolygon* rp, Polygon* polygon); + void RenderShadowMaskScanline(RendererPolygon* rp, s32 y); + void RenderPolygonScanline(RendererPolygon* rp, s32 y); + void RenderScanline(s32 y, int npolys); + u32 CalculateFogDensity(u32 pixeladdr); + void ScanlineFinalPass(s32 y); + void ClearBuffers(); + void RenderPolygons(bool threaded, Polygon** polygons, int npolys); + + void RenderThreadFunc(); + + // buffer dimensions are 258x194 to add a offscreen 1px border + // which simplifies edge marking tests + // buffer is duplicated to keep track of the two topmost pixels + // TODO: check if the hardware can accidentally plot pixels + // offscreen in that border + + static constexpr int ScanlineWidth = 258; + static constexpr int NumScanlines = 194; + static constexpr int BufferSize = ScanlineWidth * NumScanlines; + static constexpr int FirstPixelOffset = ScanlineWidth + 1; + + u32 ColorBuffer[BufferSize * 2]; + u32 DepthBuffer[BufferSize * 2]; + u32 AttrBuffer[BufferSize * 2]; + + // attribute buffer: + // bit0-3: edge flags 
(left/right/top/bottom) + // bit4: backfacing flag + // bit8-12: antialiasing alpha + // bit15: fog enable + // bit16-21: polygon ID for translucent pixels + // bit22: translucent flag + // bit24-29: polygon ID for opaque pixels + + u8 StencilBuffer[256*2]; + bool PrevIsShadowMask; + + bool Enabled; + + bool FrameIdentical; + + // threading + + bool Threaded; + // Platform::Thread* RenderThread; + std::thread RenderThread; + bool RenderThreadRunning; + bool RenderThreadRendering; + Platform::Semaphore* Sema_RenderStart; + Platform::Semaphore* Sema_RenderDone; + Platform::Semaphore* Sema_ScanlineCount; +}; +} \ No newline at end of file diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index 8f2d5a138e..c02d955e72 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -16,8 +16,11 @@ with melonDS. If not, see http://www.gnu.org/licenses/. */ -#include -#include +#include "GPU_OpenGL.h" + +#include +#include + #include "NDS.h" #include "GPU.h" #include "Config.h" @@ -26,34 +29,10 @@ namespace GPU { -namespace GLCompositor -{ using namespace OpenGL; -int Scale; -int ScreenH, ScreenW; - -GLuint CompShader[1][3]; -GLuint CompScaleLoc[1]; -GLuint Comp3DXPosLoc[1]; - -GLuint CompVertexBufferID; -GLuint CompVertexArrayID; - -struct CompVertex -{ - float Position[2]; - float Texcoord[2]; -}; -CompVertex CompVertices[2 * 3*2]; - -GLuint CompScreenInputTex; -GLuint CompScreenOutputTex[2]; -GLuint CompScreenOutputFB[2]; - - -bool Init() +bool GLCompositor::Init() { if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Nearest, CompShader[0], "CompositorShader")) //if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Linear, CompShader[0], "CompositorShader")) @@ -144,7 +123,7 @@ bool Init() return true; } -void DeInit() +void GLCompositor::DeInit() { glDeleteFramebuffers(2, CompScreenOutputFB); glDeleteTextures(1, &CompScreenInputTex); @@ -157,12 +136,12 @@ void DeInit() OpenGL::DeleteShaderProgram(CompShader[i]); } -void Reset() +void 
GLCompositor::Reset() { } -void SetRenderSettings(RenderSettings& settings) +void GLCompositor::SetRenderSettings(RenderSettings& settings) { int scale = settings.GL_ScaleFactor; @@ -188,7 +167,7 @@ void SetRenderSettings(RenderSettings& settings) glBindFramebuffer(GL_FRAMEBUFFER, 0); } -void Stop() +void GLCompositor::Stop() { for (int i = 0; i < 2; i++) { @@ -202,7 +181,7 @@ void Stop() glBindFramebuffer(GL_FRAMEBUFFER, 0); } -void RenderFrame() +void GLCompositor::RenderFrame() { int frontbuf = GPU::FrontBuffer; glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); @@ -236,17 +215,16 @@ void RenderFrame() } glActiveTexture(GL_TEXTURE1); - GPU3D::GLRenderer::SetupAccelFrame(); + reinterpret_cast(GPU3D::CurrentRenderer.get())->SetupAccelFrame(); glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); glBindVertexArray(CompVertexArrayID); glDrawArrays(GL_TRIANGLES, 0, 4*3); } -void BindOutputTexture(int buf) +void GLCompositor::BindOutputTexture(int buf) { glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[buf]); } } -} diff --git a/src/GPU_OpenGL.h b/src/GPU_OpenGL.h new file mode 100644 index 0000000000..1fcb08f712 --- /dev/null +++ b/src/GPU_OpenGL.h @@ -0,0 +1,68 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#pragma once + +#include "OpenGLSupport.h" + +namespace GPU +{ + +struct RenderSettings; + +class GLCompositor +{ +public: + GLCompositor() = default; + GLCompositor(const GLCompositor&) = delete; + GLCompositor& operator=(const GLCompositor&) = delete; + + bool Init(); + void DeInit(); + void Reset(); + + void SetRenderSettings(RenderSettings& settings); + + void Stop(); + void RenderFrame(); + void BindOutputTexture(int buf); +private: + + int Scale; + int ScreenH, ScreenW; + + GLuint CompShader[1][3]; + GLuint CompScaleLoc[1]; + GLuint Comp3DXPosLoc[1]; + + GLuint CompVertexBufferID; + GLuint CompVertexArrayID; + + struct CompVertex + { + float Position[2]; + float Texcoord[2]; + }; + CompVertex CompVertices[2 * 3*2]; + + GLuint CompScreenInputTex; + GLuint CompScreenOutputTex[2]; + GLuint CompScreenOutputFB[2]; +}; + +} \ No newline at end of file diff --git a/src/NDS.cpp b/src/NDS.cpp index 7c0ecea1c6..6c41cb55b0 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -1564,7 +1564,7 @@ void RunTimer(u32 tid, s32 cycles) void RunTimers(u32 cpu) { - register u32 timermask = TimerCheckMask[cpu]; + u32 timermask = TimerCheckMask[cpu]; s32 cycles; if (cpu == 0) diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 5aa4959ecb..460457c46e 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -1013,7 +1013,7 @@ void ScreenPanelGL::paintGL() if (GPU::Renderer != 0) { // hardware-accelerated render - GPU::GLCompositor::BindOutputTexture(frontbuf); + GPU::CurGLCompositor->BindOutputTexture(frontbuf); } else #endif @@ -2536,9 +2536,15 @@ int main(int argc, char** argv) Config::Load(); -#define SANITIZE(var, min, max) { if (var < min) var = min; else if (var > max) var = max; } +#define SANITIZE(var, min, max) { var = std::clamp(var, min, max); } SANITIZE(Config::ConsoleType, 0, 1); - SANITIZE(Config::_3DRenderer, 0, 1); + SANITIZE(Config::_3DRenderer, + 0, + 0 // Minimum, Software renderer + #ifdef OGLRENDERER_ENABLED + + 1 
// OpenGL Renderer + #endif + ); SANITIZE(Config::ScreenVSyncInterval, 1, 20); SANITIZE(Config::GL_ScaleFactor, 1, 16); SANITIZE(Config::AudioVolume, 0, 256); From d63f7977f83fb4bc48c633c3b1ecbfa23423370f Mon Sep 17 00:00:00 2001 From: gal20 <71563441+gal20@users.noreply.github.com> Date: Wed, 10 Feb 2021 00:42:31 +0200 Subject: [PATCH 15/18] Remove code duplication in `onChangeScreenSize` (#968) --- src/frontend/qt_sdl/main.cpp | 43 ++++-------------------------------- src/frontend/qt_sdl/main.h | 3 +-- 2 files changed, 5 insertions(+), 41 deletions(-) diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 460457c46e..3e26489792 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -681,13 +681,13 @@ void ScreenHandler::screenSetupLayout(int w, int h) numScreens = Frontend::GetScreenTransforms(screenMatrix[0], screenKind); } -QSize ScreenHandler::screenGetMinSize() +QSize ScreenHandler::screenGetMinSize(int factor = 1) { bool isHori = (Config::ScreenRotation == 1 || Config::ScreenRotation == 3); int gap = Config::ScreenGap; - int w = 256; - int h = 192; + int w = 256 * factor; + int h = 192 * factor; if (Config::ScreenLayout == 0) // natural { @@ -2272,43 +2272,8 @@ void MainWindow::onChangeSavestateSRAMReloc(bool checked) void MainWindow::onChangeScreenSize() { int factor = ((QAction*)sender())->data().toInt(); - - bool isHori = (Config::ScreenRotation == 1 || Config::ScreenRotation == 3); - int gap = Config::ScreenGap; - - int w = 256*factor; - int h = 192*factor; - QSize diff = size() - panel->size(); - - if (Config::ScreenLayout == 0) // natural - { - if (isHori) - resize(QSize(h+gap+h, w) + diff); - else - resize(QSize(w, h+gap+h) + diff); - } - else if (Config::ScreenLayout == 1) // vertical - { - if (isHori) - resize(QSize(h, w+gap+w) + diff); - else - resize(QSize(w, h+gap+h) + diff); - } - else if (Config::ScreenLayout == 2) // horizontal - { - if (isHori) - resize(QSize(h+gap+h, w) + diff); - else - 
resize(QSize(w+gap+w, h) + diff); - } - else // hybrid - { - if (isHori) - return resize(QSize(h+gap+h, 3*w +(4*gap) / 3) + diff); - else - return resize(QSize(3*w +(4*gap) / 3, h+gap+h) + diff); - } + resize(dynamic_cast<ScreenHandler*>(panel)->screenGetMinSize(factor) + diff); } void MainWindow::onChangeScreenRotation(QAction* act) diff --git a/src/frontend/qt_sdl/main.h b/src/frontend/qt_sdl/main.h index 9bfcd0a71a..0009551a7b 100644 --- a/src/frontend/qt_sdl/main.h +++ b/src/frontend/qt_sdl/main.h @@ -100,12 +100,11 @@ class ScreenHandler QTimer* setupMouseTimer(); void updateMouseTimer(); QTimer* mouseTimer; + QSize screenGetMinSize(int factor); protected: void screenSetupLayout(int w, int h); - QSize screenGetMinSize(); - void screenOnMousePress(QMouseEvent* event); void screenOnMouseRelease(QMouseEvent* event); void screenOnMouseMove(QMouseEvent* event); From f05bc50d40cded130e188d165e6b310f2a72d58f Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 11 Feb 2021 16:00:36 +0100 Subject: [PATCH 16/18] use std::function in Thread_Create so we can revert back to using it --- src/GPU3D_Soft.cpp | 9 +++------ src/GPU3D_Soft.h | 3 +-- src/Platform.h | 4 +++- src/frontend/qt_sdl/Platform.cpp | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index f6d27a0df5..de66b6b023 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -37,10 +37,8 @@ void SoftRenderer::StopRenderThread() { RenderThreadRunning = false; Platform::Semaphore_Post(Sema_RenderStart); - // Platform::Thread_Wait(RenderThread); - // Platform::Thread_Free(RenderThread); - RenderThread.join(); - + Platform::Thread_Wait(RenderThread); + Platform::Thread_Free(RenderThread); } } @@ -51,8 +49,7 @@ void SoftRenderer::SetupRenderThread() if (!RenderThreadRunning) { RenderThreadRunning = true; - //RenderThread = Platform::Thread_Create(RenderThreadFunc); - RenderThread = std::thread(&SoftRenderer::RenderThreadFunc, this); + RenderThread = 
Platform::Thread_Create(std::bind(&RenderThreadFunc, this)); } // otherwise more than one frame can be queued up at once diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 851b7c19b5..ee1977d34a 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -505,8 +505,7 @@ class SoftRenderer : public Renderer3D // threading bool Threaded; - // Platform::Thread* RenderThread; - std::thread RenderThread; + Platform::Thread* RenderThread; bool RenderThreadRunning; bool RenderThreadRendering; Platform::Semaphore* Sema_RenderStart; diff --git a/src/Platform.h b/src/Platform.h index 42e1e24695..9542233a9d 100644 --- a/src/Platform.h +++ b/src/Platform.h @@ -21,6 +21,8 @@ #include "types.h" +#include <functional> + namespace Platform { @@ -68,7 +70,7 @@ inline bool LocalFileExists(const char* name) } struct Thread; -Thread* Thread_Create(void (*func)()); +Thread* Thread_Create(std::function<void()> func); void Thread_Free(Thread* thread); void Thread_Wait(Thread* thread); diff --git a/src/frontend/qt_sdl/Platform.cpp b/src/frontend/qt_sdl/Platform.cpp index 7c4b553709..64013058dd 100644 --- a/src/frontend/qt_sdl/Platform.cpp +++ b/src/frontend/qt_sdl/Platform.cpp @@ -188,7 +188,7 @@ FILE* OpenLocalFile(const char* path, const char* mode) return OpenFile(fullpath.toUtf8(), mode, mode[0] != 'w'); } -Thread* Thread_Create(void (* func)()) +Thread* Thread_Create(std::function<void()> func) { QThread* t = QThread::create(func); t->start(); From f1e0816c1af934f38baa8c3095d03512b462db67 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 11 Feb 2021 18:38:52 +0100 Subject: [PATCH 17/18] detach and delete shaders directly after linking --- src/OpenGLSupport.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/OpenGLSupport.cpp b/src/OpenGLSupport.cpp index 01fa9d8635..a133c41305 100644 --- a/src/OpenGLSupport.cpp +++ b/src/OpenGLSupport.cpp @@ -87,6 +87,12 @@ bool LinkShaderProgram(GLuint* ids) glLinkProgram(ids[2]); + glDetachShader(ids[2], ids[0]); + glDetachShader(ids[2], 
ids[1]); + + glDeleteShader(ids[0]); + glDeleteShader(ids[1]); + glGetProgramiv(ids[2], GL_LINK_STATUS, &res); if (res != GL_TRUE) { @@ -97,8 +103,6 @@ bool LinkShaderProgram(GLuint* ids) printf("OpenGL: failed to link shader program: %s\n", log); delete[] log; - glDeleteShader(ids[0]); - glDeleteShader(ids[1]); glDeleteProgram(ids[2]); return false; @@ -109,8 +113,6 @@ bool LinkShaderProgram(GLuint* ids) void DeleteShaderProgram(GLuint* ids) { - glDeleteShader(ids[0]); - glDeleteShader(ids[1]); glDeleteProgram(ids[2]); } From 295d60e4cb217f73c28e514f1e05127d6892e0ac Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 11 Feb 2021 19:11:18 +0100 Subject: [PATCH 18/18] try to fix build when the compiler is stricter --- src/GPU3D_Soft.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index de66b6b023..22f7f01c27 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -49,7 +49,7 @@ void SoftRenderer::SetupRenderThread() if (!RenderThreadRunning) { RenderThreadRunning = true; - RenderThread = Platform::Thread_Create(std::bind(&RenderThreadFunc, this)); + RenderThread = Platform::Thread_Create(std::bind(&SoftRenderer::RenderThreadFunc, this)); } // otherwise more than one frame can be queued up at once