Skip to content

Commit

Permalink
Run PSG on a separate thread
Browse files Browse the repository at this point in the history
This unblocks the z80, causing fewer frame drops
  • Loading branch information
hpvb committed Aug 7, 2022
1 parent 910dbbb commit 1d55f09
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 37 deletions.
4 changes: 3 additions & 1 deletion main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ idf_component_register(
)

target_compile_options(${COMPONENT_LIB} PRIVATE
-Ofast
-O3
-Wall
-Wextra
-fjump-tables
-ftree-switch-conversion
-ftree-loop-im
Expand Down
61 changes: 39 additions & 22 deletions main/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ SOFTWARE.
extern const uint8_t rom_start[] asm("_binary_rom_sms_start");
extern const uint8_t rom_end[] asm("_binary_rom_sms_end");

static bool current_backbuffer = 0;
static bool current_video_backbuffer = 0;
videobuffer_t* backbuffer[2];

#define OVERSCAN_BUFFER_SIZE 1024
Expand All @@ -57,6 +57,7 @@ extern uint64_t cpal_updates;

static xQueueHandle button_queue;
static QueueHandle_t video_queue;
static SemaphoreHandle_t video_mutex;

ILI9341 *ili9341 = NULL;
ICE40 *ice40 = NULL;
Expand Down Expand Up @@ -85,9 +86,8 @@ __attribute__((always_inline)) inline uint32_t core_colour_callback(void *user,
return __builtin_bswap16((((r & 0xF8) << 8) + ((g & 0xFC) << 3) + ((b & 0xF8) >> 3)));
}

volatile bool currently_drawing;
__attribute__((always_inline)) inline static void write_frame(bool frame) {
currently_drawing = true;
xSemaphoreTake(video_mutex, portMAX_DELAY);

uint64_t start = esp_timer_get_time();
uint64_t end;
Expand All @@ -96,7 +96,7 @@ __attribute__((always_inline)) inline static void write_frame(bool frame) {
ice40_lcd_send_turbo(ice40, ((uint8_t*)backbuffer[frame]->parts[i]) - 1, backbuffer[frame]->part_size + 1);
}

currently_drawing = false;
xSemaphoreGive(video_mutex);
++frames;

end = esp_timer_get_time();
Expand All @@ -105,17 +105,15 @@ __attribute__((always_inline)) inline static void write_frame(bool frame) {
}

/*
 * VBlank hook for the emulator core.
 *
 * Posts the index of the backbuffer that was just rendered to video_queue,
 * then flips current_video_backbuffer and points the core (sms.pixels) at
 * the other backbuffer for the next frame.
 *
 * user: opaque callback argument; not used here.
 */
__attribute__((always_inline)) inline void core_vblank_callback(void *user) {
    (void)user;

    /*
     * LCD transfers in write_frame() and set_overscan_border() are guarded by
     * video_mutex, so no spin-wait is done here. The send blocks with
     * portMAX_DELAY until the queue accepts the item.
     *
     * NOTE(review): with an unbounded timeout the errQUEUE_FULL branch is not
     * expected to trigger, so dropped_frames probably never advances - confirm
     * that this is intended.
     * NOTE(review): video_queue is created with sizeof(uint16_t *) sized items
     * while a bool is sent here - confirm the queue item size.
     */
    if (xQueueSend(video_queue, &current_video_backbuffer, portMAX_DELAY) == errQUEUE_FULL) {
        ++dropped_frames;
    }

    /* Swap buffers: the core now renders into the one that is not queued. */
    current_video_backbuffer = !current_video_backbuffer;
    sms.pixels = backbuffer[current_video_backbuffer];
}

void videoTask(void *arg) {
void video_task(void *arg) {
bool param;
while (1) {
xQueuePeek(video_queue, &param, portMAX_DELAY);
Expand Down Expand Up @@ -175,7 +173,7 @@ static void handle_input() {
} while (queueResult == pdTRUE);
}

#define AUDIO_FREQ 11200
#define AUDIO_FREQ 22000
#define AUDIO_BLOCK_SIZE 256
static uint16_t* audio_buffer;
static uint32_t audio_idx = 0;
Expand All @@ -187,7 +185,7 @@ void audio_init() {
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
.communication_format = I2S_COMM_FORMAT_STAND_MSB,
.dma_buf_count = 8,
.dma_buf_count = 4,
.dma_buf_len = AUDIO_BLOCK_SIZE / 4,
.intr_alloc_flags = 0,
.use_apll = true,
Expand All @@ -207,23 +205,23 @@ void audio_init() {
i2s_set_pin(0, &pin_config);
i2s_set_sample_rates(0, AUDIO_FREQ);

audio_buffer = malloc(AUDIO_BLOCK_SIZE * sizeof(uint16_t));
audio_buffer = heap_caps_malloc(AUDIO_BLOCK_SIZE * sizeof(uint16_t), MALLOC_CAP_SPIRAM);
if (!audio_buffer) {
ESP_LOGE(TAG, "Failed to allocate audio buffer");
}
ESP_LOGI(TAG, "Audio initialized!");
}

/*
 * APU sample hook for the emulator core.
 *
 * Sums the three tone channels and the noise channel from `data`, scales the
 * sum by 128 and appends the resulting 16-bit sample to audio_buffer twice.
 * Once AUDIO_BLOCK_SIZE entries are buffered, the block is written to I2S
 * port 0 (blocking with portMAX_DELAY) and the write index is reset.
 *
 * user: opaque callback argument; not used here.
 * data: per-channel output levels for this sample.
 */
__attribute__((always_inline)) inline void core_apu_callback(void* user, struct SMS_ApuCallbackData* data) {
    (void)user;

    const uint16_t sample = (data->tone0 + data->tone1 + data->tone2 + data->noise) * 128;

    /*
     * The same value goes into two consecutive slots - presumably one per
     * channel of the RIGHT_LEFT I2S channel format; confirm.
     */
    audio_buffer[audio_idx++] = sample;
    audio_buffer[audio_idx++] = sample;

    if (audio_idx >= AUDIO_BLOCK_SIZE) {
        size_t written;

        /* Byte count is derived from the element type instead of a literal 2. */
        i2s_write(0, audio_buffer, AUDIO_BLOCK_SIZE * sizeof(*audio_buffer), &written, portMAX_DELAY);
        audio_idx = 0;
    }
}

static uint8_t leds[5][3];
Expand Down Expand Up @@ -267,15 +265,19 @@ void init_screen_rect() {

#define MIN(x, y) (((x) < (y)) ? (x) : (y))
/*
 * Sends `size` bytes worth of data to the LCD in chunks of at most
 * OVERSCAN_BUFFER_SIZE bytes.
 *
 * Every chunk is sent from the start of `buffer`; the pointer is never
 * advanced, so the same buffer contents are repeated for each chunk.
 *
 * buffer: start of the data to send.
 * size:   total number of bytes to cover; 0 sends nothing.
 */
__attribute__((always_inline)) inline void write_screen(uint8_t* buffer, size_t size) {
    while (size) {
        const size_t length = MIN(OVERSCAN_BUFFER_SIZE, size);

        /*
         * One extra byte is transferred per chunk - presumably the command
         * byte stored at buffer[0] by set_overscan_border(); confirm.
         */
        ice40_lcd_send_turbo(ice40, buffer, length + 1);
        size -= length;
    }
}

void set_overscan_border(uint16_t color) {
// Don't try to interleave this with regular frame data, bad things will happen
while (currently_drawing) {}

xSemaphoreTake(video_mutex, portMAX_DELAY);

overscan_buffer[0] = 0xf3;
for (uint32_t i = 1; i < OVERSCAN_BUFFER_SIZE + 1; i += 2) {
Expand Down Expand Up @@ -318,6 +320,7 @@ void set_overscan_border(uint16_t color) {
write_screen(overscan_buffer, rightside);

init_screen_rect();
xSemaphoreGive(video_mutex);
}

static void available_ram(const char *context) {
Expand All @@ -329,6 +332,8 @@ static void available_ram(const char *context) {
}

void main_loop() {
TickType_t xLastWakeTime = xTaskGetTickCount ();

uint64_t start = esp_timer_get_time();
uint64_t end;

Expand Down Expand Up @@ -363,6 +368,7 @@ void main_loop() {
}

psg_sync();
vTaskDelayUntil(&xLastWakeTime, 2);

current_overscan_color = sms.vdp.colour[16 + (sms.vdp.registers[0x7] & 0xF)];
if (overscan_color != current_overscan_color) {
Expand Down Expand Up @@ -412,6 +418,14 @@ void main_loop() {
}
}

/*
 * Heartbeat task body: logs "Alive and well" once every 120 ticks, forever.
 * The period is measured from the previous wake-up time rather than from the
 * moment the log call returns. Never returns.
 */
void ping_task() {
    TickType_t last_wake = xTaskGetTickCount();

    for (;;) {
        /* 120 ticks per iteration (one second if the tick rate is 120 Hz). */
        xTaskDelayUntil(&last_wake, 120);
        ESP_LOGI(TAG, "Alive and well");
    }
}

void app_main() {
ESP_LOGI(TAG, "\n"
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
Expand Down Expand Up @@ -479,9 +493,12 @@ void app_main() {

ESP_LOGI(TAG, "Starting video thread");
video_queue = xQueueCreate(1, sizeof(uint16_t *));
xTaskCreatePinnedToCore(&videoTask, "video_task", 2048, NULL, 5, NULL, 0);
video_mutex = xSemaphoreCreateMutex();
xTaskCreatePinnedToCore(&video_task, "video_task", 2048, NULL, 5, NULL, 0);
available_ram("video_task");

//xTaskCreatePinnedToCore(&ping_task, "ping_task", 2048, NULL, 5, NULL, 0);

SMS_set_colour_callback(core_colour_callback);
SMS_set_vblank_callback(core_vblank_callback);
SMS_set_apu_callback(core_apu_callback, AUDIO_FREQ);
Expand Down
47 changes: 36 additions & 11 deletions main/totalsms/psg.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
#include "internal.h"
#include <stdint.h>
#include <string.h>
#include <stdatomic.h>

#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"

extern struct SMS_Core sms;

Expand All @@ -20,6 +25,8 @@ void psg_run(const uint8_t cycles) {}

#else

static TaskHandle_t psg_task_handle = NULL;

enum
{
LATCH_TYPE_TONE = 0,
Expand Down Expand Up @@ -97,6 +104,8 @@ void FORCE_INLINE psg_reg_write(const uint8_t value)
psg_sync();

// if MSB is set, then this is a latched write, else its a normal data write

// TMM: This is a little racy but it seems to work itself out in payroll
if (value & 0x80)
{
latch_reg_write(value);
Expand Down Expand Up @@ -195,22 +204,15 @@ static FORCE_INLINE uint8_t sample_channel(const uint8_t index)
return PSG.polarity[index] * (0xF - PSG.volume[index]);
}

// this is called on psg_reg_write() and at the end of a frame
void psg_sync()
void _psg_sync()
{
// psg regs cannot be read, so no point ticking stuff
// if we don't have callback for samples to be pushed
if (!sms.apu_callback)
{
return;
}

// psg is 16x slower than the cpu, so, it only makes sense to tick
// each component at every 16 step.
enum { STEP = 16 };

uint32_t cycles = atomic_load(&PSG.cycles);
// this loop will *not* cause PSG.cycles to underflow!
for (; STEP <= PSG.cycles; PSG.cycles -= STEP)
for (; STEP <= cycles; cycles -= STEP)
{
tick_tone(0, STEP);
tick_tone(1, STEP);
Expand Down Expand Up @@ -239,11 +241,28 @@ void psg_sync()
core_apu_callback(sms.userdata, &data);
}
}

atomic_fetch_sub_explicit(&PSG.cycles, cycles, memory_order_relaxed);
}

// this is called on psg_reg_write() and at the end of a frame.
// It does not tick the PSG itself: it only wakes the PSG task, which runs
// _psg_sync() in its own context.
void psg_sync()
{
    // psg_task_handle stays NULL until psg_init() has created the task.
    // Notifying a NULL handle is invalid, so skip the wake-up; cycles keep
    // accumulating in PSG.cycles and are picked up by a later sync.
    if (!psg_task_handle)
    {
        return;
    }

    xTaskNotifyGive(psg_task_handle);
}

void psg_task()
{
while(1) {
ulTaskNotifyTake(true, portMAX_DELAY);
_psg_sync();
}
}

// Accounts `cycles` elapsed CPU cycles to the PSG.
//
// PSG.cycles is an atomic counter that _psg_sync() reads and decrements, so
// the addition is done atomically and exactly once per call. Relaxed
// ordering is used: only the counter value itself is exchanged here.
void psg_run(const uint8_t cycles)
{
    atomic_fetch_add_explicit(&PSG.cycles, cycles, memory_order_relaxed);
}

#endif // SMS_DISBALE_AUDIO
Expand All @@ -263,4 +282,10 @@ void psg_init()
PSG.noise.flip_flop = true;

PSG.latched_channel = 0;

if (!psg_task_handle)
{
ESP_LOGI(TAG, "Starting PSG thread");
xTaskCreatePinnedToCore(&psg_task, "psg_task", 1024, NULL, 5, &psg_task_handle, 0);
}
}
3 changes: 2 additions & 1 deletion main/totalsms/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ extern "C" {
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdatomic.h>

#include "videobuffer.h"
#include "esp_attr.h"
Expand Down Expand Up @@ -351,7 +352,7 @@ struct SMS_ApuCallbackData

struct SMS_Psg
{
uint32_t cycles; // elapsed cycles since last psg_sync()
atomic_uint_fast32_t cycles; // elapsed cycles since last psg_sync()

struct
{
Expand Down
4 changes: 2 additions & 2 deletions sdkconfig
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ CONFIG_FREERTOS_TICK_SUPPORT_CORETIMER=y
CONFIG_FREERTOS_CORETIMER_0=y
# CONFIG_FREERTOS_CORETIMER_1 is not set
CONFIG_FREERTOS_SYSTICK_USES_CCOUNT=y
CONFIG_FREERTOS_HZ=100
CONFIG_FREERTOS_HZ=120
CONFIG_FREERTOS_ASSERT_ON_UNTESTED_FUNCTION=y
# CONFIG_FREERTOS_CHECK_STACKOVERFLOW_NONE is not set
# CONFIG_FREERTOS_CHECK_STACKOVERFLOW_PTRVAL is not set
Expand Down Expand Up @@ -747,7 +747,7 @@ CONFIG_HEAP_POISONING_DISABLED=y
CONFIG_HEAP_TRACING_OFF=y
# CONFIG_HEAP_TRACING_STANDALONE is not set
# CONFIG_HEAP_TRACING_TOHOST is not set
# CONFIG_HEAP_ABORT_WHEN_ALLOCATION_FAILS is not set
CONFIG_HEAP_ABORT_WHEN_ALLOCATION_FAILS=y
# end of Heap memory debugging

#
Expand Down

0 comments on commit 1d55f09

Please sign in to comment.