Skip to content

Commit

Permalink
SPU work on single values instead of arrays
Browse files Browse the repository at this point in the history
The SPU runs one sample at a time anyway, so working on single values instead of arrays should allow the compiler to do more optimisations.
  • Loading branch information
RSDuck committed Jul 10, 2020
1 parent 01e8e36 commit 674a169
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 132 deletions.
193 changes: 74 additions & 119 deletions src/SPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ const s16 PSGTable[8][8] =
{-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF}
};

// Samples produced per Mix() event. In the hunks shown here it is only
// referenced by the array-based ScheduleEvent calls (1024*kSamplesPerRun).
const u32 kSamplesPerRun = 1;

// Output FIFO size in stereo frames. OutputBuffer holds two s16 entries per
// frame: Mix() writes left at OutputWriteOffset and right at
// OutputWriteOffset + 1. Mix() wraps its offsets with the mask
// ((2*OutputBufferSize)-1), so 2*OutputBufferSize must stay a power of two.
const u32 OutputBufferSize = 2*1024;
s16 OutputBuffer[2 * OutputBufferSize];
Expand Down Expand Up @@ -109,7 +108,7 @@ void Reset()
Capture[0].Reset();
Capture[1].Reset();

NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun);
NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0);
}

void Stop()
Expand Down Expand Up @@ -403,49 +402,42 @@ void Channel::NextSample_Noise()
}

// Advances this channel by one output sample and produces its
// volume-scaled value.
//
// `type` selects the sample generator at compile time:
// 0 = PCM8, 1 = PCM16, 2 = ADPCM, 3 = PSG, 4 = Noise.
//
// This hunk shows two forms side by side: the array form
// (s32* buf, u32 samples), which stores into buf[s], and the single-value
// form, which returns the sample.
// The single-value form returns 0 when bit 31 of Cnt is clear.
template<u32 type>
void Channel::Run(s32* buf, u32 samples)
s32 Channel::Run()
{
// Nothing to do unless bit 31 of Cnt is set.
// NOTE(review): presumably the channel start/busy bit -- confirm.
if (!(Cnt & (1<<31))) return;
if (!(Cnt & (1<<31))) return 0;

for (u32 s = 0; s < samples; s++)
Timer += 512; // 1 sample = 512 cycles at 16MHz

// Each time Timer carries past 16 bits, reload it (keeping the excess above
// 0x10000) and step the generator to its next sample. This may happen
// several times per output sample.
while (Timer >> 16)
{
Timer += 512; // 1 sample = 512 cycles at 16MHz
Timer = TimerReload + (Timer - 0x10000);

while (Timer >> 16)
switch (type)
{
Timer = TimerReload + (Timer - 0x10000);

switch (type)
{
case 0: NextSample_PCM8(); break;
case 1: NextSample_PCM16(); break;
case 2: NextSample_ADPCM(); break;
case 3: NextSample_PSG(); break;
case 4: NextSample_Noise(); break;
}
case 0: NextSample_PCM8(); break;
case 1: NextSample_PCM16(); break;
case 2: NextSample_ADPCM(); break;
case 3: NextSample_PSG(); break;
case 4: NextSample_Noise(); break;
}
}

// Scale the current sample: shift left by VolumeShift, then multiply by
// Volume.
s32 val = (s32)CurSample;
val <<= VolumeShift;
val *= Volume;
buf[s] = val;
s32 val = (s32)CurSample;
val <<= VolumeShift;
val *= Volume;

// Array form only: stop filling buf once bit 31 of Cnt has been cleared.
// NOTE(review): presumably cleared by a NextSample_* call -- confirm.
if (!(Cnt & (1<<31))) break;
}
return val;
}

// Splits a channel sample into left/right contributions according to Pan and
// ADDS them to the running mix (leftbuf/rightbuf are accumulators, not
// overwritten).
//
// Left weight is (128-Pan), right weight is Pan; both products are shifted
// right by 10.
// NOTE(review): presumably Pan lies in 0..128 -- confirm against the register
// write handler.
//
// This hunk shows the array form (pointer buffers + sample count) and the
// single-value form (value in, two accumulator references out) side by side.
void Channel::PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf)
void Channel::PanOutput(s32 inbuf, s32& leftbuf, s32& rightbuf)
{
for (u32 s = 0; s < samples; s++)
{
s32 val = (s32)inbuf[s];
s32 val = (s32)inbuf;

// Widen to s64 before multiplying, then shift the product back down.
s32 l = ((s64)val * (128-Pan)) >> 10;
s32 r = ((s64)val * Pan) >> 10;
s32 l = ((s64)val * (128-Pan)) >> 10;
s32 r = ((s64)val * Pan) >> 10;

leftbuf[s] += l;
rightbuf[s] += r;
}
leftbuf += l;
rightbuf += r;
}


Expand Down Expand Up @@ -576,172 +568,135 @@ void CaptureUnit::Run(s32 sample)
}


// Scheduler callback: mixes all 16 channels into one stereo output frame,
// feeds the two capture units, pushes the frame into OutputBuffer, and
// re-schedules itself.
//
// This hunk shows the array-based form (parameter `samples`, 32-entry scratch
// buffers, per-sample loops) and the single-value form (parameter `dummy`,
// plain s32 locals) side by side. `dummy` is not read anywhere in the body;
// it appears to exist only to fit the ScheduleEvent callback signature.
void Mix(u32 samples)
void Mix(u32 dummy)
{
PROFILER_SECTION(mixSPU)

s32 channelbuf[32];
s32 leftbuf[32], rightbuf[32];
s32 ch0buf[32], ch1buf[32], ch2buf[32], ch3buf[32];
s32 leftoutput[32], rightoutput[32];

for (u32 s = 0; s < samples; s++)
{
leftbuf[s] = 0; rightbuf[s] = 0;
leftoutput[s] = 0; rightoutput[s] = 0;
}
// Outputs default to silence; they are only filled in when bit 15 of Cnt is
// set.
s32 leftoutput = 0, rightoutput = 0;

// NOTE(review): bit 15 of Cnt is presumably the master enable -- confirm.
if (Cnt & (1<<15))
{
Channels[0].DoRun(ch0buf, samples);
Channels[1].DoRun(ch1buf, samples);
Channels[2].DoRun(ch2buf, samples);
Channels[3].DoRun(ch3buf, samples);
// Mixer accumulators for this frame (single-value form).
s32 leftbuf = 0, rightbuf = 0;

// Channels 0-3 are kept in named variables because channels 1 and 3 can be
// routed straight to the output by the selectors further down.
s32 ch0buf = Channels[0].DoRun();
s32 ch1buf = Channels[1].DoRun();
s32 ch2buf = Channels[2].DoRun();
s32 ch3buf = Channels[3].DoRun();

// TODO: addition from capture registers
Channels[0].PanOutput(ch0buf, samples, leftbuf, rightbuf);
Channels[2].PanOutput(ch2buf, samples, leftbuf, rightbuf);
Channels[0].PanOutput(ch0buf, leftbuf, rightbuf);
Channels[2].PanOutput(ch2buf, leftbuf, rightbuf);

// Bits 12 / 13 of Cnt, when set, keep channel 1 / channel 3 out of the mixer
// sum. Their samples are still available to the output selectors below.
if (!(Cnt & (1<<12))) Channels[1].PanOutput(ch1buf, samples, leftbuf, rightbuf);
if (!(Cnt & (1<<13))) Channels[3].PanOutput(ch3buf, samples, leftbuf, rightbuf);
if (!(Cnt & (1<<12))) Channels[1].PanOutput(ch1buf, leftbuf, rightbuf);
if (!(Cnt & (1<<13))) Channels[3].PanOutput(ch3buf, leftbuf, rightbuf);

// Channels 4-15 always go through the mixer.
for (int i = 4; i < 16; i++)
{
Channel* chan = &Channels[i];

chan->DoRun(channelbuf, samples);
chan->PanOutput(channelbuf, samples, leftbuf, rightbuf);
s32 val = chan->DoRun();
chan->PanOutput(val, leftbuf, rightbuf);
}

// sound capture
// TODO: other sound capture sources, along with their bugs

// Capture unit 0 takes the left mixer sum, shifted right by 8 and clamped to
// the s16 range, whenever bit 7 of its Cnt is set.
//
// NOTE(review): the array form works on a copy (`val`) and leaves leftbuf
// untouched. The single-value form shifts and clamps leftbuf IN PLACE, and
// leftbuf is read again by the "left mixer" output case below. With capture
// enabled, that path gets shifted by 8 twice (here and at
// `leftoutput >>= 8`). This looks like an unintended behaviour change;
// a local copy for the capture value would avoid it -- please confirm.
if (Capture[0].Cnt & (1<<7))
{
for (u32 s = 0; s < samples; s++)
{
s32 val = leftbuf[s];
leftbuf >>= 8;
if (leftbuf < -0x8000) leftbuf = -0x8000;
else if (leftbuf > 0x7FFF) leftbuf = 0x7FFF;

val >>= 8;
if (val < -0x8000) val = -0x8000;
else if (val > 0x7FFF) val = 0x7FFF;

Capture[0].Run(val);
if (!(Capture[0].Cnt & (1<<7))) break;
}
Capture[0].Run(leftbuf);
}

// Capture unit 1 does the same with the right mixer sum.
// NOTE(review): same in-place modification concern as above, for rightbuf
// and the "right mixer" output case.
if (Capture[1].Cnt & (1<<7))
{
for (u32 s = 0; s < samples; s++)
{
s32 val = rightbuf[s];

val >>= 8;
if (val < -0x8000) val = -0x8000;
else if (val > 0x7FFF) val = 0x7FFF;
rightbuf >>= 8;
if (rightbuf < -0x8000) rightbuf = -0x8000;
else if (rightbuf > 0x7FFF) rightbuf = 0x7FFF;

Capture[1].Run(val);
if (!(Capture[1].Cnt & (1<<7))) break;
}
Capture[1].Run(rightbuf);
}

// final output

// Bits 8-9 of Cnt select the left output source. Direct channel routing
// applies that channel's left pan weight (128 - Pan) and the same >> 10
// scaling that PanOutput uses.
switch (Cnt & 0x0300)
{
case 0x0000: // left mixer
{
for (u32 s = 0; s < samples; s++)
leftoutput[s] = leftbuf[s];
}
leftoutput = leftbuf;
break;
case 0x0100: // channel 1
{
s32 pan = 128 - Channels[1].Pan;
for (u32 s = 0; s < samples; s++)
leftoutput[s] = ((s64)ch1buf[s] * pan) >> 10;
leftoutput = ((s64)ch1buf * pan) >> 10;
}
break;
case 0x0200: // channel 3
{
s32 pan = 128 - Channels[3].Pan;
for (u32 s = 0; s < samples; s++)
leftoutput[s] = ((s64)ch3buf[s] * pan) >> 10;
leftoutput = ((s64)ch3buf * pan) >> 10;
}
break;
case 0x0300: // channel 1+3
{
s32 pan1 = 128 - Channels[1].Pan;
s32 pan3 = 128 - Channels[3].Pan;
for (u32 s = 0; s < samples; s++)
leftoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10);
leftoutput = (((s64)ch1buf * pan1) >> 10) + (((s64)ch3buf * pan3) >> 10);
}
break;
}

// Bits 10-11 of Cnt select the right output source; the right pan weight is
// Pan itself.
switch (Cnt & 0x0C00)
{
case 0x0000: // right mixer
{
for (u32 s = 0; s < samples; s++)
rightoutput[s] = rightbuf[s];
}
rightoutput = rightbuf;
break;
case 0x0400: // channel 1
{
s32 pan = Channels[1].Pan;
for (u32 s = 0; s < samples; s++)
rightoutput[s] = ((s64)ch1buf[s] * pan) >> 10;
rightoutput = ((s64)ch1buf * pan) >> 10;
}
break;
case 0x0800: // channel 3
{
s32 pan = Channels[3].Pan;
for (u32 s = 0; s < samples; s++)
rightoutput[s] = ((s64)ch3buf[s] * pan) >> 10;
rightoutput = ((s64)ch3buf * pan) >> 10;
}
break;
case 0x0C00: // channel 1+3
{
s32 pan1 = Channels[1].Pan;
s32 pan3 = Channels[3].Pan;
for (u32 s = 0; s < samples; s++)
rightoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10);
rightoutput = (((s64)ch1buf * pan1) >> 10) + (((s64)ch3buf * pan3) >> 10);
}
break;
}
}

// Apply the master volume (product shifted right by 7), drop 8 more bits,
// clamp to the s16 range, and store the result halved (>> 1).
for (u32 s = 0; s < samples; s++)
leftoutput = ((s64)leftoutput * MasterVolume) >> 7;
rightoutput = ((s64)rightoutput * MasterVolume) >> 7;

leftoutput >>= 8;
if (leftoutput < -0x8000) leftoutput = -0x8000;
else if (leftoutput > 0x7FFF) leftoutput = 0x7FFF;
rightoutput >>= 8;
if (rightoutput < -0x8000) rightoutput = -0x8000;
else if (rightoutput > 0x7FFF) rightoutput = 0x7FFF;

// Push one interleaved L/R frame. Offsets wrap with a mask, which relies on
// 2*OutputBufferSize being a power of two.
OutputBuffer[OutputWriteOffset ] = leftoutput >> 1;
OutputBuffer[OutputWriteOffset + 1] = rightoutput >> 1;
OutputWriteOffset += 2;
OutputWriteOffset &= ((2*OutputBufferSize)-1);
if (OutputWriteOffset == OutputReadOffset)
{
s32 l = leftoutput[s];
s32 r = rightoutput[s];

l = ((s64)l * MasterVolume) >> 7;
r = ((s64)r * MasterVolume) >> 7;

l >>= 8;
if (l < -0x8000) l = -0x8000;
else if (l > 0x7FFF) l = 0x7FFF;
r >>= 8;
if (r < -0x8000) r = -0x8000;
else if (r > 0x7FFF) r = 0x7FFF;

OutputBuffer[OutputWriteOffset ] = l >> 1;
OutputBuffer[OutputWriteOffset + 1] = r >> 1;
OutputWriteOffset += 2;
OutputWriteOffset &= ((2*OutputBufferSize)-1);
if (OutputWriteOffset == OutputReadOffset)
{
//printf("!! SOUND FIFO OVERFLOW %d\n", OutputWriteOffset>>1);
// advance the read position too, to avoid losing the entire FIFO
OutputReadOffset += 2;
OutputReadOffset &= ((2*OutputBufferSize)-1);
}
//printf("!! SOUND FIFO OVERFLOW %d\n", OutputWriteOffset>>1);
// advance the read position too, to avoid losing the entire FIFO
OutputReadOffset += 2;
OutputReadOffset &= ((2*OutputBufferSize)-1);
}

// Re-arm the SPU event with a delay of 1024 and Mix as the callback.
NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*kSamplesPerRun, Mix, kSamplesPerRun);
NDS::ScheduleEvent(NDS::Event_SPU, true, 1024, Mix, 0);

PROFILER_END_SECTION
}
Expand Down
23 changes: 10 additions & 13 deletions src/SPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void DoSavestate(Savestate* file);

void SetBias(u16 bias);

// SPU mixing callback handed to NDS::ScheduleEvent. In the single-value form
// the u32 argument is not read by the implementation (hence `dummy`).
void Mix(u32 samples);
void Mix(u32 dummy);

void TrimOutput();
void DrainOutput();
Expand Down Expand Up @@ -122,26 +122,23 @@ class Channel
void NextSample_PSG();
void NextSample_Noise();

// Advances the channel by one output sample; `type` picks the NextSample_*
// generator. The array form fills `buf`; the single-value form returns the
// volume-scaled sample.
template<u32 type> void Run(s32* buf, u32 samples);
template<u32 type> s32 Run();

// Dispatches to the Run<type>() instantiation selected by bits 29-30 of Cnt:
// 0 -> Run<0>, 1 -> Run<1>, 2 -> Run<2>. Value 3 depends on the channel
// number: Num >= 14 -> Run<4>, Num >= 8 -> Run<3>, otherwise silence.
//
// This hunk shows the array form (zero-fills buf, then lets Run write into
// it) and the single-value form (returns the sample, or 0 for silence) side
// by side.
//
// NOTE(review): in the value-returning form there is no `return` after the
// switch. All four values of the 2-bit selector are handled, so the end is
// unreachable in practice, but compilers may still warn about it
// (-Wreturn-type). A trailing `return 0;` would silence that -- confirm.
void DoRun(s32* buf, u32 samples)
s32 DoRun()
{
for (u32 s = 0; s < samples; s++)
buf[s] = 0;

switch ((Cnt >> 29) & 0x3)
{
case 0: Run<0>(buf, samples); break;
case 1: Run<1>(buf, samples); break;
case 2: Run<2>(buf, samples); break;
case 0: return Run<0>();
case 1: return Run<1>();
case 2: return Run<2>();
case 3:
if (Num >= 14) Run<4>(buf, samples);
else if (Num >= 8) Run<3>(buf, samples);
break;
if (Num >= 14) return Run<4>();
else if (Num >= 8) return Run<3>();
else return 0;
}
}

// Accumulates this channel's panned contribution into the left/right mix.
// The array form takes pointer buffers plus a sample count; the single-value
// form takes the sample by value and the two accumulators by reference.
void PanOutput(s32* inbuf, u32 samples, s32* leftbuf, s32* rightbuf);
void PanOutput(s32 inbuf, s32& leftbuf, s32& rightbuf);
};

class CaptureUnit
Expand Down

0 comments on commit 674a169

Please sign in to comment.