[BREAKING] Simplify interface to DSP::process() (#78)

* Switched to mono inputs/outputs. Removed input/output gain. Removed input buffer copy.
* _num_frames => _num_input_samples to avoid variable collision with WaveNet
* Removed params arg to process
* Removed extra output buffer
* NAM_SAMPLE instead of float for input/output buffers. Updated process call for "benchmodel" tool.
sdatkinson · Oct 6, 2023 · 8904227 · 8904227
1 parent 2dcd64e
commit 8904227
Show file tree

Hide file tree

Showing 6 changed files with 47 additions and 78 deletions.
diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
@@ -124,8 +124,7 @@ void convnet::ConvNet::_process_core_()
   this->_update_buffers_();
   // Main computation!
   const long i_start = this->_input_buffer_offset;
-  const long num_frames = this->_input_post_gain.size();
-  const long i_end = i_start + num_frames;
+  const long i_end = i_start + _num_input_samples;
   // TODO one unnecessary copy :/ #speed
   for (auto i = i_start; i < i_end; i++)
     this->_block_vals[0](0, i) = this->_input_buffer[i];
@@ -134,8 +133,8 @@ void convnet::ConvNet::_process_core_()
   // TODO clean up this allocation
   this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end);
   // Copy to required output array (TODO tighten this up)
-  for (int s = 0; s < num_frames; s++)
-    this->_core_dsp_output[s] = this->_head_output(s);
+  for (int s = 0; s < _num_input_samples; s++)
+    this->_output_samples[s] = this->_head_output(s);
   // Apply anti-pop
   this->_anti_pop_();
 }
@@ -191,12 +190,12 @@ void convnet::ConvNet::_anti_pop_()
   if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
     return;
   const float slope = 1.0f / float(this->_anti_pop_ramp);
-  for (size_t i = 0; i < this->_core_dsp_output.size(); i++)
+  for (size_t i = 0; i < _num_input_samples; i++)
   {
     if (this->_anti_pop_countdown >= this->_anti_pop_ramp)
       break;
     const float gain = std::max(slope * float(this->_anti_pop_countdown), float(0.0));
-    this->_core_dsp_output[i] *= gain;
+    this->_output_samples[i] *= gain;
     this->_anti_pop_countdown++;
   }
 }

diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
@@ -32,15 +32,15 @@ DSP::DSP(const double loudness, const double expected_sample_rate)
 {
 }
 
-void DSP::process(NAM_SAMPLE** inputs, NAM_SAMPLE** outputs, const int num_channels, const int num_frames,
-                  const double input_gain, const double output_gain,
-                  const std::unordered_map<std::string, double>& params)
+void DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
 {
-  this->_get_params_(params);
-  this->_apply_input_level_(inputs, num_channels, num_frames, input_gain);
+  this->_input_samples = input;
+  this->_output_samples = output;
+  this->_num_input_samples = num_frames;
+
   this->_ensure_core_dsp_output_ready_();
   this->_process_core_();
-  this->_apply_output_level_(outputs, num_channels, num_frames, output_gain);
+  this->_apply_output_level_(output, _num_input_samples);
 }
 
 void DSP::finalize_(const int num_frames) {}
@@ -60,38 +60,23 @@ void DSP::_get_params_(const std::unordered_map<std::string, double>& input_para
   }
 }
 
-void DSP::_apply_input_level_(NAM_SAMPLE** inputs, const int num_channels, const int num_frames, const double gain)
-{
-  // Must match exactly; we're going to use the size of _input_post_gain later
-  // for num_frames.
-  if ((int)this->_input_post_gain.size() != num_frames)
-    this->_input_post_gain.resize(num_frames);
-  // MONO ONLY
-  const int channel = 0;
-  for (int i = 0; i < num_frames; i++)
-    this->_input_post_gain[i] = float(gain * inputs[channel][i]);
-}
-
 void DSP::_ensure_core_dsp_output_ready_()
 {
-  if (this->_core_dsp_output.size() < this->_input_post_gain.size())
-    this->_core_dsp_output.resize(this->_input_post_gain.size());
 }
 
 void DSP::_process_core_()
 {
   // Default implementation is the null operation
-  for (size_t i = 0; i < this->_input_post_gain.size(); i++)
-    this->_core_dsp_output[i] = this->_input_post_gain[i];
+  for (size_t i = 0; i < _num_input_samples; i++)
+    this->_output_samples[i] = _input_samples[i];
 }
 
-void DSP::_apply_output_level_(NAM_SAMPLE** outputs, const int num_channels, const int num_frames, const double gain)
+void DSP::_apply_output_level_(NAM_SAMPLE* output, const int num_frames)
 {
   const double loudnessGain = pow(10.0, -(this->mLoudness - TARGET_DSP_LOUDNESS) / 20.0);
-  const double finalGain = this->mNormalizeOutputLoudness ? gain * loudnessGain : gain;
-  for (int c = 0; c < num_channels; c++)
-    for (int s = 0; s < num_frames; s++)
-      outputs[c][s] = (NAM_SAMPLE)(finalGain * this->_core_dsp_output[s]);
+  const double finalGain = this->mNormalizeOutputLoudness ? loudnessGain : 1.0;
+  for (int s = 0; s < num_frames; s++)
+    output[s] = (NAM_SAMPLE)(finalGain * _output_samples[s]);
 }
 
 // Buffer =====================================================================
@@ -122,11 +107,11 @@ void Buffer::_set_receptive_field(const int new_receptive_field, const int input
 
 void Buffer::_update_buffers_()
 {
-  const long int num_frames = this->_input_post_gain.size();
   // Make sure that the buffer is big enough for the receptive field and the
   // frames needed!
   {
-    const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames;
+    const long minimum_input_buffer_size =
+      (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * _num_input_samples;
     if ((long)this->_input_buffer.size() < minimum_input_buffer_size)
     {
       long new_buffer_size = 2;
@@ -139,13 +124,13 @@ void Buffer::_update_buffers_()
 
   // If we'd run off the end of the input buffer, then we need to move the data
   // back to the start of the buffer and start again.
-  if (this->_input_buffer_offset + num_frames > (long)this->_input_buffer.size())
+  if (this->_input_buffer_offset + _num_input_samples > (long)this->_input_buffer.size())
     this->_rewind_buffers_();
   // Put the new samples into the input buffer
-  for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++)
-    this->_input_buffer[i] = this->_input_post_gain[j];
+  for (long i = this->_input_buffer_offset, j = 0; j < _num_input_samples; i++, j++)
+    this->_input_buffer[i] = _input_samples[j];
   // And resize the output buffer:
-  this->_output_buffer.resize(num_frames);
+  this->_output_buffer.resize(_num_input_samples);
   std::fill (this->_output_buffer.begin(), this->_output_buffer.end(), 0.0f);
 }
 
@@ -203,11 +188,11 @@ void Linear::_process_core_()
   this->Buffer::_update_buffers_();
 
   // Main computation!
-  for (size_t i = 0; i < this->_input_post_gain.size(); i++)
+  for (size_t i = 0; i < _num_input_samples; i++)
   {
     const size_t offset = this->_input_buffer_offset - this->_weight.size() + i + 1;
     auto input = Eigen::Map<const Eigen::VectorXf>(&this->_input_buffer[offset], this->_receptive_field);
-    this->_core_dsp_output[i] = this->_bias + this->_weight.dot(input);
+    this->_output_samples[i] = this->_bias + this->_weight.dot(input);
   }
 }
 

diff --git a/NAM/dsp.h b/NAM/dsp.h
@@ -48,18 +48,13 @@ class DSP
   DSP(const double expected_sample_rate = -1.0);
   DSP(const double loudness, const double expected_sample_rate = -1.0);
   virtual ~DSP() = default;
-  // process() does all of the processing requried to take `inputs` array and
-  // fill in the required values on `outputs`.
+  // process() does all of the processing required to take `input` array and
+  // fill in the required values on `output`.
   // To do this:
-  // 1. The parameters from the plugin (I/O levels and any other parametric
-  //    inputs) are gotten.
-  // 2. The input level is applied
-  // 3. The core DSP algorithm is run (This is what should probably be
+  // 1. The core DSP algorithm is run (This is what should probably be
   //    overridden in subclasses).
-  // 4. The output level is applied and the result stored to `output`.
-  virtual void process(NAM_SAMPLE** inputs, NAM_SAMPLE** outputs, const int num_channels, const int num_frames,
-                       const double input_gain, const double output_gain,
-                       const std::unordered_map<std::string, double>& params);
+  // 2. The output level is applied and the result stored to `output`.
+  virtual void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames);
   // Anything to take care of before next buffer comes in.
   // For example:
   // * Move the buffer index forward
@@ -82,10 +77,12 @@ class DSP
   std::unordered_map<std::string, double> _params;
   // If the params have changed since the last buffer was processed:
   bool _stale_params;
-  // Where to store the samples after applying input gain
-  std::vector<float> _input_post_gain;
-  // Location for the output of the core DSP algorithm.
-  std::vector<float> _core_dsp_output;
+  // Input sample buffer
+  NAM_SAMPLE* _input_samples;
+  // Output sample buffer
+  NAM_SAMPLE* _output_samples;
+  // Number of samples in the input buffer
+  int _num_input_samples;
 
   // Methods
 
@@ -94,10 +91,6 @@ class DSP
   // (TODO use "listener" approach)
   void _get_params_(const std::unordered_map<std::string, double>& input_params);
 
-  // Apply the input gain
-  // Result populates this->_input_post_gain
-  void _apply_input_level_(NAM_SAMPLE** inputs, const int num_channels, const int num_frames, const double gain);
-
   // i.e. ensure the size is correct.
   void _ensure_core_dsp_output_ready_();
 
@@ -107,7 +100,7 @@ class DSP
   virtual void _process_core_();
 
   // Copy this->_core_dsp_output to output and apply the output volume
-  void _apply_output_level_(NAM_SAMPLE** outputs, const int num_channels, const int num_frames, const double gain);
+  void _apply_output_level_(NAM_SAMPLE* output, const int num_frames);
 };
 
 // Class where an input buffer is kept so that long-time effects can be

diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
@@ -111,8 +111,8 @@ void lstm::LSTM::_process_core_()
     this->_stale_params = false;
   }
   // Process samples, placing results in the required output location
-  for (size_t i = 0; i < this->_input_post_gain.size(); i++)
-    this->_core_dsp_output[i] = this->_process_sample(this->_input_post_gain[i]);
+  for (size_t i = 0; i < _num_input_samples; i++)
+    this->_output_samples[i] = this->_process_sample(_input_samples[i]);
 }
 
 float lstm::LSTM::_process_sample(const float x)

diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
@@ -277,12 +277,10 @@ wavenet::WaveNet::WaveNet(const double loudness, const std::vector<wavenet::Laye
   NAM_SAMPLE sample = 0;
   NAM_SAMPLE* sample_ptr = &sample;
 
-  std::unordered_map<std::string, double> param_dict = {};
-
   // pre-warm the model over the size of the receptive field
   for (long i = 0; i < receptive_field; i++)
   {
-    this->process(&sample_ptr, &sample_ptr, 1, 1, 1.0, 1.0, param_dict);
+    this->process(sample_ptr, sample_ptr, 1);
     this->finalize_(1);
     sample = 0;
   }
@@ -337,15 +335,14 @@ void wavenet::WaveNet::_prepare_for_frames_(const long num_frames)
 
 void wavenet::WaveNet::_process_core_()
 {
-  const long num_frames = this->_input_post_gain.size();
-  this->_set_num_frames_(num_frames);
-  this->_prepare_for_frames_(num_frames);
+  this->_set_num_frames_(_num_input_samples);
+  this->_prepare_for_frames_(_num_input_samples);
 
   // Fill into condition array:
   // Clumsy...
-  for (int j = 0; j < num_frames; j++)
+  for (int j = 0; j < _num_input_samples; j++)
   {
-    this->_condition(0, j) = this->_input_post_gain[j];
+    this->_condition(0, j) = _input_samples[j];
     if (this->_stale_params) // Column-major assignment; good for Eigen. Let the
                              // compiler optimize this.
       for (size_t i = 0; i < this->_param_names.size(); i++)
@@ -369,10 +366,10 @@ void wavenet::WaveNet::_process_core_()
 
   const long final_head_array = this->_head_arrays.size() - 1;
   assert(this->_head_arrays[final_head_array].rows() == 1);
-  for (int s = 0; s < num_frames; s++)
+  for (int s = 0; s < _num_input_samples; s++)
   {
     float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
-    this->_core_dsp_output[s] = out;
+    this->_output_samples[s] = out;
   }
 }
 

diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp
@@ -12,14 +12,9 @@ using std::chrono::milliseconds;
 #define AUDIO_BUFFER_SIZE 64
 
 double buffer[AUDIO_BUFFER_SIZE];
-double* buffers[1];
 
 int main(int argc, char* argv[])
 {
-  std::unordered_map<std::string, double> mNAMParams = {};
-
-  buffers[0] = buffer;
-
   if (argc > 1)
   {
     const char* modelPath = argv[1];
@@ -50,7 +45,7 @@ int main(int argc, char* argv[])
 
     for (size_t i = 0; i < numBuffers; i++)
     {
-      model->process(buffers, buffers, 1, AUDIO_BUFFER_SIZE, 1.0, 1.0, mNAMParams);
+      model->process(buffer, buffer, AUDIO_BUFFER_SIZE);
       model->finalize_(AUDIO_BUFFER_SIZE);
     }