diff --git a/NAM/activations.cpp b/NAM/activations.cpp index 92f1d8d..d15e888 100644 --- a/NAM/activations.cpp +++ b/NAM/activations.cpp @@ -6,14 +6,14 @@ nam::activations::ActivationHardTanh _HARD_TANH = nam::activations::ActivationHa nam::activations::ActivationReLU _RELU = nam::activations::ActivationReLU(); nam::activations::ActivationSigmoid _SIGMOID = nam::activations::ActivationSigmoid(); -bool nam::activations::Activation::using_fast_tanh = false; +bool nam::activations::Activation::sUsingFastTanh = false; std::unordered_map<std::string, nam::activations::Activation*> nam::activations::Activation::_activations = {{"Tanh", &_TANH}, {"Hardtanh", &_HARD_TANH}, {"Fasttanh", &_FAST_TANH}, {"ReLU", &_RELU}, {"Sigmoid", &_SIGMOID}}; nam::activations::Activation* tanh_bak = nullptr; -nam::activations::Activation* nam::activations::Activation::get_activation(const std::string name) +nam::activations::Activation* nam::activations::Activation::GetActivation(const std::string& name) { if (_activations.find(name) == _activations.end()) return nullptr; @@ -21,9 +21,9 @@ nam::activations::Activation* nam::activations::Activation::get_activation(const return _activations[name]; } -void nam::activations::Activation::enable_fast_tanh() +void nam::activations::Activation::EnableFastTanh() { - nam::activations::Activation::using_fast_tanh = true; + nam::activations::Activation::sUsingFastTanh = true; if (_activations["Tanh"] != _activations["Fasttanh"]) { @@ -32,9 +32,9 @@ void nam::activations::Activation::enable_fast_tanh() } } -void nam::activations::Activation::disable_fast_tanh() +void nam::activations::Activation::DisableFastTanh() { - nam::activations::Activation::using_fast_tanh = false; + nam::activations::Activation::sUsingFastTanh = false; if (_activations["Tanh"] == _activations["Fasttanh"]) { diff --git a/NAM/activations.h b/NAM/activations.h index e9afc33..313288d 100644 --- a/NAM/activations.h +++ b/NAM/activations.h @@ -45,18 +45,18 @@ class Activation public: Activation() = default; virtual ~Activation() = default; - virtual void apply(Eigen::MatrixXf& matrix) { apply(matrix.data(), matrix.rows() * matrix.cols()); } - virtual void apply(Eigen::Block<Eigen::MatrixXf> block) { apply(block.data(), block.rows() * block.cols()); } - virtual void apply(Eigen::Block<Eigen::MatrixXf, -1, -1, true> block) + virtual void Apply(Eigen::Ref<Eigen::MatrixXf> matrix) { Apply(matrix.data(), matrix.rows() * matrix.cols()); } + virtual void Apply(Eigen::Block<Eigen::MatrixXf> block) { Apply(block.data(), block.rows() * block.cols()); } + virtual void Apply(Eigen::Block<Eigen::MatrixXf, -1, -1, true> block) { - apply(block.data(), block.rows() * block.cols()); + Apply(block.data(), block.rows() * block.cols()); } - virtual void apply(float* data, long size) {} + virtual void Apply(float* data, long size) {} - static Activation* get_activation(const std::string name); - static void enable_fast_tanh(); - static void disable_fast_tanh(); - static bool using_fast_tanh; + static Activation* GetActivation(const std::string& name); + static void EnableFastTanh(); + static void DisableFastTanh(); + static bool sUsingFastTanh; protected: static std::unordered_map<std::string, Activation*> _activations; @@ -65,7 +65,7 @@ class Activation class ActivationTanh : public Activation { public: - void apply(float* data, long size) override + void Apply(float* data, long size) override { for (long pos = 0; pos < size; pos++) { @@ -77,7 +77,7 @@ class ActivationTanh : public Activation class ActivationHardTanh : public Activation { public: - void apply(float* data, long size) override + void Apply(float* data, long size) override { for (long pos = 0; pos < size; pos++) { @@ -89,7 +89,7 @@ class 
ActivationHardTanh : public Activation class ActivationFastTanh : public Activation { public: - void apply(float* data, long size) override + void Apply(float* data, long size) override { for (long pos = 0; pos < size; pos++) { @@ -101,7 +101,7 @@ class ActivationFastTanh : public Activation class ActivationReLU : public Activation { public: - void apply(float* data, long size) override + void Apply(float* data, long size) override { for (long pos = 0; pos < size; pos++) { @@ -113,7 +113,7 @@ class ActivationReLU : public Activation class ActivationSigmoid : public Activation { public: - void apply(float* data, long size) override + void Apply(float* data, long size) override { for (long pos = 0; pos < size; pos++) { diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp index 1d4e3ba..b4d3155 100644 --- a/NAM/convnet.cpp +++ b/NAM/convnet.cpp @@ -10,174 +10,174 @@ #include "dsp.h" #include "convnet.h" -nam::convnet::BatchNorm::BatchNorm(const int dim, std::vector::iterator& weights) +nam::convnet::BatchNorm::BatchNorm(const int dim, weightsIterator& weights) { // Extract from param buffer Eigen::VectorXf running_mean(dim); Eigen::VectorXf running_var(dim); - Eigen::VectorXf _weight(dim); - Eigen::VectorXf _bias(dim); + Eigen::VectorXf weight(dim); + Eigen::VectorXf bias(dim); for (int i = 0; i < dim; i++) running_mean(i) = *(weights++); for (int i = 0; i < dim; i++) running_var(i) = *(weights++); for (int i = 0; i < dim; i++) - _weight(i) = *(weights++); + weight(i) = *(weights++); for (int i = 0; i < dim; i++) - _bias(i) = *(weights++); + bias(i) = *(weights++); float eps = *(weights++); // Convert to scale & loc - this->scale.resize(dim); - this->loc.resize(dim); + mScale.resize(dim); + mLoc.resize(dim); for (int i = 0; i < dim; i++) - this->scale(i) = _weight(i) / sqrt(eps + running_var(i)); - this->loc = _bias - this->scale.cwiseProduct(running_mean); + mScale(i) = weight(i) / sqrt(eps + running_var(i)); + mLoc = bias - mScale.cwiseProduct(running_mean); } -void nam::convnet::BatchNorm::process_(Eigen::MatrixXf& x, const long i_start, const long i_end) const +void nam::convnet::BatchNorm::Process(Eigen::Ref x, const long i_start, const long i_end) const { // todo using colwise? 
// #speed but conv probably dominates for (auto i = i_start; i < i_end; i++) { - x.col(i) = x.col(i).cwiseProduct(this->scale); - x.col(i) += this->loc; + x.col(i) = x.col(i).cwiseProduct(mScale); + x.col(i) += mLoc; } } -void nam::convnet::ConvNetBlock::set_weights_(const int in_channels, const int out_channels, const int _dilation, - const bool batchnorm, const std::string activation, - std::vector::iterator& weights) +void nam::convnet::ConvNetBlock::SetWeights(const int inChannels, const int outChannels, const int dilation, + const bool doBatchNorm, const std::string& activation, + weightsIterator& weights) { - this->_batchnorm = batchnorm; + mDoBatchNorm = doBatchNorm; // HACK 2 kernel - this->conv.set_size_and_weights_(in_channels, out_channels, 2, _dilation, !batchnorm, weights); - if (this->_batchnorm) - this->batchnorm = BatchNorm(out_channels, weights); - this->activation = activations::Activation::get_activation(activation); + conv.SetSizeAndWeights(inChannels, outChannels, 2, dilation, !doBatchNorm, weights); + if (mDoBatchNorm) + mBatchnorm = BatchNorm(outChannels, weights); + mActivation = activations::Activation::GetActivation(activation); } -void nam::convnet::ConvNetBlock::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, +void nam::convnet::ConvNetBlock::Process(const Eigen::Ref input, Eigen::Ref output, const long i_start, const long i_end) const { const long ncols = i_end - i_start; - this->conv.process_(input, output, i_start, ncols, i_start); - if (this->_batchnorm) - this->batchnorm.process_(output, i_start, i_end); + conv.Process(input, output, i_start, ncols, i_start); + if (mDoBatchNorm) + mBatchnorm.Process(output, i_start, i_end); - this->activation->apply(output.middleCols(i_start, ncols)); + mActivation->Apply(output.middleCols(i_start, ncols)); } -long nam::convnet::ConvNetBlock::get_out_channels() const +long nam::convnet::ConvNetBlock::GetOutChannels() const { - return this->conv.get_out_channels(); + return conv.GetOutChannels(); } -nam::convnet::_Head::_Head(const int channels, std::vector::iterator& weights) +nam::convnet::Head::Head(const int channels, weightsIterator& weights) { - this->_weight.resize(channels); + mWeight.resize(channels); for (int i = 0; i < channels; i++) - this->_weight[i] = *(weights++); - this->_bias = *(weights++); + mWeight[i] = *(weights++); + mBias = *(weights++); } -void nam::convnet::_Head::process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, +void nam::convnet::Head::Process(const Eigen::Ref input, Eigen::VectorXf& output, const long i_start, const long i_end) const { const long length = i_end - i_start; output.resize(length); for (long i = 0, j = i_start; i < length; i++, j++) - output(i) = this->_bias + input.col(j).dot(this->_weight); + output(i) = mBias + input.col(j).dot(mWeight); } nam::convnet::ConvNet::ConvNet(const int channels, const std::vector& dilations, const bool batchnorm, - const std::string activation, std::vector& weights, - const double expected_sample_rate) -: Buffer(*std::max_element(dilations.begin(), dilations.end()), expected_sample_rate) + const std::string& activation, std::vector& weights, + const double expectedSampleRate) +: Buffer(*std::max_element(dilations.begin(), dilations.end()), expectedSampleRate) { - this->_verify_weights(channels, dilations, batchnorm, weights.size()); - this->_blocks.resize(dilations.size()); - std::vector::iterator it = weights.begin(); + VerifyWeights(channels, dilations, batchnorm, weights.size()); + 
mBlocks.resize(dilations.size()); + weightsIterator it = weights.begin(); for (size_t i = 0; i < dilations.size(); i++) - this->_blocks[i].set_weights_(i == 0 ? 1 : channels, channels, dilations[i], batchnorm, activation, it); - this->_block_vals.resize(this->_blocks.size() + 1); - for (auto& matrix : this->_block_vals) + mBlocks[i].SetWeights(i == 0 ? 1 : channels, channels, dilations[i], batchnorm, activation, it); + mBlockVals.resize(mBlocks.size() + 1); + for (auto& matrix : mBlockVals) matrix.setZero(); - std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f); - this->_head = _Head(channels, it); + std::fill(mInputBuffer.begin(), mInputBuffer.end(), 0.0f); + mHead = Head(channels, it); if (it != weights.end()) throw std::runtime_error("Didn't touch all the weights when initializing ConvNet"); - _prewarm_samples = 1; + mPrewarmSamples = 1; for (size_t i = 0; i < dilations.size(); i++) - _prewarm_samples += dilations[i]; + mPrewarmSamples += dilations[i]; } -void nam::convnet::ConvNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) +void nam::convnet::ConvNet::Process(float* input, float* output, const int numFrames) { - this->_update_buffers_(input, num_frames); + UpdateBuffers(input, numFrames); // Main computation! - const long i_start = this->_input_buffer_offset; - const long i_end = i_start + num_frames; + const long i_start = mInputBufferOffset; + const long i_end = i_start + numFrames; // TODO one unnecessary copy :/ #speed for (auto i = i_start; i < i_end; i++) - this->_block_vals[0](0, i) = this->_input_buffer[i]; - for (size_t i = 0; i < this->_blocks.size(); i++) - this->_blocks[i].process_(this->_block_vals[i], this->_block_vals[i + 1], i_start, i_end); + mBlockVals[0](0, i) = mInputBuffer[i]; + for (size_t i = 0; i < mBlocks.size(); i++) + mBlocks[i].Process(mBlockVals[i], mBlockVals[i + 1], i_start, i_end); // TODO clean up this allocation - this->_head.process_(this->_block_vals[this->_blocks.size()], this->_head_output, i_start, i_end); + mHead.Process(mBlockVals[mBlocks.size()], mHeadOutput, i_start, i_end); // Copy to required output array (TODO tighten this up) - for (int s = 0; s < num_frames; s++) - output[s] = this->_head_output(s); + for (int s = 0; s < numFrames; s++) + output[s] = mHeadOutput(s); } -void nam::convnet::ConvNet::_verify_weights(const int channels, const std::vector& dilations, const bool batchnorm, - const size_t actual_weights) +void nam::convnet::ConvNet::VerifyWeights(const int channels, const std::vector& dilations, const bool batchnorm, + const size_t actualWeights) { // TODO } -void nam::convnet::ConvNet::_update_buffers_(NAM_SAMPLE* input, const int num_frames) +void nam::convnet::ConvNet::UpdateBuffers(float* input, const int numFrames) { - this->Buffer::_update_buffers_(input, num_frames); + Buffer::UpdateBuffers(input, numFrames); - const size_t buffer_size = this->_input_buffer.size(); + const size_t buffer_size = mInputBuffer.size(); - if (this->_block_vals[0].rows() != 1 || this->_block_vals[0].cols() != buffer_size) + if (mBlockVals[0].rows() != Eigen::Index(1) || mBlockVals[0].cols() != Eigen::Index(buffer_size)) { - this->_block_vals[0].resize(1, buffer_size); - this->_block_vals[0].setZero(); + mBlockVals[0].resize(1, buffer_size); + mBlockVals[0].setZero(); } - for (size_t i = 1; i < this->_block_vals.size(); i++) + for (size_t i = 1; i < mBlockVals.size(); i++) { - if (this->_block_vals[i].rows() == this->_blocks[i - 1].get_out_channels() - && this->_block_vals[i].cols() == buffer_size) + 
if (mBlockVals[i].rows() == mBlocks[i - 1].GetOutChannels() + && mBlockVals[i].cols() == Eigen::Index(buffer_size)) continue; // Already has correct size - this->_block_vals[i].resize(this->_blocks[i - 1].get_out_channels(), buffer_size); - this->_block_vals[i].setZero(); + mBlockVals[i].resize(mBlocks[i - 1].GetOutChannels(), buffer_size); + mBlockVals[i].setZero(); } } -void nam::convnet::ConvNet::_rewind_buffers_() +void nam::convnet::ConvNet::RewindBuffers() { // Need to rewind the block vals first because Buffer::rewind_buffers() // resets the offset index // The last _block_vals is the output of the last block and doesn't need to be // rewound. - for (size_t k = 0; k < this->_block_vals.size() - 1; k++) + for (size_t k = 0; k < mBlockVals.size() - 1; k++) { // We actually don't need to pull back a lot...just as far as the first // input sample would grab from dilation - const long _dilation = this->_blocks[k].conv.get_dilation(); - for (long i = this->_receptive_field - _dilation, j = this->_input_buffer_offset - _dilation; - j < this->_input_buffer_offset; i++, j++) - for (long r = 0; r < this->_block_vals[k].rows(); r++) - this->_block_vals[k](r, i) = this->_block_vals[k](r, j); + const long dilation = mBlocks[k].conv.GetDilation(); + for (long i = mReceptiveField - dilation, j = mInputBufferOffset - dilation; + j < mInputBufferOffset; i++, j++) + for (long r = 0; r < mBlockVals[k].rows(); r++) + mBlockVals[k](r, i) = mBlockVals[k](r, j); } // Now we can do the rest of the rewind - this->Buffer::_rewind_buffers_(); + Buffer::RewindBuffers(); } diff --git a/NAM/convnet.h b/NAM/convnet.h index 310a1e5..92103c7 100644 --- a/NAM/convnet.h +++ b/NAM/convnet.h @@ -23,8 +23,8 @@ class BatchNorm { public: BatchNorm(){}; - BatchNorm(const int dim, std::vector::iterator& weights); - void process_(Eigen::MatrixXf& input, const long i_start, const long i_end) const; + BatchNorm(const int dim, weightsIterator& weights); + void Process(Eigen::Ref input, const long i_start, const long i_end) const; private: // TODO simplify to just ax+b @@ -32,56 +32,56 @@ class BatchNorm // y = ax+b // a = w / sqrt(v+eps) // b = a * m + bias - Eigen::VectorXf scale; - Eigen::VectorXf loc; + Eigen::VectorXf mScale; + Eigen::VectorXf mLoc; }; class ConvNetBlock { public: ConvNetBlock(){}; - void set_weights_(const int in_channels, const int out_channels, const int _dilation, const bool batchnorm, - const std::string activation, std::vector::iterator& weights); - void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end) const; - long get_out_channels() const; + void SetWeights(const int inChannels, const int outChannels, const int dilation, const bool batchnorm, + const std::string& activation, weightsIterator& weights); + void Process(const Eigen::Ref input, Eigen::Ref output, const long i_start, const long i_end) const; + long GetOutChannels() const; Conv1D conv; private: - BatchNorm batchnorm; - bool _batchnorm = false; - activations::Activation* activation = nullptr; + BatchNorm mBatchnorm; + bool mDoBatchNorm = false; + activations::Activation* mActivation = nullptr; }; -class _Head +class Head { public: - _Head(){}; - _Head(const int channels, std::vector::iterator& weights); - void process_(const Eigen::MatrixXf& input, Eigen::VectorXf& output, const long i_start, const long i_end) const; + Head(){}; + Head(const int channels, weightsIterator& weights); + void Process(const Eigen::Ref input, Eigen::VectorXf& output, const long i_start, const long i_end) const; 
private: - Eigen::VectorXf _weight; - float _bias = 0.0f; + Eigen::VectorXf mWeight; + float mBias = 0.0f; }; class ConvNet : public Buffer { public: - ConvNet(const int channels, const std::vector& dilations, const bool batchnorm, const std::string activation, - std::vector& weights, const double expected_sample_rate = -1.0); + ConvNet(const int channels, const std::vector& dilations, const bool batchnorm, const std::string& activation, + std::vector& weights, const double expectedSampleRate = -1.0); ~ConvNet() = default; protected: - std::vector _blocks; - std::vector _block_vals; - Eigen::VectorXf _head_output; - _Head _head; - void _verify_weights(const int channels, const std::vector& dilations, const bool batchnorm, - const size_t actual_weights); - void _update_buffers_(NAM_SAMPLE* input, const int num_frames) override; - void _rewind_buffers_() override; + std::vector mBlocks; + std::vector mBlockVals; + Eigen::VectorXf mHeadOutput; + Head mHead; + void VerifyWeights(const int channels, const std::vector& dilations, const bool batchnorm, + const size_t actualWeights); + void UpdateBuffers(float* input, const int numFrames) override; + void RewindBuffers() override; - void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override; + void Process(float* input, float* output, const int numFrames) override; }; }; // namespace convnet }; // namespace nam diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp index d068eed..7c16505 100644 --- a/NAM/dsp.cpp +++ b/NAM/dsp.cpp @@ -14,32 +14,32 @@ constexpr const long _INPUT_BUFFER_SAFETY_FACTOR = 32; -nam::DSP::DSP(const double expected_sample_rate) -: mExpectedSampleRate(expected_sample_rate) +nam::DSP::DSP(const double expectedSampleRate) +: mExpectedSampleRate(expectedSampleRate) { } -void nam::DSP::prewarm() +void nam::DSP::Prewarm() { - if (_prewarm_samples == 0) + if (mPrewarmSamples == 0) return; - NAM_SAMPLE sample = 0; - NAM_SAMPLE* sample_ptr = &sample; + float sample = 0; + float* sample_ptr = &sample; // pre-warm the model for a model-specific number of samples - for (long i = 0; i < _prewarm_samples; i++) + for (long i = 0; i < mPrewarmSamples; i++) { - this->process(sample_ptr, sample_ptr, 1); - this->finalize_(1); + Process(sample_ptr, sample_ptr, 1); + Finalize(1); sample = 0; } } -void nam::DSP::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) +void nam::DSP::Process(float* input, float* output, const int numFrames) { // Default implementation is the null operation - for (size_t i = 0; i < num_frames; i++) + for (auto i = 0; i < numFrames; i++) output[i] = input[i]; } @@ -58,198 +58,198 @@ void nam::DSP::SetLoudness(const double loudness) mHasLoudness = true; } -void nam::DSP::finalize_(const int num_frames) {} +void nam::DSP::Finalize(const int numFrames) {} // Buffer ===================================================================== -nam::Buffer::Buffer(const int receptive_field, const double expected_sample_rate) -: nam::DSP(expected_sample_rate) +nam::Buffer::Buffer(const int receptiveField, const double expectedSampleRate) +: nam::DSP(expectedSampleRate) { - this->_set_receptive_field(receptive_field); + SetReceptiveField(receptiveField); } -void nam::Buffer::_set_receptive_field(const int new_receptive_field) +void nam::Buffer::SetReceptiveField(const int newReceptiveField) { - this->_set_receptive_field(new_receptive_field, _INPUT_BUFFER_SAFETY_FACTOR * new_receptive_field); + SetReceptiveField(newReceptiveField, _INPUT_BUFFER_SAFETY_FACTOR * newReceptiveField); }; -void 
nam::Buffer::_set_receptive_field(const int new_receptive_field, const int input_buffer_size) +void nam::Buffer::SetReceptiveField(const int newReceptiveField, const int inputBufferSize) { - this->_receptive_field = new_receptive_field; - this->_input_buffer.resize(input_buffer_size); - std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f); - this->_reset_input_buffer(); + mReceptiveField = newReceptiveField; + mInputBuffer.resize(inputBufferSize); + std::fill(mInputBuffer.begin(), mInputBuffer.end(), 0.0f); + ResetInputBuffer(); } -void nam::Buffer::_update_buffers_(NAM_SAMPLE* input, const int num_frames) +void nam::Buffer::UpdateBuffers(float* input, const int numFrames) { // Make sure that the buffer is big enough for the receptive field and the // frames needed! { - const long minimum_input_buffer_size = (long)this->_receptive_field + _INPUT_BUFFER_SAFETY_FACTOR * num_frames; - if ((long)this->_input_buffer.size() < minimum_input_buffer_size) + const long minimum_input_buffer_size = (long)mReceptiveField + _INPUT_BUFFER_SAFETY_FACTOR * numFrames; + if ((long)mInputBuffer.size() < minimum_input_buffer_size) { long new_buffer_size = 2; while (new_buffer_size < minimum_input_buffer_size) new_buffer_size *= 2; - this->_input_buffer.resize(new_buffer_size); - std::fill(this->_input_buffer.begin(), this->_input_buffer.end(), 0.0f); + mInputBuffer.resize(new_buffer_size); + std::fill(mInputBuffer.begin(), mInputBuffer.end(), 0.0f); } } // If we'd run off the end of the input buffer, then we need to move the data // back to the start of the buffer and start again. - if (this->_input_buffer_offset + num_frames > (long)this->_input_buffer.size()) - this->_rewind_buffers_(); + if (mInputBufferOffset + numFrames > (long)mInputBuffer.size()) + RewindBuffers(); // Put the new samples into the input buffer - for (long i = this->_input_buffer_offset, j = 0; j < num_frames; i++, j++) - this->_input_buffer[i] = input[j]; + for (long i = mInputBufferOffset, j = 0; j < numFrames; i++, j++) + mInputBuffer[i] = input[j]; // And resize the output buffer: - this->_output_buffer.resize(num_frames); - std::fill(this->_output_buffer.begin(), this->_output_buffer.end(), 0.0f); + mOutputBuffer.resize(numFrames); + std::fill(mOutputBuffer.begin(), mOutputBuffer.end(), 0.0f); } -void nam::Buffer::_rewind_buffers_() +void nam::Buffer::RewindBuffers() { // Copy the input buffer back // RF-1 samples because we've got at least one new one inbound. - for (long i = 0, j = this->_input_buffer_offset - this->_receptive_field; i < this->_receptive_field; i++, j++) - this->_input_buffer[i] = this->_input_buffer[j]; + for (long i = 0, j = mInputBufferOffset - mReceptiveField; i < mReceptiveField; i++, j++) + mInputBuffer[i] = mInputBuffer[j]; // And reset the offset. // Even though we could be stingy about that one sample that we won't be using // (because a new set is incoming) it's probably not worth the // hyper-optimization and liable for bugs. And the code looks way tidier this // way. 
- this->_input_buffer_offset = this->_receptive_field; + mInputBufferOffset = mReceptiveField; } -void nam::Buffer::_reset_input_buffer() +void nam::Buffer::ResetInputBuffer() { - this->_input_buffer_offset = this->_receptive_field; + mInputBufferOffset = mReceptiveField; } -void nam::Buffer::finalize_(const int num_frames) +void nam::Buffer::Finalize(const int numFrames) { - this->nam::DSP::finalize_(num_frames); - this->_input_buffer_offset += num_frames; + nam::DSP::Finalize(numFrames); + mInputBufferOffset += numFrames; } // Linear ===================================================================== -nam::Linear::Linear(const int receptive_field, const bool _bias, const std::vector& weights, - const double expected_sample_rate) -: nam::Buffer(receptive_field, expected_sample_rate) +nam::Linear::Linear(const int receptiveField, const bool bias, const std::vector& weights, + const double expectedSampleRate) +: nam::Buffer(receptiveField, expectedSampleRate) { - if ((int)weights.size() != (receptive_field + (_bias ? 1 : 0))) + if ((int)weights.size() != (receptiveField + (bias ? 1 : 0))) throw std::runtime_error( "Params vector does not match expected size based " "on architecture parameters"); - this->_weight.resize(this->_receptive_field); + mWeight.resize(mReceptiveField); // Pass in in reverse order so that dot products work out of the box. - for (int i = 0; i < this->_receptive_field; i++) - this->_weight(i) = weights[receptive_field - 1 - i]; - this->_bias = _bias ? weights[receptive_field] : (float)0.0; + for (int i = 0; i < mReceptiveField; i++) + mWeight(i) = weights[receptiveField - 1 - i]; + mBias = bias ? weights[receptiveField] : (float)0.0; } -void nam::Linear::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) +void nam::Linear::Process(float* input, float* output, const int numFrames) { - this->nam::Buffer::_update_buffers_(input, num_frames); + nam::Buffer::UpdateBuffers(input, numFrames); // Main computation! - for (size_t i = 0; i < num_frames; i++) + for (auto i = 0; i < numFrames; i++) { - const size_t offset = this->_input_buffer_offset - this->_weight.size() + i + 1; - auto input = Eigen::Map(&this->_input_buffer[offset], this->_receptive_field); - output[i] = this->_bias + this->_weight.dot(input); + const size_t offset = mInputBufferOffset - mWeight.size() + i + 1; + auto input = Eigen::Map(&mInputBuffer[offset], mReceptiveField); + output[i] = mBias + mWeight.dot(input); } } // NN modules ================================================================= -void nam::Conv1D::set_weights_(std::vector::iterator& weights) +void nam::Conv1D::SetWeights(weightsIterator& weights) { - if (this->_weight.size() > 0) + if (mWeight.size() > 0) { - const long out_channels = this->_weight[0].rows(); - const long in_channels = this->_weight[0].cols(); + const long outChannels = mWeight[0].rows(); + const long inChannels = mWeight[0].cols(); // Crazy ordering because that's how it gets flattened. 
- for (auto i = 0; i < out_channels; i++) - for (auto j = 0; j < in_channels; j++) - for (size_t k = 0; k < this->_weight.size(); k++) - this->_weight[k](i, j) = *(weights++); + for (auto i = 0; i < outChannels; i++) + for (auto j = 0; j < inChannels; j++) + for (size_t k = 0; k < mWeight.size(); k++) + mWeight[k](i, j) = *(weights++); } - for (long i = 0; i < this->_bias.size(); i++) - this->_bias(i) = *(weights++); + for (long i = 0; i < mBias.size(); i++) + mBias(i) = *(weights++); } -void nam::Conv1D::set_size_(const int in_channels, const int out_channels, const int kernel_size, const bool do_bias, - const int _dilation) +void nam::Conv1D::SetSize(const int inChannels, const int outChannels, const int kernelSize, const bool doBias, + const int dilation) { - this->_weight.resize(kernel_size); - for (size_t i = 0; i < this->_weight.size(); i++) - this->_weight[i].resize(out_channels, - in_channels); // y = Ax, input array (C,L) - if (do_bias) - this->_bias.resize(out_channels); + mWeight.resize(kernelSize); + for (size_t i = 0; i < mWeight.size(); i++) + mWeight[i].resize(outChannels, + inChannels); // y = Ax, input array (C,L) + if (doBias) + mBias.resize(outChannels); else - this->_bias.resize(0); - this->_dilation = _dilation; + mBias.resize(0); + mDilation = dilation; } -void nam::Conv1D::set_size_and_weights_(const int in_channels, const int out_channels, const int kernel_size, - const int _dilation, const bool do_bias, std::vector::iterator& weights) +void nam::Conv1D::SetSizeAndWeights(const int inChannels, const int outChannels, const int kernelSize, + const int dilation, const bool doBias, weightsIterator& weights) { - this->set_size_(in_channels, out_channels, kernel_size, do_bias, _dilation); - this->set_weights_(weights); + SetSize(inChannels, outChannels, kernelSize, doBias, dilation); + SetWeights(weights); } -void nam::Conv1D::process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long ncols, +void nam::Conv1D::Process(const Eigen::Ref input, Eigen::Ref output, const long i_start, const long ncols, const long j_start) const { // This is the clever part ;) - for (size_t k = 0; k < this->_weight.size(); k++) + for (size_t k = 0; k < mWeight.size(); k++) { - const long offset = this->_dilation * (k + 1 - this->_weight.size()); + const long offset = mDilation * (k + 1 - mWeight.size()); if (k == 0) - output.middleCols(j_start, ncols) = this->_weight[k] * input.middleCols(i_start + offset, ncols); + output.middleCols(j_start, ncols) = mWeight[k] * input.middleCols(i_start + offset, ncols); else - output.middleCols(j_start, ncols) += this->_weight[k] * input.middleCols(i_start + offset, ncols); + output.middleCols(j_start, ncols) += mWeight[k] * input.middleCols(i_start + offset, ncols); } - if (this->_bias.size() > 0) - output.middleCols(j_start, ncols).colwise() += this->_bias; + if (mBias.size() > 0) + output.middleCols(j_start, ncols).colwise() += mBias; } -long nam::Conv1D::get_num_weights() const +long nam::Conv1D::GetNumWeights() const { - long num_weights = this->_bias.size(); - for (size_t i = 0; i < this->_weight.size(); i++) - num_weights += this->_weight[i].size(); + long num_weights = mBias.size(); + for (size_t i = 0; i < mWeight.size(); i++) + num_weights += mWeight[i].size(); return num_weights; } -nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias) +nam::Conv1x1::Conv1x1(const int inChannels, const int outChannels, const bool bias) { - this->_weight.resize(out_channels, in_channels); - 
this->_do_bias = _bias; - if (_bias) - this->_bias.resize(out_channels); + mWeight.resize(outChannels, inChannels); + mDoBias = bias; + if (bias) + mBias.resize(outChannels); } -void nam::Conv1x1::set_weights_(std::vector<float>::iterator& weights) +void nam::Conv1x1::SetWeights(weightsIterator& weights) { - for (int i = 0; i < this->_weight.rows(); i++) - for (int j = 0; j < this->_weight.cols(); j++) - this->_weight(i, j) = *(weights++); - if (this->_do_bias) - for (int i = 0; i < this->_bias.size(); i++) - this->_bias(i) = *(weights++); + for (int i = 0; i < mWeight.rows(); i++) + for (int j = 0; j < mWeight.cols(); j++) + mWeight(i, j) = *(weights++); + if (mDoBias) + for (int i = 0; i < mBias.size(); i++) + mBias(i) = *(weights++); } -Eigen::MatrixXf nam::Conv1x1::process(const Eigen::MatrixXf& input) const +Eigen::MatrixXf nam::Conv1x1::Process(const Eigen::Ref<const Eigen::MatrixXf> input) const { - if (this->_do_bias) - return (this->_weight * input).colwise() + this->_bias; + if (mDoBias) + return (mWeight * input).colwise() + mBias; else - return this->_weight * input; + return mWeight * input; } diff --git a/NAM/dsp.h b/NAM/dsp.h index c3e8ec9..6c590bc 100644 --- a/NAM/dsp.h +++ b/NAM/dsp.h @@ -12,17 +12,14 @@ #include "activations.h" #include "json.hpp" -#ifdef NAM_SAMPLE_FLOAT - #define NAM_SAMPLE float -#else - #define NAM_SAMPLE double -#endif // Use a sample rate of -1 if we don't know what the model expects to be run at. // TODO clean this up and track a bool for whether it knows. #define NAM_UNKNOWN_EXPECTED_SAMPLE_RATE -1.0 namespace nam { +using weightsIterator = std::vector<float>::const_iterator; + enum EArchitectures { kLinear = 0, @@ -40,22 +37,22 @@ class DSP // Older models won't know, but newer ones will come with a loudness from the training based on their response to a // standardized input. // We may choose to have the models figure out for themselves how loud they are in here in the future. - DSP(const double expected_sample_rate); + DSP(const double expectedSampleRate); virtual ~DSP() = default; - // prewarm() does any required intial work required to "settle" model initial conditions + // Prewarm() does any required initial work required to "settle" model initial conditions // it can be somewhat expensive, so should not be called during realtime audio processing - virtual void prewarm(); - // process() does all of the processing requried to take `input` array and + virtual void Prewarm(); + // Process() does all of the processing required to take `input` array and // fill in the required values on `output`. // To do this: // 1. The core DSP algorithm is run (This is what should probably be // overridden in subclasses). // 2. The output level is applied and the result stored to `output`. - virtual void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames); + virtual void Process(float* input, float* output, const int numFrames); // Anything to take care of before next buffer comes in. // For example: // * Move the buffer index forward - virtual void finalize_(const int num_frames); + virtual void Finalize(const int numFrames); // Expected sample rate, in Hz. // TODO throw if it doesn't know. double GetExpectedSampleRate() const { return mExpectedSampleRate; }; @@ -76,7 +73,7 @@ class DSP // What sample rate does the model expect? 
double mExpectedSampleRate; // How many samples should be processed during "pre-warming" - int _prewarm_samples = 0; + int mPrewarmSamples = 0; }; // Class where an input buffer is kept so that long-time effects can be @@ -85,37 +82,35 @@ class DSP class Buffer : public DSP { public: - Buffer(const int receptive_field, const double expected_sample_rate = -1.0); - void finalize_(const int num_frames); + Buffer(const int receptiveField, const double expectedSampleRate = -1.0); + void Finalize(const int numFrames); protected: - // Input buffer - const int _input_buffer_channels = 1; // Mono - int _receptive_field; + int mReceptiveField; // First location where we add new samples from the input - long _input_buffer_offset; - std::vector _input_buffer; - std::vector _output_buffer; - - void _set_receptive_field(const int new_receptive_field, const int input_buffer_size); - void _set_receptive_field(const int new_receptive_field); - void _reset_input_buffer(); - // Use this->_input_post_gain - virtual void _update_buffers_(NAM_SAMPLE* input, int num_frames); - virtual void _rewind_buffers_(); + long mInputBufferOffset; + std::vector mInputBuffer; + std::vector mOutputBuffer; + + void SetReceptiveField(const int newReceptiveField, const int inputBufferSize); + void SetReceptiveField(const int newReceptiveField); + void ResetInputBuffer(); + // Use _input_post_gain + virtual void UpdateBuffers(float* input, int numFrames); + virtual void RewindBuffers(); }; // Basic linear model (an IR!) class Linear : public Buffer { public: - Linear(const int receptive_field, const bool _bias, const std::vector& weights, - const double expected_sample_rate = -1.0); - void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override; + Linear(const int receptiveField, const bool bias, const std::vector& weights, + const double expectedSampleRate = -1.0); + void Process(float* input, float* output, const int numFrames) override; protected: - Eigen::VectorXf _weight; - float _bias; + Eigen::VectorXf mWeight; + float mBias; }; // NN modules ================================================================= @@ -123,55 +118,55 @@ class Linear : public Buffer class Conv1D { public: - Conv1D() { this->_dilation = 1; }; - void set_weights_(std::vector::iterator& weights); - void set_size_(const int in_channels, const int out_channels, const int kernel_size, const bool do_bias, - const int _dilation); - void set_size_and_weights_(const int in_channels, const int out_channels, const int kernel_size, const int _dilation, - const bool do_bias, std::vector::iterator& weights); + Conv1D() { mDilation = 1; }; + void SetWeights(weightsIterator& weights); + void SetSize(const int inChannels, const int outChannels, const int kernelSize, const bool doBias, + const int dilation); + void SetSizeAndWeights(const int inChannels, const int outChannels, const int kernelSize, const int dilation, + const bool doBias, weightsIterator& weights); // Process from input to output // Rightmost indices of input go from i_start to i_end, // Indices on output for from j_start (to j_start + i_end - i_start) - void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end, + void Process(const Eigen::Ref input, Eigen::Ref output, const long i_start, const long i_end, const long j_start) const; - long get_in_channels() const { return this->_weight.size() > 0 ? 
this->_weight[0].cols() : 0; }; - long get_kernel_size() const { return this->_weight.size(); }; - long get_num_weights() const; - long get_out_channels() const { return this->_weight.size() > 0 ? this->_weight[0].rows() : 0; }; - int get_dilation() const { return this->_dilation; }; + long GetInChannels() const { return mWeight.size() > 0 ? mWeight[0].cols() : 0; }; + long GetKernelSize() const { return mWeight.size(); }; + long GetNumWeights() const; + long GetOutChannels() const { return mWeight.size() > 0 ? mWeight[0].rows() : 0; }; + int GetDilation() const { return mDilation; }; private: // Gonna wing this... // conv[kernel](cout, cin) - std::vector _weight; - Eigen::VectorXf _bias; - int _dilation; + std::vector mWeight; + Eigen::VectorXf mBias; + int mDilation; }; // Really just a linear layer class Conv1x1 { public: - Conv1x1(const int in_channels, const int out_channels, const bool _bias); - void set_weights_(std::vector::iterator& weights); + Conv1x1(const int inChannels, const int outChannels, const bool bias); + void SetWeights(weightsIterator& weights); // :param input: (N,Cin) or (Cin,) // :return: (N,Cout) or (Cout,), respectively - Eigen::MatrixXf process(const Eigen::MatrixXf& input) const; + Eigen::MatrixXf Process(const Eigen::Ref input) const; - long get_out_channels() const { return this->_weight.rows(); }; + long GetOutChannels() const { return mWeight.rows(); }; private: - Eigen::MatrixXf _weight; - Eigen::VectorXf _bias; - bool _do_bias; + Eigen::MatrixXf mWeight; + Eigen::VectorXf mBias; + bool mDoBias; }; // Utilities ================================================================== -// Implemented in get_dsp.cpp +// Implemented in GetDSP.cpp // Data for a DSP object // :param version: Data version. Follows the conventions established in the trainer code. -// :param architecture: Defines the high-level architecture. Supported are (as per `get-dsp()` in get_dsp.cpp): +// :param architecture: Defines the high-level architecture. Supported are (as per `get-dsp()` in GetDSP.cpp): // * "CatLSTM" // * "CatWaveNet" // * "ConvNet" @@ -181,7 +176,7 @@ class Conv1x1 // :param config: // :param metadata: // :param weights: The model weights -// :param expected_sample_rate: Most NAM models implicitly assume that data will be provided to them at some sample +// :param expectedSampleRate: Most NAM models implicitly assume that data will be provided to them at some sample // rate. This captures it for other components interfacing with the model to understand its needs. Use -1.0 for "I // don't know". struct dspData @@ -191,19 +186,21 @@ struct dspData nlohmann::json config; nlohmann::json metadata; std::vector weights; - double expected_sample_rate; + double expectedSampleRate; }; // Verify that the config that we are building our model from is supported by // this plugin version. -void verify_config_version(const std::string version); +void VerifyConfigVersion(const std::string& version); // Takes the model file and uses it to instantiate an instance of DSP. -std::unique_ptr get_dsp(const std::filesystem::path model_file); +std::unique_ptr GetDSP(const std::filesystem::path& modelFile); +// Creates an instance of DSP. Also returns a dspData struct that holds the data of the model. +std::unique_ptr GetDSP(const std::filesystem::path& modelFile, dspData& returnedConfig); // Creates an instance of DSP. Also returns a dspData struct that holds the data of the model. 
-std::unique_ptr<DSP> get_dsp(const std::filesystem::path model_file, dspData& returnedConfig); +std::unique_ptr<DSP> GetDSP(const char* jsonStr, dspData& returnedConfig); // Instantiates a DSP object from dsp_config struct. -std::unique_ptr<DSP> get_dsp(dspData& conf); +std::unique_ptr<DSP> GetDSP(dspData& conf); // Legacy loader for directory-type DSPs -std::unique_ptr<DSP> get_dsp_legacy(const std::filesystem::path dirname); +std::unique_ptr<DSP> GetDSPLegacy(const std::filesystem::path& dirname); }; // namespace nam diff --git a/NAM/get_dsp.cpp b/NAM/get_dsp.cpp index 47aba97..6ca34db 100644 --- a/NAM/get_dsp.cpp +++ b/NAM/get_dsp.cpp @@ -53,7 +53,7 @@ Version ParseVersion(const std::string& versionStr) return version; } -void verify_config_version(const std::string versionStr) +void VerifyConfigVersion(const std::string& versionStr) { Version version = ParseVersion(versionStr); if (version.major != 0 || version.minor != 5) @@ -66,7 +66,7 @@ void verify_config_version(const std::string versionStr) } } -std::vector<float> GetWeights(nlohmann::json const& j, const std::filesystem::path config_path) +std::vector<float> GetWeights(nlohmann::json const& j) { if (j.find("weights") != j.end()) { @@ -77,54 +77,82 @@ std::vector<float> GetWeights(nlohmann::json const& j, const std::filesystem::pa return weights; } else - throw std::runtime_error("Corrupted model file is missing weights."); + throw std::runtime_error("Corrupted model is missing weights."); } -std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename) +std::unique_ptr<DSP> GetDSP(const std::filesystem::path& configFileName) { dspData temp; - return get_dsp(config_filename, temp); + return GetDSP(configFileName, temp); } -std::unique_ptr<DSP> get_dsp(const std::filesystem::path config_filename, dspData& returnedConfig) +std::unique_ptr<DSP> GetDSP(const std::filesystem::path& configFileName, dspData& config) { - if (!std::filesystem::exists(config_filename)) + if (!std::filesystem::exists(configFileName)) throw std::runtime_error("Config JSON doesn't exist!\n"); - std::ifstream i(config_filename); + std::ifstream i(configFileName); nlohmann::json j; i >> j; - verify_config_version(j["version"]); + VerifyConfigVersion(j["version"]); auto architecture = j["architecture"]; - nlohmann::json config = j["config"]; - std::vector<float> weights = GetWeights(j, config_filename); - - // Assign values to returnedConfig - returnedConfig.version = j["version"]; - returnedConfig.architecture = j["architecture"]; - returnedConfig.config = j["config"]; - returnedConfig.metadata = j["metadata"]; - returnedConfig.weights = weights; + std::vector<float> weights = GetWeights(j); + + // Assign values to config + config.version = j["version"]; + config.architecture = j["architecture"]; + config.config = j["config"]; + config.metadata = j["metadata"]; + config.weights = weights; if (j.find("sample_rate") != j.end()) - returnedConfig.expected_sample_rate = j["sample_rate"]; + config.expectedSampleRate = j["sample_rate"]; else { - returnedConfig.expected_sample_rate = -1.0; + config.expectedSampleRate = -1.0; } + /*Copy to a new dsp_config object for GetDSP below, + since not sure if weights actually get modified as being non-const references on some + model constructors inside GetDSP(dsp_config& conf).
+ We need to return unmodified version of dsp_config via returnedConfig.*/ + dspData conf = config; + + return GetDSP(conf); +} + +std::unique_ptr<DSP> GetDSP(const char* jsonStr, dspData& config) +{ + nlohmann::json j = nlohmann::json::parse(jsonStr); + VerifyConfigVersion(j["version"]); + + auto architecture = j["architecture"]; + std::vector<float> weights = GetWeights(j); + + // Assign values to config + config.version = j["version"]; + config.architecture = j["architecture"]; + config.config = j["config"]; + config.metadata = j["metadata"]; + config.weights = weights; + if (j.find("sample_rate") != j.end()) + config.expectedSampleRate = j["sample_rate"]; + else + { + config.expectedSampleRate = -1.0; + } - /*Copy to a new dsp_config object for get_dsp below, + /*Copy to a new dsp_config object for GetDSP below, since not sure if weights actually get modified as being non-const references on some - model constructors inside get_dsp(dsp_config& conf). + model constructors inside GetDSP(dsp_config& conf). We need to return unmodified version of dsp_config via returnedConfig.*/ - dspData conf = returnedConfig; + dspData conf = config; - return get_dsp(conf); + return GetDSP(conf); } -std::unique_ptr<DSP> get_dsp(dspData& conf) +std::unique_ptr<DSP> GetDSP(dspData& conf) { - verify_config_version(conf.version); + VerifyConfigVersion(conf.version); auto& architecture = conf.architecture; nlohmann::json& config = conf.config; @@ -140,14 +168,14 @@ std::unique_ptr<DSP> get_dsp(dspData& conf) haveLoudness = true; } } - const double expectedSampleRate = conf.expected_sample_rate; + const double expectedSampleRate = conf.expectedSampleRate; std::unique_ptr<DSP> out = nullptr; if (architecture == "Linear") { - const int receptive_field = config["receptive_field"]; - const bool _bias = config["bias"]; - out = std::make_unique<Linear>(receptive_field, _bias, weights, expectedSampleRate); + const int receptiveField = config["receptive_field"]; + const bool bias = config["bias"]; + out = std::make_unique<Linear>(receptiveField, bias, weights, expectedSampleRate); } else if (architecture == "ConvNet") { @@ -156,33 +184,33 @@ std::unique_ptr<DSP> get_dsp(dspData& conf) std::vector<int> dilations; for (size_t i = 0; i < config["dilations"].size(); i++) dilations.push_back(config["dilations"][i]); - const std::string activation = config["activation"]; + auto activation = config["activation"]; out = std::make_unique<ConvNet>(channels, dilations, batchnorm, activation, weights, expectedSampleRate); } else if (architecture == "LSTM") { - const int num_layers = config["num_layers"]; - const int input_size = config["input_size"]; - const int hidden_size = config["hidden_size"]; - out = std::make_unique<LSTM>(num_layers, input_size, hidden_size, weights, expectedSampleRate); + const int numLayers = config["num_layers"]; + const int inputSize = config["input_size"]; + const int hiddenSize = config["hidden_size"]; + out = std::make_unique<LSTM>(numLayers, inputSize, hiddenSize, weights, expectedSampleRate); } else if (architecture == "WaveNet") { - std::vector<wavenet::LayerArrayParams> layer_array_params; + std::vector<wavenet::LayerArrayParams> layerArrayParams; for (size_t i = 0; i < config["layers"].size(); i++) { - nlohmann::json layer_config = config["layers"][i]; + nlohmann::json layerConfig = config["layers"][i]; std::vector<int> dilations; - for (size_t j = 0; j < layer_config["dilations"].size(); j++) - dilations.push_back(layer_config["dilations"][j]); - layer_array_params.push_back( - wavenet::LayerArrayParams(layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], - layer_config["channels"], 
layer_config["kernel_size"], dilations, - layer_config["activation"], layer_config["gated"], layer_config["head_bias"])); + for (size_t j = 0; j < layerConfig["dilations"].size(); j++) + dilations.push_back(layerConfig["dilations"][j]); + layerArrayParams.push_back( + wavenet::LayerArrayParams(layerConfig["input_size"], layerConfig["condition_size"], layerConfig["head_size"], + layerConfig["channels"], layerConfig["kernel_size"], dilations, + layerConfig["activation"], layerConfig["gated"], layerConfig["head_bias"])); } - const bool with_head = config["head"] == NULL; - const float head_scale = config["head_scale"]; - out = std::make_unique(layer_array_params, head_scale, with_head, weights, expectedSampleRate); + const bool withHead = config["head"] == NULL; + const float headScale = config["head_scale"]; + out = std::make_unique(layerArrayParams, headScale, withHead, weights, expectedSampleRate); } else { @@ -194,7 +222,7 @@ std::unique_ptr get_dsp(dspData& conf) } // "pre-warm" the model to settle initial conditions - out->prewarm(); + out->Prewarm(); return out; } diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp index 38ff494..f3b3af9 100644 --- a/NAM/lstm.cpp +++ b/NAM/lstm.cpp @@ -2,95 +2,96 @@ #include #include +#include "dsp.h" #include "lstm.h" -nam::lstm::LSTMCell::LSTMCell(const int input_size, const int hidden_size, std::vector::iterator& weights) +nam::lstm::LSTMCell::LSTMCell(const int inputSize, const int hiddenSize, weightsIterator& weights) { // Resize arrays - this->_w.resize(4 * hidden_size, input_size + hidden_size); - this->_b.resize(4 * hidden_size); - this->_xh.resize(input_size + hidden_size); - this->_ifgo.resize(4 * hidden_size); - this->_c.resize(hidden_size); + _w.resize(4 * hiddenSize, inputSize + hiddenSize); + _b.resize(4 * hiddenSize); + _xh.resize(inputSize + hiddenSize); + _ifgo.resize(4 * hiddenSize); + _c.resize(hiddenSize); // Assign in row-major because that's how PyTorch goes. 
- for (int i = 0; i < this->_w.rows(); i++) - for (int j = 0; j < this->_w.cols(); j++) - this->_w(i, j) = *(weights++); - for (int i = 0; i < this->_b.size(); i++) - this->_b[i] = *(weights++); - const int h_offset = input_size; - for (int i = 0; i < hidden_size; i++) - this->_xh[i + h_offset] = *(weights++); - for (int i = 0; i < hidden_size; i++) - this->_c[i] = *(weights++); + for (int i = 0; i < _w.rows(); i++) + for (int j = 0; j < _w.cols(); j++) + _w(i, j) = *(weights++); + for (int i = 0; i < _b.size(); i++) + _b[i] = *(weights++); + const int h_offset = inputSize; + for (int i = 0; i < hiddenSize; i++) + _xh[i + h_offset] = *(weights++); + for (int i = 0; i < hiddenSize; i++) + _c[i] = *(weights++); } -void nam::lstm::LSTMCell::process_(const Eigen::VectorXf& x) +void nam::lstm::LSTMCell::Process(const Eigen::VectorXf& x) { - const long hidden_size = this->_get_hidden_size(); - const long input_size = this->_get_input_size(); + const long hiddenSize = GetHiddenSize(); + const long inputSize = GetInputSize(); // Assign inputs - this->_xh(Eigen::seq(0, input_size - 1)) = x; + _xh(Eigen::seq(0, inputSize - 1)) = x; // The matmul - this->_ifgo = this->_w * this->_xh + this->_b; + _ifgo = _w * _xh + _b; // Elementwise updates (apply nonlinearities here) const long i_offset = 0; - const long f_offset = hidden_size; - const long g_offset = 2 * hidden_size; - const long o_offset = 3 * hidden_size; - const long h_offset = input_size; + const long f_offset = hiddenSize; + const long g_offset = 2 * hiddenSize; + const long o_offset = 3 * hiddenSize; + const long h_offset = inputSize; - if (activations::Activation::using_fast_tanh) + if (activations::Activation::sUsingFastTanh) { - for (auto i = 0; i < hidden_size; i++) - this->_c[i] = - activations::fast_sigmoid(this->_ifgo[i + f_offset]) * this->_c[i] - + activations::fast_sigmoid(this->_ifgo[i + i_offset]) * activations::fast_tanh(this->_ifgo[i + g_offset]); + for (auto i = 0; i < hiddenSize; i++) + _c[i] = + activations::fast_sigmoid(_ifgo[i + f_offset]) * _c[i] + + activations::fast_sigmoid(_ifgo[i + i_offset]) * activations::fast_tanh(_ifgo[i + g_offset]); - for (int i = 0; i < hidden_size; i++) - this->_xh[i + h_offset] = - activations::fast_sigmoid(this->_ifgo[i + o_offset]) * activations::fast_tanh(this->_c[i]); + for (int i = 0; i < hiddenSize; i++) + _xh[i + h_offset] = + activations::fast_sigmoid(_ifgo[i + o_offset]) * activations::fast_tanh(_c[i]); } else { - for (auto i = 0; i < hidden_size; i++) - this->_c[i] = activations::sigmoid(this->_ifgo[i + f_offset]) * this->_c[i] - + activations::sigmoid(this->_ifgo[i + i_offset]) * tanhf(this->_ifgo[i + g_offset]); + for (auto i = 0; i < hiddenSize; i++) + _c[i] = activations::sigmoid(_ifgo[i + f_offset]) * _c[i] + + activations::sigmoid(_ifgo[i + i_offset]) * tanhf(_ifgo[i + g_offset]); - for (int i = 0; i < hidden_size; i++) - this->_xh[i + h_offset] = activations::sigmoid(this->_ifgo[i + o_offset]) * tanhf(this->_c[i]); + for (int i = 0; i < hiddenSize; i++) + _xh[i + h_offset] = activations::sigmoid(_ifgo[i + o_offset]) * tanhf(_c[i]); } } -nam::lstm::LSTM::LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector& weights, - const double expected_sample_rate) -: DSP(expected_sample_rate) +nam::lstm::LSTM::LSTM(const int numLayers, const int inputSize, const int hiddenSize, const std::vector& weights, + const double expectedSampleRate) +: DSP(expectedSampleRate) { - this->_input.resize(1); - std::vector::iterator it = weights.begin(); - for (int i = 0; i 
< num_layers; i++) - this->_layers.push_back(LSTMCell(i == 0 ? input_size : hidden_size, hidden_size, it)); - this->_head_weight.resize(hidden_size); - for (int i = 0; i < hidden_size; i++) - this->_head_weight[i] = *(it++); - this->_head_bias = *(it++); + mInput.resize(1); + auto it = weights.begin(); + for (int i = 0; i < numLayers; i++) + mLayers.push_back(LSTMCell(i == 0 ? inputSize : hiddenSize, hiddenSize, it)); + mHeadWeight.resize(hiddenSize); + for (int i = 0; i < hiddenSize; i++) + mHeadWeight[i] = *(it++); + mHeadBias = *(it++); assert(it == weights.end()); } -void nam::lstm::LSTM::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) +void nam::lstm::LSTM::Process(float* input, float* output, const int numFrames) { - for (size_t i = 0; i < num_frames; i++) - output[i] = this->_process_sample(input[i]); + for (auto i = 0; i < numFrames; i++) + output[i] = ProcessSample(input[i]); } -float nam::lstm::LSTM::_process_sample(const float x) +float nam::lstm::LSTM::ProcessSample(const float x) { - if (this->_layers.size() == 0) + if (mLayers.size() == 0) return x; - this->_input(0) = x; - this->_layers[0].process_(this->_input); - for (size_t i = 1; i < this->_layers.size(); i++) - this->_layers[i].process_(this->_layers[i - 1].get_hidden_state()); - return this->_head_weight.dot(this->_layers[this->_layers.size() - 1].get_hidden_state()) + this->_head_bias; + mInput(0) = x; + mLayers[0].Process(mInput); + for (size_t i = 1; i < mLayers.size(); i++) + mLayers[i].Process(mLayers[i - 1].GetHiddenState()); + return mHeadWeight.dot(mLayers[mLayers.size() - 1].GetHiddenState()) + mHeadBias; } diff --git a/NAM/lstm.h b/NAM/lstm.h index 6b02b18..8701bc3 100644 --- a/NAM/lstm.h +++ b/NAM/lstm.h @@ -22,9 +22,9 @@ namespace lstm class LSTMCell { public: - LSTMCell(const int input_size, const int hidden_size, std::vector::iterator& weights); - Eigen::VectorXf get_hidden_state() const { return this->_xh(Eigen::placeholders::lastN(this->_get_hidden_size())); }; - void process_(const Eigen::VectorXf& x); + LSTMCell(const int inputSize, const int hiddenSize, weightsIterator& weights); + Eigen::VectorXf GetHiddenState() const { return _xh(Eigen::placeholders::lastN(GetHiddenSize())); }; + void Process(const Eigen::VectorXf& x); private: // Parameters @@ -42,29 +42,29 @@ class LSTMCell // Cell state Eigen::VectorXf _c; - long _get_hidden_size() const { return this->_b.size() / 4; }; - long _get_input_size() const { return this->_xh.size() - this->_get_hidden_size(); }; + long GetHiddenSize() const { return _b.size() / 4; }; + long GetInputSize() const { return _xh.size() - GetHiddenSize(); }; }; // The multi-layer LSTM model class LSTM : public DSP { public: - LSTM(const int num_layers, const int input_size, const int hidden_size, std::vector& weights, - const double expected_sample_rate = -1.0); + LSTM(const int numLayers, const int inputSize, const int hiddenSize, const std::vector& weights, + const double expectedSampleRate = -1.0); ~LSTM() = default; protected: - Eigen::VectorXf _head_weight; - float _head_bias; - void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override; - std::vector _layers; + Eigen::VectorXf mHeadWeight; + float mHeadBias; + void Process(float* input, float* output, const int numFrames) override; + std::vector mLayers; - float _process_sample(const float x); + float ProcessSample(const float x); // Input to the LSTM. 
// Since this is assumed to not be a parametric model, its shape should be (1,) - Eigen::VectorXf _input; + Eigen::VectorXf mInput; }; }; // namespace lstm }; // namespace nam diff --git a/NAM/util.cpp b/NAM/util.cpp index 93815fc..d94724d 100644 --- a/NAM/util.cpp +++ b/NAM/util.cpp @@ -3,7 +3,7 @@ #include "util.h" -std::string nam::util::lowercase(const std::string& s) +std::string nam::util::lowercase(const std::string&& s) { std::string out(s); std::transform(s.begin(), s.end(), out.begin(), [](unsigned char c) { return std::tolower(c); }); diff --git a/NAM/util.h b/NAM/util.h index c0a5bd4..8b0f579 100644 --- a/NAM/util.h +++ b/NAM/util.h @@ -9,6 +9,6 @@ namespace nam { namespace util { -std::string lowercase(const std::string& s); +std::string lowercase(const std::string&& s); }; // namespace util }; // namespace nam diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index fc96fa6..bf0d15d 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -6,279 +6,289 @@ #include "wavenet.h" -nam::wavenet::_DilatedConv::_DilatedConv(const int in_channels, const int out_channels, const int kernel_size, +nam::wavenet::DilatedConv::DilatedConv(const int inChannels, const int outChannels, const int kernelSize, const int bias, const int dilation) { - this->set_size_(in_channels, out_channels, kernel_size, bias, dilation); + SetSize(inChannels, outChannels, kernelSize, bias, dilation); } -void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) +void nam::wavenet::Layer::SetWeights(weightsIterator& weights) { - this->_conv.set_weights_(weights); - this->_input_mixin.set_weights_(weights); - this->_1x1.set_weights_(weights); + mConv.SetWeights(weights); + mInputMixin.SetWeights(weights); + _1x1.SetWeights(weights); } -void nam::wavenet::_Layer::process_(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, - Eigen::MatrixXf& head_input, Eigen::MatrixXf& output, const long i_start, +void nam::wavenet::Layer::Process(const Eigen::Ref input, const Eigen::Ref condition, + Eigen::Ref head_input, Eigen::Ref output, const long i_start, const long j_start) { const long ncols = condition.cols(); - const long channels = this->get_channels(); + const long channels = GetChannels(); // Input dilated conv - this->_conv.process_(input, this->_z, i_start, ncols, 0); + mConv.Process(input, _z, i_start, ncols, 0); // Mix-in condition - this->_z += this->_input_mixin.process(condition); + _z += mInputMixin.Process(condition); - this->_activation->apply(this->_z); + mActivation->Apply(_z); - if (this->_gated) + if (mGated) { - activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(channels, 0, channels, this->_z.cols())); + activations::Activation::GetActivation("Sigmoid")->Apply(_z.block(channels, 0, channels, _z.cols())); - this->_z.topRows(channels).array() *= this->_z.bottomRows(channels).array(); - // this->_z.topRows(channels) = this->_z.topRows(channels).cwiseProduct( - // this->_z.bottomRows(channels) + _z.topRows(channels).array() *= _z.bottomRows(channels).array(); + // _z.topRows(channels) = _z.topRows(channels).cwiseProduct( + // _z.bottomRows(channels) // ); } - head_input += this->_z.topRows(channels); - output.middleCols(j_start, ncols) = input.middleCols(i_start, ncols) + this->_1x1.process(this->_z.topRows(channels)); + head_input += _z.topRows(channels); + output.middleCols(j_start, ncols) = input.middleCols(i_start, ncols) + _1x1.Process(_z.topRows(channels)); } -void nam::wavenet::_Layer::set_num_frames_(const long num_frames) +void 
 {
-  if (this->_z.rows() == this->_conv.get_out_channels() && this->_z.cols() == num_frames)
+  if (_z.rows() == mConv.GetOutChannels() && _z.cols() == numFrames)
     return; // Already has correct size
-  this->_z.resize(this->_conv.get_out_channels(), num_frames);
-  this->_z.setZero();
+  _z.resize(mConv.GetOutChannels(), numFrames);
+  _z.setZero();
 }
 
 // LayerArray =================================================================
 
 #define LAYER_ARRAY_BUFFER_SIZE 65536
 
-nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
-                                       const int channels, const int kernel_size, const std::vector<int>& dilations,
-                                       const std::string activation, const bool gated, const bool head_bias)
-: _rechannel(input_size, channels, false)
-, _head_rechannel(channels, head_size, head_bias)
+nam::wavenet::LayerArray::LayerArray(const int inputSize, const int condition_size, const int head_size,
+                                     const int channels, const int kernelSize, const std::vector<int>& dilations,
+                                     const std::string& activation, const bool gated, const bool head_bias)
+: mReChannel(inputSize, channels, false)
+, mHeadRechannel(channels, head_size, head_bias)
 {
   for (size_t i = 0; i < dilations.size(); i++)
-    this->_layers.push_back(_Layer(condition_size, channels, kernel_size, dilations[i], activation, gated));
-  const long receptive_field = this->_get_receptive_field();
+    mLayers.push_back(Layer(condition_size, channels, kernelSize, dilations[i], activation, gated));
+  const long receptiveField = GetReceptiveField();
   for (size_t i = 0; i < dilations.size(); i++)
   {
-    this->_layer_buffers.push_back(Eigen::MatrixXf(channels, LAYER_ARRAY_BUFFER_SIZE + receptive_field - 1));
-    this->_layer_buffers[i].setZero();
+    mLayerBuffers.push_back(Eigen::MatrixXf(channels, LAYER_ARRAY_BUFFER_SIZE + receptiveField - 1));
+    mLayerBuffers[i].setZero();
   }
-  this->_buffer_start = this->_get_receptive_field() - 1;
+  mBufferStart = GetReceptiveField() - 1;
 }
 
-void nam::wavenet::_LayerArray::advance_buffers_(const int num_frames)
+void nam::wavenet::LayerArray::AdvanceBuffers(const int numFrames)
 {
-  this->_buffer_start += num_frames;
+  mBufferStart += numFrames;
 }
 
-long nam::wavenet::_LayerArray::get_receptive_field() const
+long nam::wavenet::LayerArray::GetReceptiveField() const
 {
   long result = 0;
-  for (size_t i = 0; i < this->_layers.size(); i++)
-    result += this->_layers[i].get_dilation() * (this->_layers[i].get_kernel_size() - 1);
+  for (size_t i = 0; i < mLayers.size(); i++)
+    result += mLayers[i].GetDilation() * (mLayers[i].GetKernelSize() - 1);
   return result;
 }
 
-void nam::wavenet::_LayerArray::prepare_for_frames_(const long num_frames)
+void nam::wavenet::LayerArray::PrepareForFrames(const long numFrames)
 {
   // Example:
   // _buffer_start = 0
-  // num_frames = 64
+  // numFrames = 64
   // buffer_size = 64
   // -> this will write on indices 0 through 63, inclusive.
   // -> No illegal writes.
   // -> no rewind needed.
-  if (this->_buffer_start + num_frames > this->_get_buffer_size())
-    this->_rewind_buffers_();
+  if (mBufferStart + numFrames > GetBufferSize())
+    RewindBuffers();
 }
 
-void nam::wavenet::_LayerArray::process_(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition,
-                                         Eigen::MatrixXf& head_inputs, Eigen::MatrixXf& layer_outputs,
-                                         Eigen::MatrixXf& head_outputs)
+void nam::wavenet::LayerArray::Process(const Eigen::Ref<const Eigen::MatrixXf> layerInputs, const Eigen::Ref<const Eigen::MatrixXf> condition,
+                                       Eigen::Ref<Eigen::MatrixXf> headInputs, Eigen::Ref<Eigen::MatrixXf> layerOutputs,
+                                       Eigen::Ref<Eigen::MatrixXf> headOutputs)
 {
-  this->_layer_buffers[0].middleCols(this->_buffer_start, layer_inputs.cols()) = this->_rechannel.process(layer_inputs);
-  const size_t last_layer = this->_layers.size() - 1;
-  for (size_t i = 0; i < this->_layers.size(); i++)
+  mLayerBuffers[0].middleCols(mBufferStart, layerInputs.cols()) = mReChannel.Process(layerInputs);
+  const size_t last_layer = mLayers.size() - 1;
+  for (size_t i = 0; i < mLayers.size(); i++)
   {
-    this->_layers[i].process_(this->_layer_buffers[i], condition, head_inputs,
-                              i == last_layer ? layer_outputs : this->_layer_buffers[i + 1], this->_buffer_start,
-                              i == last_layer ? 0 : this->_buffer_start);
+    if (i == last_layer)
+    {
+      mLayers[i].Process(mLayerBuffers[i], condition, headInputs,
+                         layerOutputs, mBufferStart,
+                         0);
+    }
+    else
+    {
+      mLayers[i].Process(mLayerBuffers[i], condition, headInputs,
+                         mLayerBuffers[i + 1], mBufferStart,
+                         mBufferStart);
+    }
   }
 
-  head_outputs = this->_head_rechannel.process(head_inputs);
+  headOutputs = mHeadRechannel.Process(headInputs);
 }
 
-void nam::wavenet::_LayerArray::set_num_frames_(const long num_frames)
+void nam::wavenet::LayerArray::SetNumFrames(const long numFrames)
 {
-  // Wavenet checks for unchanged num_frames; if we made it here, there's
+  // Wavenet checks for unchanged numFrames; if we made it here, there's
   // something to do.
-  if (LAYER_ARRAY_BUFFER_SIZE - num_frames < this->_get_receptive_field())
+  if (LAYER_ARRAY_BUFFER_SIZE - numFrames < GetReceptiveField())
   {
     std::stringstream ss;
-    ss << "Asked to accept a buffer of " << num_frames << " samples, but the buffer is too short ("
-       << LAYER_ARRAY_BUFFER_SIZE << ") to get out of the recptive field (" << this->_get_receptive_field()
+    ss << "Asked to accept a buffer of " << numFrames << " samples, but the buffer is too short ("
+       << LAYER_ARRAY_BUFFER_SIZE << ") to get out of the receptive field (" << GetReceptiveField()
        << "); copy errors could occur!\n";
     throw std::runtime_error(ss.str().c_str());
   }
-  for (size_t i = 0; i < this->_layers.size(); i++)
-    this->_layers[i].set_num_frames_(num_frames);
+  for (size_t i = 0; i < mLayers.size(); i++)
+    mLayers[i].SetNumFrames(numFrames);
 }
 
-void nam::wavenet::_LayerArray::set_weights_(std::vector<float>::iterator& weights)
+void nam::wavenet::LayerArray::SetWeights(weightsIterator& weights)
 {
-  this->_rechannel.set_weights_(weights);
-  for (size_t i = 0; i < this->_layers.size(); i++)
-    this->_layers[i].set_weights_(weights);
-  this->_head_rechannel.set_weights_(weights);
+  mReChannel.SetWeights(weights);
+  for (size_t i = 0; i < mLayers.size(); i++)
+    mLayers[i].SetWeights(weights);
+  mHeadRechannel.SetWeights(weights);
 }
 
-long nam::wavenet::_LayerArray::_get_channels() const
+long nam::wavenet::LayerArray::GetChannels() const
 {
-  return this->_layers.size() > 0 ? this->_layers[0].get_channels() : 0;
+  return mLayers.size() > 0 ? mLayers[0].GetChannels() : 0;
 }
 
-long nam::wavenet::_LayerArray::_get_receptive_field() const
+long nam::wavenet::LayerArray::_GetReceptiveField() const // TODO: why two?
 {
-  // TODO remove this and use get_receptive_field() instead!
+  // TODO remove this and use GetReceptiveField() instead!
   long res = 1;
-  for (size_t i = 0; i < this->_layers.size(); i++)
-    res += (this->_layers[i].get_kernel_size() - 1) * this->_layers[i].get_dilation();
+  for (size_t i = 0; i < mLayers.size(); i++)
+    res += (mLayers[i].GetKernelSize() - 1) * mLayers[i].GetDilation();
   return res;
 }
 
-void nam::wavenet::_LayerArray::_rewind_buffers_()
+void nam::wavenet::LayerArray::RewindBuffers()
 // Consider wrapping instead...
 // Can make this smaller--largest dilation, not receptive field!
 {
-  const long start = this->_get_receptive_field() - 1;
-  for (size_t i = 0; i < this->_layer_buffers.size(); i++)
+  const long start = GetReceptiveField() - 1;
+  for (size_t i = 0; i < mLayerBuffers.size(); i++)
   {
-    const long d = (this->_layers[i].get_kernel_size() - 1) * this->_layers[i].get_dilation();
-    this->_layer_buffers[i].middleCols(start - d, d) = this->_layer_buffers[i].middleCols(this->_buffer_start - d, d);
+    const long d = (mLayers[i].GetKernelSize() - 1) * mLayers[i].GetDilation();
+    mLayerBuffers[i].middleCols(start - d, d) = mLayerBuffers[i].middleCols(mBufferStart - d, d);
   }
-  this->_buffer_start = start;
+  mBufferStart = start;
 }
 
 // Head =======================================================================
 
-nam::wavenet::_Head::_Head(const int input_size, const int num_layers, const int channels, const std::string activation)
-: _channels(channels)
-, _head(num_layers > 0 ? channels : input_size, 1, true)
-, _activation(activations::Activation::get_activation(activation))
+nam::wavenet::Head::Head(const int inputSize, const int numLayers, const int channels, const std::string& activation)
+: mChannels(channels)
+, mHead(numLayers > 0 ? channels : inputSize, 1, true)
+, mActivation(activations::Activation::GetActivation(activation))
 {
-  assert(num_layers > 0);
-  int dx = input_size;
-  for (int i = 0; i < num_layers; i++)
+  assert(numLayers > 0);
+  int dx = inputSize;
+  for (int i = 0; i < numLayers; i++)
   {
-    this->_layers.push_back(Conv1x1(dx, i == num_layers - 1 ? 1 : channels, true));
+    mLayers.push_back(Conv1x1(dx, i == numLayers - 1 ? 1 : channels, true));
     dx = channels;
-    if (i < num_layers - 1)
-      this->_buffers.push_back(Eigen::MatrixXf());
+    if (i < numLayers - 1)
+      mBuffers.push_back(Eigen::MatrixXf());
   }
 }
 
-void nam::wavenet::_Head::set_weights_(std::vector<float>::iterator& weights)
+void nam::wavenet::Head::SetWeights(weightsIterator& weights)
 {
-  for (size_t i = 0; i < this->_layers.size(); i++)
-    this->_layers[i].set_weights_(weights);
+  for (size_t i = 0; i < mLayers.size(); i++)
+    mLayers[i].SetWeights(weights);
 }
 
-void nam::wavenet::_Head::process_(Eigen::MatrixXf& inputs, Eigen::MatrixXf& outputs)
+void nam::wavenet::Head::Process(Eigen::Ref<Eigen::MatrixXf> inputs, Eigen::Ref<Eigen::MatrixXf> outputs)
 {
-  const size_t num_layers = this->_layers.size();
-  this->_apply_activation_(inputs);
-  if (num_layers == 1)
-    outputs = this->_layers[0].process(inputs);
+  const size_t numLayers = mLayers.size();
+  ApplyActivation(inputs);
+  if (numLayers == 1)
+    outputs = mLayers[0].Process(inputs);
   else
   {
-    this->_buffers[0] = this->_layers[0].process(inputs);
-    for (size_t i = 1; i < num_layers; i++)
+    mBuffers[0] = mLayers[0].Process(inputs);
+    for (size_t i = 1; i < numLayers; i++)
     { // Asserted > 0 layers
-      this->_apply_activation_(this->_buffers[i - 1]);
-      if (i < num_layers - 1)
-        this->_buffers[i] = this->_layers[i].process(this->_buffers[i - 1]);
+      ApplyActivation(mBuffers[i - 1]);
+      if (i < numLayers - 1)
+        mBuffers[i] = mLayers[i].Process(mBuffers[i - 1]);
       else
-        outputs = this->_layers[i].process(this->_buffers[i - 1]);
+        outputs = mLayers[i].Process(mBuffers[i - 1]);
     }
   }
 }
 
-void nam::wavenet::_Head::set_num_frames_(const long num_frames)
+void nam::wavenet::Head::SetNumFrames(const long numFrames)
 {
-  for (size_t i = 0; i < this->_buffers.size(); i++)
+  for (size_t i = 0; i < mBuffers.size(); i++)
   {
-    if (this->_buffers[i].rows() == this->_channels && this->_buffers[i].cols() == num_frames)
+    if (mBuffers[i].rows() == mChannels && mBuffers[i].cols() == numFrames)
       continue; // Already has correct size
-    this->_buffers[i].resize(this->_channels, num_frames);
-    this->_buffers[i].setZero();
+    mBuffers[i].resize(mChannels, numFrames);
+    mBuffers[i].setZero();
   }
 }
 
-void nam::wavenet::_Head::_apply_activation_(Eigen::MatrixXf& x)
+void nam::wavenet::Head::ApplyActivation(Eigen::Ref<Eigen::MatrixXf> x)
 {
-  this->_activation->apply(x);
+  mActivation->Apply(x);
 }
 
 // WaveNet ====================================================================
 
-nam::wavenet::WaveNet::WaveNet(const std::vector<LayerArrayParams>& layer_array_params,
-                               const float head_scale, const bool with_head, std::vector<float> weights,
-                               const double expected_sample_rate)
-: DSP(expected_sample_rate)
-, _num_frames(0)
-, _head_scale(head_scale)
+nam::wavenet::WaveNet::WaveNet(const std::vector<LayerArrayParams>& layerArrayParams,
                               const float headScale, const bool withHead, const std::vector<float>& weights,
                               const double expectedSampleRate)
+: DSP(expectedSampleRate)
+, mNumFrames(0)
+, mHeadScale(headScale)
 {
-  if (with_head)
+  if (withHead)
     throw std::runtime_error("Head not implemented!");
-  for (size_t i = 0; i < layer_array_params.size(); i++)
+  for (size_t i = 0; i < layerArrayParams.size(); i++)
   {
-    this->_layer_arrays.push_back(nam::wavenet::_LayerArray(
-      layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
-      layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
-      layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias));
-    this->_layer_array_outputs.push_back(Eigen::MatrixXf(layer_array_params[i].channels, 0));
+    mLayerArrays.push_back(nam::wavenet::LayerArray(
+      layerArrayParams[i].mInputSize, layerArrayParams[i].mConditionSize, layerArrayParams[i].mHeadSize,
+      layerArrayParams[i].mChannels, layerArrayParams[i].mKernelSize, layerArrayParams[i].mDilations,
+      layerArrayParams[i].mActivation, layerArrayParams[i].mGated, layerArrayParams[i].mHeadBias));
+    mLayerArrayOutputs.push_back(Eigen::MatrixXf(layerArrayParams[i].mChannels, 0));
     if (i == 0)
-      this->_head_arrays.push_back(Eigen::MatrixXf(layer_array_params[i].channels, 0));
+      mHeadArrays.push_back(Eigen::MatrixXf(layerArrayParams[i].mChannels, 0));
     if (i > 0)
-      if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
+      if (layerArrayParams[i].mChannels != layerArrayParams[i - 1].mHeadSize)
      {
        std::stringstream ss;
-        ss << "channels of layer " << i << " (" << layer_array_params[i].channels
-           << ") doesn't match head_size of preceding layer (" << layer_array_params[i - 1].head_size << "!\n";
+        ss << "channels of layer " << i << " (" << layerArrayParams[i].mChannels
+           << ") doesn't match head_size of preceding layer (" << layerArrayParams[i - 1].mHeadSize << ")!\n";
        throw std::runtime_error(ss.str().c_str());
      }
-    this->_head_arrays.push_back(Eigen::MatrixXf(layer_array_params[i].head_size, 0));
+    mHeadArrays.push_back(Eigen::MatrixXf(layerArrayParams[i].mHeadSize, 0));
   }
-  this->_head_output.resize(1, 0); // Mono output!
-  this->set_weights_(weights);
+  mHeadOutput.resize(1, 0); // Mono output!
+  SetWeights(weights);
 
-  _prewarm_samples = 1;
-  for (size_t i = 0; i < this->_layer_arrays.size(); i++)
-    _prewarm_samples += this->_layer_arrays[i].get_receptive_field();
+  mPrewarmSamples = 1;
+  for (size_t i = 0; i < mLayerArrays.size(); i++)
+    mPrewarmSamples += mLayerArrays[i].GetReceptiveField();
 }
 
-void nam::wavenet::WaveNet::finalize_(const int num_frames)
+void nam::wavenet::WaveNet::Finalize(const int numFrames)
 {
-  this->DSP::finalize_(num_frames);
-  this->_advance_buffers_(num_frames);
+  DSP::Finalize(numFrames);
+  AdvanceBuffers(numFrames);
 }
 
-void nam::wavenet::WaveNet::set_weights_(std::vector<float>& weights)
+void nam::wavenet::WaveNet::SetWeights(const std::vector<float>& weights)
 {
-  std::vector<float>::iterator it = weights.begin();
-  for (size_t i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].set_weights_(it);
-  // this->_head.set_params_(it);
-  this->_head_scale = *(it++);
+  weightsIterator it = weights.begin();
+  for (size_t i = 0; i < mLayerArrays.size(); i++)
+    mLayerArrays[i].SetWeights(it);
+  // _head.set_params_(it);
+  mHeadScale = *(it++);
   if (it != weights.end())
   {
     std::stringstream ss;
@@ -293,71 +303,71 @@ void nam::wavenet::WaveNet::set_weights_(std::vector<float>& weights)
   }
 }
 
-void nam::wavenet::WaveNet::_advance_buffers_(const int num_frames)
+void nam::wavenet::WaveNet::AdvanceBuffers(const int numFrames)
 {
-  for (size_t i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].advance_buffers_(num_frames);
+  for (size_t i = 0; i < mLayerArrays.size(); i++)
+    mLayerArrays[i].AdvanceBuffers(numFrames);
 }
 
-void nam::wavenet::WaveNet::_prepare_for_frames_(const long num_frames)
+void nam::wavenet::WaveNet::PrepareForFrames(const long numFrames)
 {
-  for (size_t i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].prepare_for_frames_(num_frames);
+  for (size_t i = 0; i < mLayerArrays.size(); i++)
+    mLayerArrays[i].PrepareForFrames(numFrames);
 }
 
-void nam::wavenet::WaveNet::_set_condition_array(NAM_SAMPLE* input, const int num_frames)
+void nam::wavenet::WaveNet::SetConditionArray(float* input, const int numFrames)
 {
-  for (int j = 0; j < num_frames; j++)
+  for (int j = 0; j < numFrames; j++)
   {
-    this->_condition(0, j) = input[j];
+    mCondition(0, j) = input[j];
   }
 }
 
-void nam::wavenet::WaveNet::process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames)
+void nam::wavenet::WaveNet::Process(float* input, float* output, const int numFrames)
 {
-  this->_set_num_frames_(num_frames);
-  this->_prepare_for_frames_(num_frames);
-  this->_set_condition_array(input, num_frames);
+  SetNumFrames(numFrames);
+  PrepareForFrames(numFrames);
+  SetConditionArray(input, numFrames);
 
   // Main layer arrays:
   // Layer-to-layer
   // Sum on head output
-  this->_head_arrays[0].setZero();
-  for (size_t i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].process_(i == 0 ? this->_condition : this->_layer_array_outputs[i - 1], this->_condition,
-                                    this->_head_arrays[i], this->_layer_array_outputs[i], this->_head_arrays[i + 1]);
-  // this->_head.process_(
-  //   this->_head_input,
-  //   this->_head_output
+  mHeadArrays[0].setZero();
+  for (size_t i = 0; i < mLayerArrays.size(); i++)
+    mLayerArrays[i].Process(i == 0 ? mCondition : mLayerArrayOutputs[i - 1], mCondition,
+                            mHeadArrays[i], mLayerArrayOutputs[i], mHeadArrays[i + 1]);
+  // _head.Process(
+  //   _head_input,
+  //   _head_output
   //);
   // Copy to required output array
   // Hack: apply head scale here; revisit when/if I activate the head.
-  // assert(this->_head_output.rows() == 1);
+  // assert(_head_output.rows() == 1);
 
-  const long final_head_array = this->_head_arrays.size() - 1;
-  assert(this->_head_arrays[final_head_array].rows() == 1);
-  for (int s = 0; s < num_frames; s++)
+  const long finalHeadArray = mHeadArrays.size() - 1;
+  assert(mHeadArrays[finalHeadArray].rows() == 1);
+  for (int s = 0; s < numFrames; s++)
  {
-    float out = this->_head_scale * this->_head_arrays[final_head_array](0, s);
+    float out = mHeadScale * mHeadArrays[finalHeadArray](0, s);
    output[s] = out;
  }
 }
 
-void nam::wavenet::WaveNet::_set_num_frames_(const long num_frames)
+void nam::wavenet::WaveNet::SetNumFrames(const long numFrames)
 {
-  if (num_frames == this->_num_frames)
+  if (numFrames == mNumFrames)
     return;
 
-  this->_condition.resize(this->_get_condition_dim(), num_frames);
-  for (size_t i = 0; i < this->_head_arrays.size(); i++)
-    this->_head_arrays[i].resize(this->_head_arrays[i].rows(), num_frames);
-  for (size_t i = 0; i < this->_layer_array_outputs.size(); i++)
-    this->_layer_array_outputs[i].resize(this->_layer_array_outputs[i].rows(), num_frames);
-  this->_head_output.resize(this->_head_output.rows(), num_frames);
-  this->_head_output.setZero();
-
-  for (size_t i = 0; i < this->_layer_arrays.size(); i++)
-    this->_layer_arrays[i].set_num_frames_(num_frames);
-  // this->_head.set_num_frames_(num_frames);
-  this->_num_frames = num_frames;
+  mCondition.resize(GetConditionDim(), numFrames);
+  for (size_t i = 0; i < mHeadArrays.size(); i++)
+    mHeadArrays[i].resize(mHeadArrays[i].rows(), numFrames);
+  for (size_t i = 0; i < mLayerArrayOutputs.size(); i++)
+    mLayerArrayOutputs[i].resize(mLayerArrayOutputs[i].rows(), numFrames);
+  mHeadOutput.resize(mHeadOutput.rows(), numFrames);
+  mHeadOutput.setZero();
+
+  for (size_t i = 0; i < mLayerArrays.size(); i++)
+    mLayerArrays[i].SetNumFrames(numFrames);
+  // _head.SetNumFrames(numFrames);
+  mNumFrames = numFrames;
 }
diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index 7ea94f1..b1e6248 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -13,191 +13,191 @@ namespace nam
 {
 namespace wavenet
 {
 // Rework the initialization API slightly. Merge w/ dsp.h later.
-class _DilatedConv : public Conv1D
+class DilatedConv : public Conv1D
 {
 public:
-  _DilatedConv(const int in_channels, const int out_channels, const int kernel_size, const int bias,
+  DilatedConv(const int inChannels, const int outChannels, const int kernelSize, const int bias,
              const int dilation);
 };
 
-class _Layer
+class Layer
 {
 public:
-  _Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
-         const std::string activation, const bool gated)
-  : _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation)
-  , _input_mixin(condition_size, gated ? 2 * channels : channels, false)
+  Layer(const int conditionSize, const int channels, const int kernelSize, const int dilation,
+        const std::string& activation, const bool gated)
+  : mConv(channels, gated ? 2 * channels : channels, kernelSize, true, dilation)
+  , mInputMixin(conditionSize, gated ? 2 * channels : channels, false)
   , _1x1(channels, channels, true)
-  , _activation(activations::Activation::get_activation(activation))
-  , _gated(gated){};
-  void set_weights_(std::vector<float>::iterator& weights);
+  , mActivation(activations::Activation::GetActivation(activation))
+  , mGated(gated){};
+  void SetWeights(weightsIterator& weights);
   // :param `input`: from previous layer
   // :param `output`: to next layer
-  void process_(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, Eigen::MatrixXf& head_input,
-                Eigen::MatrixXf& output, const long i_start, const long j_start);
-  void set_num_frames_(const long num_frames);
-  long get_channels() const { return this->_conv.get_in_channels(); };
-  int get_dilation() const { return this->_conv.get_dilation(); };
-  long get_kernel_size() const { return this->_conv.get_kernel_size(); };
+  void Process(const Eigen::Ref<const Eigen::MatrixXf> input, const Eigen::Ref<const Eigen::MatrixXf> condition, Eigen::Ref<Eigen::MatrixXf> headInput,
+               Eigen::Ref<Eigen::MatrixXf> output, const long i_start, const long j_start);
+  void SetNumFrames(const long numFrames);
+  long GetChannels() const { return mConv.GetInChannels(); };
+  int GetDilation() const { return mConv.GetDilation(); };
+  long GetKernelSize() const { return mConv.GetKernelSize(); };
 
 private:
   // The dilated convolution at the front of the block
-  _DilatedConv _conv;
+  DilatedConv mConv;
   // Input mixin
-  Conv1x1 _input_mixin;
+  Conv1x1 mInputMixin;
   // The post-activation 1x1 convolution
   Conv1x1 _1x1;
   // The internal state
   Eigen::MatrixXf _z;
 
-  activations::Activation* _activation;
-  const bool _gated;
+  activations::Activation* mActivation;
+  const bool mGated;
 };
 
 class LayerArrayParams
 {
 public:
-  LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
-                   const int kernel_size_, const std::vector<int>& dilations_, const std::string activation_,
-                   const bool gated_, const bool head_bias_)
-  : input_size(input_size_)
-  , condition_size(condition_size_)
-  , head_size(head_size_)
-  , channels(channels_)
-  , kernel_size(kernel_size_)
-  , activation(activation_)
-  , gated(gated_)
-  , head_bias(head_bias_)
+  LayerArrayParams(const int inputSize, const int conditionSize, const int headSize, const int channels,
+                   const int kernelSize, const std::vector<int>& dilations, const std::string& activation,
+                   const bool gated, const bool headBias)
+  : mInputSize(inputSize)
+  , mConditionSize(conditionSize)
+  , mHeadSize(headSize)
+  , mChannels(channels)
+  , mKernelSize(kernelSize)
+  , mActivation(activation)
+  , mGated(gated)
+  , mHeadBias(headBias)
  {
-    for (size_t i = 0; i < dilations_.size(); i++)
-      this->dilations.push_back(dilations_[i]);
+    for (size_t i = 0; i < dilations.size(); i++)
+      mDilations.push_back(dilations[i]);
  };
 
-  const int input_size;
-  const int condition_size;
-  const int head_size;
-  const int channels;
-  const int kernel_size;
-  std::vector<int> dilations;
-  const std::string activation;
-  const bool gated;
-  const bool head_bias;
+  const int mInputSize;
+  const int mConditionSize;
+  const int mHeadSize;
+  const int mChannels;
+  const int mKernelSize;
+  std::vector<int> mDilations;
+  const std::string mActivation;
+  const bool mGated;
+  const bool mHeadBias;
 };
 
 // An array of layers with the same channels, kernel sizes, activations.
-class _LayerArray
+class LayerArray
 {
 public:
-  _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
-              const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
-              const bool head_bias);
+  LayerArray(const int inputSize, const int conditionSize, const int headSize, const int channels,
+             const int kernelSize, const std::vector<int>& dilations, const std::string& activation, const bool gated,
+             const bool head_bias);
 
-  void advance_buffers_(const int num_frames);
+  void AdvanceBuffers(const int numFrames);
 
   // Preparing for frames:
   // Rewind buffers if needed
   // Shift index to prepare
   //
-  void prepare_for_frames_(const long num_frames);
+  void PrepareForFrames(const long numFrames);
 
   // All arrays are "short".
-  void process_(const Eigen::MatrixXf& layer_inputs, // Short
-                const Eigen::MatrixXf& condition, // Short
-                Eigen::MatrixXf& layer_outputs, // Short
-                Eigen::MatrixXf& head_inputs, // Sum up on this.
-                Eigen::MatrixXf& head_outputs // post head-rechannel
+  void Process(const Eigen::Ref<const Eigen::MatrixXf> layerInputs, // Short
+               const Eigen::Ref<const Eigen::MatrixXf> condition, // Short
+               Eigen::Ref<Eigen::MatrixXf> layerOutputs, // Short
+               Eigen::Ref<Eigen::MatrixXf> headInputs, // Sum up on this.
+               Eigen::Ref<Eigen::MatrixXf> headOutputs // post head-rechannel
  );
-  void set_num_frames_(const long num_frames);
-  void set_weights_(std::vector<float>::iterator& it);
+  void SetNumFrames(const long numFrames);
+  void SetWeights(weightsIterator& it);
 
   // "Zero-indexed" receptive field.
   // E.g. a 1x1 convolution has a z.i.r.f. of zero.
-  long get_receptive_field() const;
+  long GetReceptiveField() const;
 
 private:
-  long _buffer_start;
+  long mBufferStart;
   // The rechannel before the layers
-  Conv1x1 _rechannel;
+  Conv1x1 mReChannel;
 
   // Buffers in between layers.
   // buffer [i] is the input to layer [i].
   // the last layer outputs to a short array provided by outside.
-  std::vector<Eigen::MatrixXf> _layer_buffers;
+  std::vector<Eigen::MatrixXf> mLayerBuffers;
   // The layer objects
-  std::vector<_Layer> _layers;
+  std::vector<Layer> mLayers;
   // Rechannel for the head
-  Conv1x1 _head_rechannel;
+  Conv1x1 mHeadRechannel;
 
-  long _get_buffer_size() const { return this->_layer_buffers.size() > 0 ? this->_layer_buffers[0].cols() : 0; };
-  long _get_channels() const;
+  long GetBufferSize() const { return mLayerBuffers.size() > 0 ? mLayerBuffers[0].cols() : 0; };
+  long GetChannels() const;
 
   // "One-indexed" receptive field
   // TODO remove!
   // E.g. a 1x1 convolution has a o.i.r.f. of one.
-  long _get_receptive_field() const;
-  void _rewind_buffers_();
+  long _GetReceptiveField() const; // TODO: why two!
+  void RewindBuffers();
 };
 
 // The head module
 // [Act->Conv] x L
-class _Head
+class Head
 {
 public:
-  _Head(const int input_size, const int num_layers, const int channels, const std::string activation);
-  void set_weights_(std::vector<float>::iterator& weights);
+  Head(const int inputSize, const int numLayers, const int channels, const std::string& activation);
+  void SetWeights(weightsIterator& weights);
   // NOTE: the head transforms the provided input by applying a nonlinearity
   // to it in-place!
-  void process_(Eigen::MatrixXf& inputs, Eigen::MatrixXf& outputs);
-  void set_num_frames_(const long num_frames);
+  void Process(Eigen::Ref<Eigen::MatrixXf> inputs, Eigen::Ref<Eigen::MatrixXf> outputs);
+  void SetNumFrames(const long numFrames);
 
 private:
-  int _channels;
-  std::vector<Conv1x1> _layers;
-  Conv1x1 _head;
-  activations::Activation* _activation;
+  int mChannels;
+  std::vector<Conv1x1> mLayers;
+  Conv1x1 mHead;
+  activations::Activation* mActivation;
   // Stores the outputs of the convs *except* the last one, which goes in
-  // The array `outputs` provided to .process_()
-  std::vector<Eigen::MatrixXf> _buffers;
+  // The array `outputs` provided to .Process()
+  std::vector<Eigen::MatrixXf> mBuffers;
 
   // Apply the activation to the provided array, in-place
-  void _apply_activation_(Eigen::MatrixXf& x);
+  void ApplyActivation(Eigen::Ref<Eigen::MatrixXf> x);
 };
 
 // The main WaveNet model
 class WaveNet : public DSP
 {
 public:
-  WaveNet(const std::vector<LayerArrayParams>& layer_array_params, const float head_scale, const bool with_head,
-          std::vector<float> weights, const double expected_sample_rate = -1.0);
+  WaveNet(const std::vector<LayerArrayParams>& layerArrayParams, const float headScale, const bool withHead,
+          const std::vector<float>& weights, const double expectedSampleRate = -1.0);
   ~WaveNet() = default;
 
-  void finalize_(const int num_frames) override;
-  void set_weights_(std::vector<float>& weights);
+  void Finalize(const int numFrames) override;
+  void SetWeights(const std::vector<float>& weights);
 
 private:
-  long _num_frames;
-  std::vector<_LayerArray> _layer_arrays;
+  long mNumFrames;
+  std::vector<LayerArray> mLayerArrays;
   // Their outputs
-  std::vector<Eigen::MatrixXf> _layer_array_outputs;
+  std::vector<Eigen::MatrixXf> mLayerArrayOutputs;
   //  Head _head;
   // Element-wise arrays:
-  Eigen::MatrixXf _condition;
+  Eigen::MatrixXf mCondition;
   // One more than total layer arrays
-  std::vector<Eigen::MatrixXf> _head_arrays;
-  float _head_scale;
-  Eigen::MatrixXf _head_output;
+  std::vector<Eigen::MatrixXf> mHeadArrays;
+  float mHeadScale;
+  Eigen::MatrixXf mHeadOutput;
 
-  void _advance_buffers_(const int num_frames);
-  void _prepare_for_frames_(const long num_frames);
-  void process(NAM_SAMPLE* input, NAM_SAMPLE* output, const int num_frames) override;
+  void AdvanceBuffers(const int numFrames);
+  void PrepareForFrames(const long numFrames);
+  void Process(float* input, float* output, const int numFrames) override;
 
-  virtual int _get_condition_dim() const { return 1; };
+  virtual int GetConditionDim() const { return 1; };
   // Fill in the "condition" array that's fed into the various parts of the net.
-  virtual void _set_condition_array(NAM_SAMPLE* input, const int num_frames);
-  // Ensure that all buffer arrays are the right size for this num_frames
-  void _set_num_frames_(const long num_frames);
+  virtual void SetConditionArray(float* input, const int numFrames);
+  // Ensure that all buffer arrays are the right size for this numFrames
+  void SetNumFrames(const long numFrames);
 };
 }; // namespace wavenet
 }; // namespace nam
diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp
index 46178d2..5432a53 100644
--- a/tools/benchmodel.cpp
+++ b/tools/benchmodel.cpp
@@ -1,4 +1,3 @@
-#include "malloc.h"
 #include <chrono>
 #include <iostream>
 
@@ -11,7 +10,7 @@ using std::chrono::milliseconds;
 
 #define AUDIO_BUFFER_SIZE 64
 
-double buffer[AUDIO_BUFFER_SIZE];
+float buffer[AUDIO_BUFFER_SIZE];
 
 int main(int argc, char* argv[])
 {
@@ -22,12 +21,12 @@ int main(int argc, char* argv[])
   std::cout << "Loading model " << modelPath << "\n";
 
   // Turn on fast tanh approximation
-  nam::activations::Activation::enable_fast_tanh();
+  nam::activations::Activation::EnableFastTanh();
 
   std::unique_ptr<nam::DSP> model;
 
   model.reset();
-  model = std::move(nam::get_dsp(modelPath));
+  model = std::move(nam::GetDSP(modelPath));
 
   if (model == nullptr)
   {
@@ -45,8 +44,8 @@ int main(int argc, char* argv[])
 
   for (size_t i = 0; i < numBuffers; i++)
   {
-    model->process(buffer, buffer, AUDIO_BUFFER_SIZE);
-    model->finalize_(AUDIO_BUFFER_SIZE);
+    model->Process(buffer, buffer, AUDIO_BUFFER_SIZE);
+    model->Finalize(AUDIO_BUFFER_SIZE);
   }
 
   std::cout << "Finished\n";
diff --git a/tools/loadmodel.cpp b/tools/loadmodel.cpp
index 8a1b889..e415b27 100644
--- a/tools/loadmodel.cpp
+++ b/tools/loadmodel.cpp
@@ -9,7 +9,7 @@ int main(int argc, char* argv[])
 
   fprintf(stderr, "Loading model [%s]\n", modelPath);
 
-  auto model = nam::get_dsp(modelPath);
+  auto model = nam::GetDSP(modelPath);
 
   if (model != nullptr)
   {
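
Usage note: a minimal sketch of how calling code adapts to the renamed public API, based on the calls exercised by tools/benchmodel.cpp and tools/loadmodel.cpp above. The header names and the helper function below are illustrative assumptions, not part of the diff.

// Sketch only: assumes "NAM/dsp.h" / "NAM/activations.h" declare GetDSP,
// EnableFastTanh, Process, and Finalize as used by the tools above.
#include <memory>

#include "NAM/activations.h"
#include "NAM/dsp.h"

int RunSketch(const char* modelPath)
{
  nam::activations::Activation::EnableFastTanh();           // was enable_fast_tanh()
  std::unique_ptr<nam::DSP> model = nam::GetDSP(modelPath); // was nam::get_dsp()
  if (model == nullptr)
    return 1;

  float buffer[64] = {}; // NAM_SAMPLE* parameters became float*
  model->Process(buffer, buffer, 64); // was process()
  model->Finalize(64);                // was finalize_()
  return 0;
}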
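
The gated branch of Layer::Process above splits _z into two blocks of `channels` rows, applies a sigmoid to the bottom block, and multiplies it element-wise into the top block. The following standalone Eigen sketch shows that arithmetic with illustrative names; tanh stands in for whatever activation the layer is configured with.

// Sketch of the gating arithmetic in Layer::Process (illustrative, standalone).
#include <Eigen/Dense>

// z has 2 * channels rows; returns the gated top half.
Eigen::MatrixXf GateSketch(Eigen::MatrixXf z, const long channels)
{
  // The layer activation (tanh here for concreteness) is applied to all of z first.
  z = z.array().tanh().matrix();
  // Sigmoid on the bottom half, as in GetActivation("Sigmoid")->Apply(_z.block(...)).
  z.bottomRows(channels) = ((-z.bottomRows(channels).array()).exp() + 1.0f).inverse().matrix();
  // Element-wise gate, as in _z.topRows(channels).array() *= _z.bottomRows(channels).array().
  z.topRows(channels).array() *= z.bottomRows(channels).array();
  return z.topRows(channels);
}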
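
LayerArray keeps a long per-layer history buffer with a moving write index: PrepareForFrames rewinds when the next block would overflow, RewindBuffers copies recent history back toward the front, and AdvanceBuffers moves the index forward after each processed block. Below is a simplified, self-contained model of that bookkeeping; names are illustrative, and unlike the real RewindBuffers it keeps the full receptive field for every layer rather than each layer's own dilation window.

// Simplified model of LayerArray's buffer bookkeeping (sketch only).
#include <Eigen/Dense>

struct BufferSketch
{
  Eigen::MatrixXf buf; // channels x (bufferSize + receptiveField - 1)
  long receptiveField;
  long start; // analogous to mBufferStart; initialized to receptiveField - 1

  void PrepareForFrames(const long numFrames)
  {
    // Same overflow test as LayerArray::PrepareForFrames.
    if (start + numFrames > buf.cols())
      Rewind();
  }

  void Rewind()
  {
    // Keep enough history for the dilated convolutions, then restart near the front.
    // Assumes start >= 2 * keep so the source and destination columns do not overlap.
    const long keep = receptiveField - 1;
    buf.leftCols(keep) = buf.middleCols(start - keep, keep);
    start = keep;
  }

  void Advance(const long numFrames) { start += numFrames; } // as in AdvanceBuffers
};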