From c266a3a1c02c7debb4962567a67385249f443fb8 Mon Sep 17 00:00:00 2001
From: pr0m1th3as
Date: Sun, 25 Aug 2024 00:49:10 +0300
Subject: [PATCH] update Makefile

---
 src/Makefile       |   4 +-
 src/fcnn.cpp       | 615 +++++++++++++++++++++++++++++++++++++++++++++
 src/fcnnpredict.cc |   2 +-
 src/fcnntrain.cc   |   2 +-
 4 files changed, 619 insertions(+), 4 deletions(-)
 create mode 100644 src/fcnn.cpp

diff --git a/src/Makefile b/src/Makefile
index 5457a500..58fdcb87 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -6,5 +6,5 @@ all:
 	$(MKOCTFILE) libsvmwrite.cc
 	$(MKOCTFILE) svmpredict.cc svm.cpp svm_model_octave.cc
 	$(MKOCTFILE) svmtrain.cc svm.cpp svm_model_octave.cc
-	$(MKOCTFILE) fcnntrain.cc
-	$(MKOCTFILE) fcnnpredict.cc
+	$(MKOCTFILE) fcnntrain.cc fcnn.cpp
+	$(MKOCTFILE) fcnnpredict.cc fcnn.cpp
diff --git a/src/fcnn.cpp b/src/fcnn.cpp
new file mode 100644
index 00000000..61a922da
--- /dev/null
+++ b/src/fcnn.cpp
@@ -0,0 +1,615 @@
+/*
+Copyright (C) 2024 Andreas Bertsatos
+
+This file is part of the statistics package for GNU Octave.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <cstdlib>
+#include <cmath>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <omp.h>
+
+using namespace std;
+
+// Helper functions
+
+// Return a uniform random number in the range [-1, 1]
+double get_random ()
+{
+  return (rand () / (double) RAND_MAX) * 2 - 1;
+}
+
+// Compute the accuracy of predicted samples during training
+double accuracy (vector<double> predictions, vector<double> labels)
+{
+  double correct = 0.0;
+  for (int i = 0; i < predictions.size (); i++)
+  {
+    if (predictions[i] == labels[i] - 1)
+    {
+      correct += 1.0;
+    }
+  }
+  double accuracy = correct / (double) predictions.size ();
+  return accuracy;
+}
+
+// Class definitions
+
+class Neuron
+{
+public:
+  // constructor
+  Neuron (int input_size);
+  // destructor
+  ~Neuron ();
+
+  // methods
+  double forward (vector<double> inputs);
+  void backward (vector<double> last_input, double grad);
+  void descend (double learning_rate);
+  vector<double> get_neuron ();
+  void set_neuron (vector<double> Wb_vector);
+  void zero_gradient ();
+
+  // data
+  vector<double> weights;
+  vector<double> wgrad;
+  double bias;
+  double bgrad;
+};
+
+Neuron::Neuron (int input_size)
+{
+  this->weights = vector<double> (input_size);
+  this->bias = 0.01 * get_random ();
+  for (int i = 0; i < input_size; i++)
+  {
+    this->weights[i] = get_random ();
+  }
+}
+Neuron::~Neuron () {}
+
+double Neuron::forward (vector<double> inputs)
+{
+  double total = this->bias;
+  for (int i = 0; i < inputs.size (); i++)
+  {
+    total += inputs[i] * this->weights[i];
+  }
+  return total;
+}
+
+void Neuron::backward (vector<double> last_input, double grad)
+{
+  this->bgrad += grad;
+  for (int i = 0; i < this->wgrad.size (); i++)
+  {
+    this->wgrad.at (i) = this->wgrad.at (i) + grad * last_input.at (i);
+  }
+}
+
+void Neuron::descend (double learning_rate)
+{
+  this->bias -= this->bgrad * learning_rate;
+  for (int i = 0; i < this->weights.size (); i++)
+  {
+    this->weights.at (i) -= this->wgrad.at (i) * learning_rate;
+  }
+}
+
+vector<double> Neuron::get_neuron ()
+{
+  vector<double> Wb_vector = this->weights;
+  Wb_vector.push_back (this->bias);
+  return Wb_vector;
+}
+
+void Neuron::set_neuron (vector<double> Wb_vector)
+{
+  int w_len = Wb_vector.size () - 1;
+  for (int i = 0; i < w_len; i++)
+  {
+    this->weights.at (i) = Wb_vector[i];
+  }
+  this->bias = Wb_vector[w_len];
+}
+
+void Neuron::zero_gradient ()
+{
+  this->wgrad = vector<double> (this->weights.size ());
+  this->bgrad = 0.0;
+}
+
+class DenseLayer
+{
+public:
+  // constructor
+  DenseLayer (int input_size, int output_size);
+  // destructor
+  ~DenseLayer ();
+
+  // methods
+  vector<double> forward (vector<double> inputs);
+  void backward (vector<double> grad);
+  void descend (double learning_rate);
+  vector<vector<double>> get_layer ();
+  void set_layer (vector<vector<double>> Wb_matrix);
+  void zero_gradient ();
+
+  // data
+  vector<Neuron> neurons;
+  vector<double> last_input;
+};
+
+DenseLayer::DenseLayer (int input_size, int output_size)
+{
+  // initialize neurons
+  this->neurons = vector<Neuron> ();
+  for (int i = 0; i < output_size; i++)
+  {
+    Neuron to_add = Neuron (input_size);
+    this->neurons.push_back (to_add);
+  }
+}
+DenseLayer::~DenseLayer () {}
+
+vector<double> DenseLayer::forward (vector<double> inputs)
+{
+  this->last_input = inputs;
+  vector<double> outputs = vector<double> (this->neurons.size ());
+  for (int i = 0; i < this->neurons.size (); i++)
+  {
+    outputs[i] = this->neurons[i].forward (inputs);
+  }
+  return outputs;
+}
+
+void DenseLayer::backward (vector<double> grad)
+{
+  for (int i = 0; i < this->neurons.size (); i++)
+  {
+    this->neurons[i].backward (last_input, grad[i]);
+  }
+}
+
+void DenseLayer::descend (double learning_rate)
+{
+  for (int i = 0; i < this->neurons.size (); i++)
+  {
+    this->neurons[i].descend (learning_rate);
+  }
+}
+
+vector<vector<double>> DenseLayer::get_layer ()
+{
+  vector<vector<double>> Wb_matrix;
+  for (int i = 0; i < this->neurons.size (); i++)
+  {
+    vector<double> WB_vector = this->neurons[i].get_neuron ();
+    Wb_matrix.push_back (WB_vector);
+  }
+  return Wb_matrix;
+}
+
+void DenseLayer::set_layer (vector<vector<double>> Wb_matrix)
+{
+  for (int i = 0; i < Wb_matrix.size (); i++)
+  {
+    this->neurons[i].set_neuron (Wb_matrix[i]);
+  }
+}
+
+void DenseLayer::zero_gradient ()
+{
+  for (int i = 0; i < this->neurons.size (); i++)
+  {
+    this->neurons[i].zero_gradient ();
+  }
+}
+
+class ActivationLayer
+{
+public:
+  // constructor
+  ActivationLayer (int activation, double alpha);
+  // destructor
+  ~ActivationLayer ();
+
+  // methods
+  vector<double> forward (vector<double> inputs);
+  void backward (vector<double> grad);
+  void backward (DenseLayer &prev_layer);
+
+  // data
+  vector<double> last_input;
+  vector<double> grad;
+  vector<double> last_output;
+private:
+  int activation;
+  double alpha;
+};
+
+ActivationLayer::ActivationLayer (int activation, double alpha)
+{
+  this->activation = activation;
+  this->alpha = alpha;
+}
+ActivationLayer::~ActivationLayer () {}
+
+vector<double> ActivationLayer::forward (vector<double> inputs)
+{
+  this->last_input = inputs;
+  vector<double> outputs = vector<double> (inputs.size ());
+  if (this->activation == 0)        // 'Linear'
+  {
+    outputs = inputs;
+  }
+  else if (this->activation == 1)   // Sigmoid function
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < inputs.size (); i++)
+      {
+        outputs[i] = 1 / (1 + exp (-inputs[i]));
+      }
+    }
+  }
+  else if (this->activation == 2)   // Rectified Linear Unit (ReLU)
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < inputs.size (); i++)
+      {
+        outputs[i] = inputs[i] > 0 ? inputs[i] : 0;
+      }
+    }
+  }
+  else if (this->activation == 3)   // Hyperbolic tangent (tanh)
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < inputs.size (); i++)
+      {
+        double ex = exp (inputs[i]);
+        double e_x = exp (-inputs[i]);
+        outputs[i] = (ex - e_x) / (ex + e_x);
+      }
+    }
+  }
+  else if (this->activation == 4)   // Softmax activation
+  {
+    double total = 0.0;
+    double maxel = *max_element (inputs.begin (), inputs.end ());
+    for (int i = 0; i < inputs.size (); i++)
+    {
+      outputs[i] = exp (inputs[i] - maxel);
+      total += outputs[i];
+    }
+    for (int i = 0; i < inputs.size (); i++)
+    {
+      outputs[i] /= total;
+    }
+  }
+  else if (this->activation == 5)   // Parametric or Leaky ReLU
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < inputs.size (); i++)
+      {
+        outputs[i] = inputs[i] >= 0 ? inputs[i] : inputs[i] * this->alpha;
+      }
+    }
+  }
+  else if (this->activation == 6)   // Exponential Linear Unit (ELU)
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < inputs.size (); i++)
+      {
+        outputs[i] = inputs[i] >= 0 ? inputs[i] : (exp (inputs[i]) - 1)
+                                                  * this->alpha;
+      }
+    }
+  }
+  else if (this->activation == 7)   // Gaussian Error Linear Unit (GELU)
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < inputs.size (); i++)
+      {
+        double x_3 = pow (inputs[i], 3) * 0.044715;
+        outputs[i] = 0.5 * inputs[i] * (1 + tanh (sqrt (2 / M_PI)
+                                                  * (inputs[i] + x_3)));
+      }
+    }
+  }
+  this->last_output = outputs;
+  return outputs;
+}
+
+void ActivationLayer::backward (vector<double> chain_grad)
+{
+  this->grad = vector<double> (this->last_input.size ());
+  if (this->activation == 0)        // 'Linear'
+  {
+    this->grad = chain_grad;
+  }
+  else if (this->activation == 1)   // Sigmoid function
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < this->last_input.size (); i++)
+      {
+        this->grad[i] = this->last_output[i] * (1 - this->last_output[i])
+                        * chain_grad[i];
+      }
+    }
+  }
+  else if (this->activation == 2)   // Rectified Linear Unit (ReLU)
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < last_input.size (); i++)
+      {
+        this->grad[i] = this->last_input[i] > 0 ? chain_grad[i] : 0;
+      }
+    }
+  }
+  else if (this->activation == 3)   // Hyperbolic tangent (tanh)
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < last_input.size (); i++)
+      {
+        this->grad[i] = (1 - pow (this->last_output[i], 2)) * chain_grad[i];
+      }
+    }
+  }
+  else if (this->activation == 4)   // Softmax activation
+  {
+    // WARNING: this code may be incorrect
+    this->grad = chain_grad;
+  }
+  else if (this->activation == 5)   // Parametric or Leaky ReLU
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < last_input.size (); i++)
+      {
+        this->grad[i] = this->last_input[i] >= 0 ? chain_grad[i] :
+                                                   chain_grad[i] * this->alpha;
+      }
+    }
+  }
+  else if (this->activation == 6)   // Exponential Linear Unit (ELU)
+  {
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < last_input.size (); i++)
+      {
+        this->grad[i] = this->last_input[i] >= 0 ? chain_grad[i] :
+                        chain_grad[i] * exp (this->last_output[i]) * this->alpha;
+      }
+    }
+  }
+  else if (this->activation == 7)   // Gaussian Error Linear Unit (GELU)
+  {
+    // WARNING: this code may be incorrect
+    static const double inv_sqrt_2pi = 0.3989422804014327;
+    #pragma omp parallel
+    {
+      #pragma omp for
+      for (int i = 0; i < last_input.size (); i++)
+      {
+        double pdf_val = inv_sqrt_2pi * exp (-0.5 * this->last_output[i]
+                                                  * this->last_output[i]);
+        this->grad[i] = this->last_output[i] * pdf_val * chain_grad[i];
+      }
+    }
+  }
+}
+
+void ActivationLayer::backward (DenseLayer &prev_layer)
+{
+  this->grad = vector<double> (this->last_input.size ());
+  if (this->activation == 0)        // 'Linear'
+  {
+    for (int i = 0; i < prev_layer.last_input.size (); i++)
+    {
+      for (int n = 0; n < prev_layer.neurons.size (); n++)
+      {
+        double curr_grad = this->last_output[n];
+        double chain_grad = prev_layer.neurons[n].weights[i] *
+                            prev_layer.neurons[n].wgrad[i] /
+                            prev_layer.last_input[i];
+        this->grad[i] += curr_grad * chain_grad;
+      }
+    }
+  }
+  else if (this->activation == 1)   // Sigmoid function
+  {
+    for (int i = 0; i < prev_layer.last_input.size (); i++)
+    {
+      for (int n = 0; n < prev_layer.neurons.size (); n++)
+      {
+        double curr_grad = this->last_output[n] * (1 - this->last_output[n]);
+        double chain_grad = prev_layer.neurons[n].weights[i] *
+                            prev_layer.neurons[n].wgrad[i] /
+                            prev_layer.last_input[i];
+        this->grad[i] += curr_grad * chain_grad;
+      }
+    }
+  }
+  else if (this->activation == 2)   // Rectified Linear Unit (ReLU)
+  {
+    for (int i = 0; i < prev_layer.last_input.size (); i++)
+    {
+      for (int n = 0; n < prev_layer.neurons.size (); n++)
+      {
+        double grad = prev_layer.neurons[n].wgrad[i] / prev_layer.last_input[i];
+        if (this->last_input[i] < 0)
+        {
+          this->grad[i] = 0;
+        }
+        else
+        {
+          this->grad[i] += prev_layer.neurons[n].weights[i] * grad;
+        }
+      }
+    }
+  }
+  else if (this->activation == 3)   // Hyperbolic tangent (tanh)
+  {
+    for (int i = 0; i < prev_layer.last_input.size (); i++)
+    {
+      for (int n = 0; n < prev_layer.neurons.size (); n++)
+      {
+        double curr_grad = this->last_output[n] *
+                           (1 - pow (this->last_output[i], 2));
+        double chain_grad = prev_layer.neurons[n].weights[i] *
+                            prev_layer.neurons[n].wgrad[i] /
+                            prev_layer.last_input[i];
+        this->grad[i] += curr_grad * chain_grad;
+      }
+    }
+  }
+  else if (this->activation == 4)   // Softmax activation
+  {
+    // WARNING: this code may be incorrect
+    for (int i = 0; i < prev_layer.last_input.size (); i++)
+    {
+      for (int n = 0; n < prev_layer.neurons.size (); n++)
+      {
+        double curr_grad = this->last_output[n];
+        double chain_grad = prev_layer.neurons[n].weights[i] *
+                            prev_layer.neurons[n].wgrad[i] /
+                            prev_layer.last_input[i];
+        this->grad[i] += curr_grad * chain_grad;
+      }
+    }
+  }
+  else if (this->activation == 5)   // Parametric or Leaky ReLU
+  {
+    for (int i = 0; i < prev_layer.last_input.size (); i++)
+    {
+      for (int n = 0; n < prev_layer.neurons.size (); n++)
+      {
+        double grad = prev_layer.neurons[n].wgrad[i] / prev_layer.last_input[i];
+        this->grad[i] += prev_layer.neurons[n].weights[i] * grad;
+      }
+    }
+    for (int i = 0; i < this->last_input.size (); i++)
+    {
+      this->grad[i] = this->last_input[i] >= 0 ? this->grad[i] :
+                                                 this->grad[i] * this->alpha;
+    }
+  }
+  else if (this->activation == 6)   // Exponential Linear Unit (ELU)
+  {
+    for (int i = 0; i < prev_layer.last_input.size (); i++)
+    {
+      for (int n = 0; n < prev_layer.neurons.size (); n++)
+      {
+        double grad = prev_layer.neurons[n].wgrad[i] / prev_layer.last_input[i];
+        this->grad[i] += prev_layer.neurons[n].weights[i] * grad;
+      }
+    }
+    for (int i = 0; i < this->last_input.size (); i++)
+    {
+      this->grad[i] = this->last_input[i] >= 0 ? this->grad[i] : this->grad[i]
+                                                 * exp (this->last_output[i])
+                                                 * this->alpha;
+    }
+  }
+  else if (this->activation == 7)   // Gaussian Error Linear Unit (GELU)
+  {
+    // WARNING: this code may be incorrect
+    static const double inv_sqrt_2pi = 0.3989422804014327;
+    for (int i = 0; i < prev_layer.last_input.size (); i++)
+    {
+      for (int n = 0; n < prev_layer.neurons.size (); n++)
+      {
+        double pdf_val = inv_sqrt_2pi * exp (-0.5 * this->last_output[i]
+                                                  * this->last_output[i]);
+        double curr_grad = this->last_output[n] * pdf_val * this->last_output[i];
+        double chain_grad = prev_layer.neurons[n].weights[i] *
+                            prev_layer.neurons[n].wgrad[i] /
+                            prev_layer.last_input[i];
+        this->grad[i] += curr_grad * chain_grad;
+      }
+    }
+  }
+}
+
+class MeanSquaredErrorLoss
+{
+public:
+  MeanSquaredErrorLoss ();
+  ~MeanSquaredErrorLoss ();
+
+  double forward (vector<double> inputs, vector<double> targets);
+  void backward (double grad);
+
+  // data
+  vector<double> last_input;
+  vector<double> last_target;
+  vector<double> grad;
+};
+
+MeanSquaredErrorLoss::MeanSquaredErrorLoss () {}
+MeanSquaredErrorLoss::~MeanSquaredErrorLoss () {}
+
+double MeanSquaredErrorLoss::forward (vector<double> inputs,
+                                      vector<double> targets)
+{
+  // compute the sum of squared differences over all output elements
+  this->last_input = inputs;
+  this->last_target = targets;
+
+  double total = 0;
+
+  for (int i = 0; i < inputs.size (); i++)
+  {
+    total += pow (inputs[i] - targets[i], 2);
+  }
+
+  double loss = total;
+  return loss;
+}
+
+void MeanSquaredErrorLoss::backward (double grad)
+{
+  this->grad = vector<double> (this->last_input.size ());
+  for (int i = 0; i < this->last_input.size (); i++)
+  {
+    this->grad.at (i) = 2 * (this->last_input[i] - this->last_target[i]);
+    this->grad.at (i) *= grad;
+  }
+}
diff --git a/src/fcnnpredict.cc b/src/fcnnpredict.cc
index 42bdaa82..9921f3f1 100644
--- a/src/fcnnpredict.cc
+++ b/src/fcnnpredict.cc
@@ -26,7 +26,7 @@ this program; if not, see <http://www.gnu.org/licenses/>.
 #include 
 #include 
 #include 
-#include "fcnn.cc"
+#include "fcnn.cpp"
 
 using namespace std;
 
diff --git a/src/fcnntrain.cc b/src/fcnntrain.cc
index 6f7fa9db..18a1beae 100644
--- a/src/fcnntrain.cc
+++ b/src/fcnntrain.cc
@@ -26,7 +26,7 @@ this program; if not, see <http://www.gnu.org/licenses/>.
 #include 
 #include 
 #include 
-#include "fcnn.cc"
+#include "fcnn.cpp"
 
 using namespace std;