From ab71bda8de0507770b04835c5c4838e91f3c6261 Mon Sep 17 00:00:00 2001
From: Daniel Falbel
Date: Fri, 20 Sep 2024 17:08:09 -0300
Subject: [PATCH] Provide a way to set gradients to none (#1195)

* Provide a way to set gradients to none

* Use a different implementation
---
 R/RcppExports.R                       |  4 ++--
 R/optim.R                             |  4 ++--
 inst/include/lantern/lantern.h        |  6 +++---
 src/RcppExports.cpp                   |  9 +++++----
 src/autograd.cpp                      |  4 ++--
 src/lantern/include/lantern/lantern.h |  6 +++---
 src/lantern/src/Autograd.cpp          | 16 ++++++++++------
 tests/testthat/test-optim-sgd.R       | 12 ++++++++++++
 8 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/R/RcppExports.R b/R/RcppExports.R
index 3e702301fe..485aa3c3e1 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -185,8 +185,8 @@ cpp_set_cuda_allocator_allocator_thresholds <- function(reserved_rate, allocated
     invisible(.Call(`_torch_cpp_set_cuda_allocator_allocator_thresholds`, reserved_rate, allocated_rate, allocated_reserved_rate))
 }
 
-cpp_autograd_zero_grad <- function(x) {
-    invisible(.Call(`_torch_cpp_autograd_zero_grad`, x))
+cpp_autograd_zero_grad <- function(x, set_to_none) {
+    invisible(.Call(`_torch_cpp_autograd_zero_grad`, x, set_to_none))
 }
 
 cpp_backends_mkldnn_is_available <- function() {
diff --git a/R/optim.R b/R/optim.R
index e0469ce892..7e070ef5a1 100644
--- a/R/optim.R
+++ b/R/optim.R
@@ -82,9 +82,9 @@ Optimizer <- R6::R6Class(
 
       self$param_groups <- append(self$param_groups, list(param_group))
     },
-    zero_grad = function() {
+    zero_grad = function(set_to_none = FALSE) {
       for (group in self$param_groups) {
-        cpp_autograd_zero_grad(group$params)
+        cpp_autograd_zero_grad(group$params, set_to_none)
       }
     },
     state_dict = function() {
diff --git a/inst/include/lantern/lantern.h b/inst/include/lantern/lantern.h
index 633920c3e2..fbcf7c6d6f 100644
--- a/inst/include/lantern/lantern.h
+++ b/inst/include/lantern/lantern.h
@@ -2405,10 +2405,10 @@ HOST_API void* lantern_IntArrayRef_get (void* x)
   return ret;
 }
 
-LANTERN_API void (LANTERN_PTR _lantern_autograd_zero_grad) (void * self);
-HOST_API void lantern_autograd_zero_grad (void * self) {
+LANTERN_API void (LANTERN_PTR _lantern_autograd_zero_grad) (void * self, bool set_to_none);
+HOST_API void lantern_autograd_zero_grad (void * self, bool set_to_none) {
   LANTERN_CHECK_LOADED
-  _lantern_autograd_zero_grad(self);
+  _lantern_autograd_zero_grad(self, set_to_none);
   LANTERN_HOST_HANDLER;
 }
 
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 8554f6620e..44b14d42a0 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -518,12 +518,13 @@ BEGIN_RCPP
 END_RCPP
 }
 // cpp_autograd_zero_grad
-void cpp_autograd_zero_grad(torch::TensorList x);
-RcppExport SEXP _torch_cpp_autograd_zero_grad(SEXP xSEXP) {
+void cpp_autograd_zero_grad(torch::TensorList x, bool set_to_none);
+RcppExport SEXP _torch_cpp_autograd_zero_grad(SEXP xSEXP, SEXP set_to_noneSEXP) {
 BEGIN_RCPP
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< torch::TensorList >::type x(xSEXP);
-    cpp_autograd_zero_grad(x);
+    Rcpp::traits::input_parameter< bool >::type set_to_none(set_to_noneSEXP);
+    cpp_autograd_zero_grad(x, set_to_none);
     return R_NilValue;
 END_RCPP
 }
@@ -45647,7 +45648,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_torch_cpp_autograd_grad", (DL_FUNC) &_torch_cpp_autograd_grad, 6},
     {"_torch_cpp_set_lantern_allocator", (DL_FUNC) &_torch_cpp_set_lantern_allocator, 1},
     {"_torch_cpp_set_cuda_allocator_allocator_thresholds", (DL_FUNC) &_torch_cpp_set_cuda_allocator_allocator_thresholds, 3},
-    {"_torch_cpp_autograd_zero_grad", (DL_FUNC) &_torch_cpp_autograd_zero_grad, 1},
+    {"_torch_cpp_autograd_zero_grad", (DL_FUNC) &_torch_cpp_autograd_zero_grad, 2},
     {"_torch_cpp_backends_mkldnn_is_available", (DL_FUNC) &_torch_cpp_backends_mkldnn_is_available, 0},
     {"_torch_cpp_backends_mkl_is_available", (DL_FUNC) &_torch_cpp_backends_mkl_is_available, 0},
     {"_torch_cpp_backends_openmp_is_available", (DL_FUNC) &_torch_cpp_backends_openmp_is_available, 0},
diff --git a/src/autograd.cpp b/src/autograd.cpp
index 6ad799255e..994a692f89 100644
--- a/src/autograd.cpp
+++ b/src/autograd.cpp
@@ -416,6 +416,6 @@ void cpp_set_cuda_allocator_allocator_thresholds (double reserved_rate, double a
 }
 
 // [[Rcpp::export]]
-void cpp_autograd_zero_grad (torch::TensorList x) {
-  lantern_autograd_zero_grad(x.get());
+void cpp_autograd_zero_grad (torch::TensorList x, bool set_to_none) {
+  lantern_autograd_zero_grad(x.get(), set_to_none);
 }
diff --git a/src/lantern/include/lantern/lantern.h b/src/lantern/include/lantern/lantern.h
index 633920c3e2..fbcf7c6d6f 100644
--- a/src/lantern/include/lantern/lantern.h
+++ b/src/lantern/include/lantern/lantern.h
@@ -2405,10 +2405,10 @@ HOST_API void* lantern_IntArrayRef_get (void* x)
   return ret;
 }
 
-LANTERN_API void (LANTERN_PTR _lantern_autograd_zero_grad) (void * self);
-HOST_API void lantern_autograd_zero_grad (void * self) {
+LANTERN_API void (LANTERN_PTR _lantern_autograd_zero_grad) (void * self, bool set_to_none);
+HOST_API void lantern_autograd_zero_grad (void * self, bool set_to_none) {
   LANTERN_CHECK_LOADED
-  _lantern_autograd_zero_grad(self);
+  _lantern_autograd_zero_grad(self, set_to_none);
   LANTERN_HOST_HANDLER;
 }
 
diff --git a/src/lantern/src/Autograd.cpp b/src/lantern/src/Autograd.cpp
index 890f68137d..7202506e77 100644
--- a/src/lantern/src/Autograd.cpp
+++ b/src/lantern/src/Autograd.cpp
@@ -296,14 +296,18 @@ void *_lantern_Edge_function(void *self) {
   LANTERN_FUNCTION_END
 }
 
-void _lantern_autograd_zero_grad (void * self) {
+void _lantern_autograd_zero_grad (void * self, bool set_to_none) {
   LANTERN_FUNCTION_START
   auto list = from_raw::TensorList(self);
-  for (auto &t : list) {
-    auto grad = t.grad();
-    if (grad.defined()) {
-      grad.zero_();
-    }
+  for (auto &p : list) {
+    if (p.mutable_grad().defined()) {
+      p.mutable_grad().detach_();
+      if (set_to_none) {
+        p.mutable_grad().reset();
+      } else {
+        p.mutable_grad().zero_();
+      }
+    }
   }
   LANTERN_FUNCTION_END_VOID
 }
diff --git a/tests/testthat/test-optim-sgd.R b/tests/testthat/test-optim-sgd.R
index 2cb7255e6b..a808c121b2 100644
--- a/tests/testthat/test-optim-sgd.R
+++ b/tests/testthat/test-optim-sgd.R
@@ -53,4 +53,16 @@ test_that("copy state between optimizers corecctly", {
   opt$zero_grad()
 
   expect_equal_to_tensor(x, y)
+})
+
+test_that("zero_grad set_to_none", {
+  # start with a tensor and make one step in the optimizer
+  x <- torch_tensor(1, requires_grad = TRUE)
+
+  opt <- optim_sgd(x, lr = 0.1)
+  (2*x)$backward()
+  opt$step()
+  opt$zero_grad(set_to_none = TRUE)
+
+  expect_true(is_undefined_tensor(x$grad))
 })
\ No newline at end of file
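
For reference, the behaviour the new test exercises: by default `opt$zero_grad()` keeps each parameter's gradient tensor and fills it with zeros, while `opt$zero_grad(set_to_none = TRUE)` detaches and resets the gradient so it comes back undefined. A minimal R sketch of the two code paths, assuming a torch build that includes this patch (`is_undefined_tensor()` is the same helper the new test relies on):

library(torch)

# one parameter and one SGD step, mirroring the added test
x   <- torch_tensor(1, requires_grad = TRUE)
opt <- optim_sgd(x, lr = 0.1)

# default path: the gradient tensor stays defined and is zeroed in place
(2 * x)$backward()
opt$step()
opt$zero_grad()
x$grad                       # a defined tensor holding 0

# new path: the gradient is detached and released entirely
(2 * x)$backward()
opt$step()
opt$zero_grad(set_to_none = TRUE)
is_undefined_tensor(x$grad)  # TRUE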