Fix type for LeastSquares gradient (JuliaFirstOrder#134)
* add gradient test util

* fix function value type
lostella authored Sep 29, 2021
1 parent c9a250f commit fc46398
Showing 16 changed files with 44 additions and 40 deletions.
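What this commit fixes: for complex-valued residuals, `dot(f.res, f.res)` in Julia returns a `Complex` number (its imaginary part is zero, but the type is still complex), so `gradient!` returned a function value whose type did not match the real type `R` promised by the API. Wrapping the inner product in `real` restores the expected type, and the new `gradient_test` helper in test/runtests.jl asserts it. A minimal sketch of the type behavior, with illustrative values (not part of the diff):

using LinearAlgebra
res = ComplexF64[1.0 + 2.0im, 3.0 - 1.0im]
lambda = 0.5
fy_old = (lambda / 2) * dot(res, res)        # ComplexF64: 3.75 + 0.0im — wrong type for a function value
fy_new = (lambda / 2) * real(dot(res, res))  # Float64: 3.75 — matches the real type R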
2 changes: 1 addition & 1 deletion src/functions/leastSquaresDirect.jl
@@ -128,7 +128,7 @@ function gradient!(y::AbstractArray{C, N}, f::LeastSquaresDirect{N, R, C, M, V,
f.res .-= f.b
mul!(y, adjoint(f.A), f.res)
y .*= f.lambda
-fy = (f.lambda/2)*dot(f.res, f.res)
+return (f.lambda / 2) * real(dot(f.res, f.res))
end

function prox_naive(f::LeastSquaresDirect{N, R, C}, x::AbstractArray{C, N}, gamma::R=R(1)) where {N, R, C <: RealOrComplex{R}}
2 changes: 1 addition & 1 deletion src/functions/leastSquaresIterative.jl
@@ -70,7 +70,7 @@ function gradient!(y::AbstractArray{D, N}, f::LeastSquaresIterative{N, R, RC, M,
f.res .-= f.b
mul!(y, adjoint(f.A), f.res)
y .*= f.lambda
-fy = (f.lambda/2)*dot(f.res, f.res)
+return (f.lambda / 2) * real(dot(f.res, f.res))
end

function prox_naive(f::LeastSquaresIterative{N}, x::AbstractArray{D, N}, gamma::R=R(1)) where {N, R, D <: RealOrComplex{R}}
7 changes: 7 additions & 0 deletions test/runtests.jl
@@ -67,6 +67,13 @@ function prox_test(f, x::ArrayOrTuple{R}, gamma=R(1)) where R <: Real
return y, fy
end

+# tests that the function value returned by gradient has the expected real type R
+function gradient_test(f, x::ArrayOrTuple{R}, gamma=R(1)) where R <: Real
+grad_fx, fx = gradient(f, x)
+@test typeof(fx) == R
+return grad_fx, fx
+end

# test predicates consistency
# i.e., that more specific properties imply less specific ones
# e.g., the indicator of a subspace is the indicator of a set in particular
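For reference, a usage sketch of the helper added above, assuming it runs inside this test suite (where `ArrayOrTuple` is defined); the `HuberLoss` parameters are illustrative, not taken from the diff:

f = HuberLoss(1.5, 0.7)
x = randn(Float64, 10)
grad_fx, fx = gradient_test(f, x)  # also runs @test typeof(fx) == Float64
@test fx ≈ f(x)                    # gradient returns the function value alongside the gradient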
2 changes: 1 addition & 1 deletion test/test_cubeNormL2.jl
@@ -14,7 +14,7 @@ for R in [Float16, Float32, Float64]
call_test(f, x)
gamma = R(0.5)+rand(R)
y, f_y = prox_test(f, x, gamma)
-grad_f_y, f_y = gradient(f, y)
+grad_f_y, f_y = gradient_test(f, y)
@test grad_f_y ≈ (x - y)/gamma
end
end
5 changes: 2 additions & 3 deletions test/test_gradients.jl
@@ -149,14 +149,13 @@ for i = 1:length(stuff)
ref_∇f = stuff[i]["∇f(x)"]

ref_fx = f(x)
-∇f = similar(x)
-fx = gradient!(∇f, f, x)
+∇f, fx = gradient_test(f, x)
@test fx ≈ ref_fx
@test ∇f ≈ ref_∇f

for j = 1:11
#For initial point x and 10 other random points
-fx = gradient!(∇f, f, x)
+∇f, fx = gradient_test(f, x)
for k = 1:10
# Test conditions in different directions
if ProximalOperators.is_convex(f)
4 changes: 2 additions & 2 deletions test/test_huberLoss.jl
@@ -17,7 +17,7 @@ x = 1.6*x/norm(x)

call_test(f, x)
prox_test(f, x, 1.3)
-grad_fx, fx = gradient(f, x)
+grad_fx, fx = gradient_test(f, x)

@test abs(fx - f(x)) <= 1e-12
@test norm(0.7*1.5*x/norm(x) - grad_fx, Inf) <= 1e-12
@@ -27,7 +27,7 @@ x = 1.4*x/norm(x)

call_test(f, x)
prox_test(f, x, 0.9)
-grad_fx, fx = gradient(f, x)
+grad_fx, fx = gradient_test(f, x)

@test abs(fx - f(x)) <= 1e-12
@test norm(0.7*x - grad_fx, Inf) <= 1e-12
4 changes: 2 additions & 2 deletions test/test_leastSquares.jl
@@ -38,7 +38,7 @@ predicates_test(f)
@test ProximalOperators.is_generalized_quadratic(f) == true
@test ProximalOperators.is_set(f) == false

-grad_fx, fx = gradient(f, x)
+grad_fx, fx = gradient_test(f, x)
lsres = A*x - b
@test fx ≈ 0.5*norm(lsres)^2
@test all(grad_fx .≈ (A'*lsres))
@@ -51,7 +51,7 @@ lam = R(0.1) + rand(R)
f = LeastSquares(A, b, lam, iterative=(mode == :iterative))
predicates_test(f)

-grad_fx, fx = gradient(f, x)
+grad_fx, fx = gradient_test(f, x)
@test fx ≈ (lam/2)*norm(lsres)^2
@test all(grad_fx .≈ lam*(A'*lsres))

2 changes: 1 addition & 1 deletion test/test_linear.jl
@@ -11,7 +11,7 @@ for R in [Float16, Float32, Float64]
f = Linear(c)
predicates_test(f)
x = randn(R, shape)
-@test gradient(f, x) == (c, f(x))
+@test gradient_test(f, x) == (c, f(x))
call_test(f, x)
prox_test(f, x, R(0.5)+rand(R))
end
6 changes: 3 additions & 3 deletions test/test_logisticLoss.jl
@@ -15,7 +15,7 @@ f_x_1 = f(x)

@test typeof(f_x_1) == T

-grad_f_x, f_x_2 = gradient(f, x)
+grad_f_x, f_x_2 = gradient_test(f, x)

f_x_ref = 5.893450123044199
grad_f_x_ref = [-1.0965878679450072, 0.17880438303317633, -0.07113880976635019, 1.3211956169668235, -0.4034121320549927]
@@ -25,13 +25,13 @@ grad_f_x_ref = [-1.0965878679450072, 0.17880438303317633, -0.07113880976635019,
@test all(grad_f_x .≈ grad_f_x_ref)

z1, f_z1 = prox(f, x)
-grad_f_z1, = gradient(f, z1)
+grad_f_z1, = gradient_test(f, z1)

@test typeof(f_z1) == T
@test norm((x - z1)./1.0 - grad_f_z1, Inf)/norm(grad_f_z1, Inf) <= 1e-4

z2, f_z2 = prox(f, x, T(2.0))
-grad_f_z2, = gradient(f, z2)
+grad_f_z2, = gradient_test(f, z2)

@test typeof(f_z2) == T
@test norm((x - z2)./2.0 - grad_f_z2, Inf)/norm(grad_f_z2, Inf) <= 1e-4
10 changes: 5 additions & 5 deletions test/test_moreauEnvelope.jl
@@ -20,10 +20,10 @@ using LinearAlgebra

x = R[1.0, 2.0, 3.0, 4.0, 5.0]

-grad_g_x, g_x = gradient(g, x)
+grad_g_x, g_x = gradient_test(g, x)

y, g_y = prox_test(g, x, R(1/2))
-grad_g_y, _ = gradient(g, y)
+grad_g_y, _ = gradient_test(g, y)

@test y + grad_g_y / 2 ≈ x
@test g(y) ≈ g_y
@@ -48,15 +48,15 @@ end

@test g(x) ≈ h(x)

-grad_g_x, g_x = gradient(g, x)
-grad_h_x, h_x = gradient(h, x)
+grad_g_x, g_x = gradient_test(g, x)
+grad_h_x, h_x = gradient_test(h, x)

@test g_x ≈ g(x)
@test h_x ≈ h(x)
@test all(grad_g_x .≈ grad_h_x)

y, g_y = prox_test(g, x, R(1/2))
-grad_g_y, _ = gradient(g, y)
+grad_g_y, _ = gradient_test(g, y)

@test y + grad_g_y / 2 ≈ x
@test g(y) ≈ g_y
8 changes: 4 additions & 4 deletions test/test_postcompose.jl
@@ -14,8 +14,8 @@ hx = call_test(h, x)

@test abs(gx-hx)/(1+abs(gx)) <= 1e-12

-grad_gx, gx1 = gradient(g, x)
-grad_hx, hx1 = gradient(h, x)
+grad_gx, gx1 = gradient_test(g, x)
+grad_hx, hx1 = gradient_test(h, x)

@test abs(gx1-gx)/(1+abs(gx)) <= 1e-12
@test abs(hx1-hx)/(1+abs(hx)) <= 1e-12
@@ -35,8 +35,8 @@ hx = call_test(h, x)

@test abs(gx-hx)/(1+abs(gx)) <= 1e-12

-grad_gx, gx1 = gradient(g, x)
-grad_hx, hx1 = gradient(h, x)
+grad_gx, gx1 = gradient_test(g, x)
+grad_hx, hx1 = gradient_test(h, x)

@test abs(gx1-gx)/(1+abs(gx)) <= 1e-12
@test abs(hx1-hx)/(1+abs(hx)) <= 1e-12
6 changes: 3 additions & 3 deletions test/test_precompose.jl
@@ -137,17 +137,17 @@ grad_gx = diagA.*diagA.*(x - b)
@test abs(g3(x) - gx)/(1+abs(gx)) <= 1e-14

call_test(g1, x)
-grad_g1_x, g1_x = gradient(g1, x)
+grad_g1_x, g1_x = gradient_test(g1, x)
@test abs(g1_x - gx) <= (1 + abs(gx))*1e-12
@test norm(grad_gx - grad_g1_x, Inf) <= 1e-12

call_test(g2, x)
-grad_g2_x, g2_x = gradient(g2, x)
+grad_g2_x, g2_x = gradient_test(g2, x)
@test abs(g2_x - gx) <= (1 + abs(gx))*1e-12
@test norm(grad_gx - grad_g2_x, Inf) <= 1e-12

call_test(g3, x)
-grad_g3_x, g3_x = gradient(g3, x)
+grad_g3_x, g3_x = gradient_test(g3, x)
@test abs(g3_x - gx) <= (1 + abs(gx))*1e-12
@test norm(grad_gx - grad_g3_x, Inf) <= 1e-12

4 changes: 2 additions & 2 deletions test/test_quadratic.jl
@@ -23,7 +23,7 @@ predicates_test(f)

x = randn(n)

-grad_fx, fx = gradient(f, x)
+grad_fx, fx = gradient_test(f, x)
@test fx ≈ 0.5*dot(x, Q*x) + dot(x, q)
@test all(grad_fx .≈ (Q*x + q))

@@ -44,7 +44,7 @@ Q = sparse(Q)
f = Quadratic(Q, q)
@test typeof(f) <: ProximalOperators.QuadraticDirect

-grad_fx, fx = gradient(f, x)
+grad_fx, fx = gradient_test(f, x)
@test fx ≈ 0.5*dot(x, Q*x) + dot(x, q)
@test all(grad_fx .≈ (Q*x + q))

2 changes: 1 addition & 1 deletion test/test_regularize.jl
@@ -41,7 +41,7 @@ gx = call_test(g, x)
@test abs(0.5*norm(res)^2 + (0.5*lam)*norm(x)^2 - gx)/(1+abs(gx)) <= 1e-12

prox_test(g, x, 0.7)
-grad_gx, gx1 = gradient(g, x)
+grad_gx, gx1 = gradient_test(g, x)

@test abs(gx - gx1)/(1+abs(gx)) <= 1e-12
@test norm(grad_gx - A'*(A*x - b) - lam*x, Inf)/(1+norm(grad_gx, Inf)) <= 1e-12
6 changes: 3 additions & 3 deletions test/test_separableSum.jl
@@ -49,8 +49,8 @@ fs = (SqrNormL2(), LeastSquares(randn(5,10), randn(5)))
f = SeparableSum(fs)
x, y = randn(10), randn(10)

-grad_f_x_y, f_x_y = gradient(f, (x, y))
-grad_f_x, f_x = gradient(fs[1], x)
-grad_f_y, f_y = gradient(fs[2], y)
+grad_f_x_y, f_x_y = gradient_test(f, (x, y))
+grad_f_x, f_x = gradient_test(fs[1], x)
+grad_f_y, f_y = gradient_test(fs[2], y)
@test norm(grad_f_x_y .- (grad_f_x, grad_f_y), Inf) <= 1e-12
@test abs((f_x+f_y)-f_x_y) <= 1e-12
14 changes: 6 additions & 8 deletions test/test_sum.jl
@@ -19,11 +19,10 @@ xtest = randn(10)
result = f1(xtest) + f2(xtest)
@test f(xtest) ≈ result

-grad1, val1 = gradient(f1, xtest)
-grad2, val2 = gradient(f2, xtest)
+grad1, val1 = gradient_test(f1, xtest)
+grad2, val2 = gradient_test(f2, xtest)

-gradsum = randn(size(xtest))
-valsum = gradient!(gradsum, f, xtest)
+gradsum, valsum = gradient_test(f, xtest)
@test gradsum ≈ grad1 + grad2

# nonsmooth case
@@ -43,9 +43,8 @@ xtest = randn(10)
result = g1(xtest) + g2(xtest)
@test g(xtest) ≈ result

-grad1, val1 = gradient(g1, xtest)
-grad2, val2 = gradient(g2, xtest)
+grad1, val1 = gradient_test(g1, xtest)
+grad2, val2 = gradient_test(g2, xtest)

-gradsum = randn(size(xtest))
-valsum = gradient!(gradsum, g, xtest)
+gradsum, valsum = gradient_test(g, xtest)
@test gradsum ≈ grad1 + grad2
