simplify test machinery #2498

Merged: 12 commits, Oct 13, 2024
1 change: 1 addition & 0 deletions .gitignore
@@ -10,3 +10,4 @@ Manifest.toml
LocalPreferences.toml
.DS_Store
docs/mymodel.bson
prova.jl
2 changes: 1 addition & 1 deletion src/distributed/public_api.jl
@@ -132,7 +132,7 @@ Backend Agnostic API to perform an allreduce operation on the given buffer `sendrecvbuf` across
workers.
"""
function allreduce!(backend::AbstractFluxDistributedBackend, sendrecvbuf, op::F) where {F}
return __allreduce!(backend, sendrecvbuf, op, get_device())
return __allreduce!(backend, sendrecvbuf, op, gpu_device())
end

function allreduce!(
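The distributed API now resolves the accelerator through `gpu_device()` (the MLDataDevices-style device API that Flux re-exports) instead of the removed `Flux.get_device()`. A minimal sketch of how callers obtain and use a device under the new API; the variable names are illustrative:

```julia
# Minimal sketch (not part of the diff): device selection with the new API.
# `gpu_device()` returns a functional GPU device if a backend such as CUDA or
# AMDGPU is loaded, and falls back to the CPU device otherwise.
using Flux

dev  = gpu_device()            # e.g. CUDADevice, AMDGPUDevice, or CPUDevice
cdev = cpu_device()

x = rand(Float32, 4) |> dev    # move data to the selected device
x = x |> cdev                  # and back to the CPU
```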
2 changes: 1 addition & 1 deletion test/Project.toml
@@ -21,4 +21,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
[compat]
FiniteDifferences = "0.12"
Tracker = "0.2.33"
Enzyme = "0.12.4"
Enzyme = "0.13"
41 changes: 21 additions & 20 deletions test/ext_amdgpu/basic.jl
@@ -19,26 +19,27 @@ end
end

@testset "Chain of Dense layers" begin
m = Chain(Dense(10, 5, tanh), Dense(5, 2), softmax) |> f32
m = Chain(Dense(10, 5, tanh), Dense(5, 2), softmax)
x = rand(Float32, 10, 10)
gpu_autodiff_test(m, x)
test_gradients(m, x, test_gpu=true, compare_finite_diff=false)
end

@testset "Convolution" begin
for conv_type in (Conv, ConvTranspose), nd in 1:3
m = conv_type(tuple(fill(2, nd)...), 3 => 4) |> f32
m = conv_type(tuple(fill(2, nd)...), 3 => 4)
x = rand(Float32, fill(10, nd)..., 3, 5)

md, xd = Flux.gpu.((m, x))
y = m(x)
# Ensure outputs are the same.
gpu_autodiff_test(m, x; atol=1f-3, checkgrad=false)
@test collect(md(xd)) ≈ y atol=1f-3

# Gradients are flipped as well.
md, xd = Flux.gpu.((m, x))
gs = gradient(m -> sum(m(x)), m)
gsd = gradient(m -> sum(m(xd)), md)
gs = gradient(m -> sum(m(x)), m)[1]
gsd = gradient(m -> sum(m(xd)), md)[1]

dims = ntuple(i -> i, ndims(m.weight) - 2)
@test reverse(gs[1].weight; dims) ≈ Array(gsd[1].weight) atol=1f-2
@test reverse(gs.weight; dims) ≈ Array(gsd.weight) atol=1f-2

# Movement back to CPU flips weights back.
mh = Flux.cpu(md)
@@ -52,10 +53,10 @@ end
x = rand(Float32, fill(10, nd)..., 3, 5) |> gpu

pad = ntuple(i -> i, nd)
m = conv_type(kernel, 3 => 4, pad=pad) |> f32 |> gpu
m = conv_type(kernel, 3 => 4, pad=pad) |> gpu

expanded_pad = ntuple(i -> pad[(i - 1) ÷ 2 + 1], 2 * nd)
m_expanded = conv_type(kernel, 3 => 4, pad=expanded_pad) |> f32 |> gpu
m_expanded = conv_type(kernel, 3 => 4, pad=expanded_pad) |> gpu

@test size(m(x)) == size(m_expanded(x))
end
@@ -74,25 +75,25 @@ end
end

@testset "Chain(Conv)" begin
m = Chain(Conv((3, 3), 3 => 3)) |> f32
x = rand(Float32, 10, 10, 3, 2)
gpu_autodiff_test(m, x; atol=1f-3, checkgrad=false)
m = Chain(Conv((3, 3), 3 => 3))
x = rand(Float32, 5, 5, 3, 2)
test_gradients(m, x, test_gpu=true, compare_finite_diff=false, test_grad_f=false)

md = m |> gpu |> cpu
@test md[1].weight ≈ m[1].weight atol=1f-3

m = Chain(ConvTranspose((3, 3), 3 => 3)) |> f32
x = rand(Float32, 10, 10, 3, 2)
gpu_autodiff_test(m, x; atol=1f-3, checkgrad=false)
m = Chain(ConvTranspose((3, 3), 3 => 3))
x = rand(Float32, 5, 5, 3, 2)
test_gradients(m, x, test_gpu=true, compare_finite_diff=false, test_grad_f=false)

md = m |> gpu |> cpu
@test md[1].weight ≈ m[1].weight atol=1f-3
end

@testset "Cross-correlation" begin
m = CrossCor((2, 2), 3 => 4) |> f32
x = rand(Float32, 10, 10, 3, 2)
gpu_autodiff_test(m, x; atol=1f-3)
m = CrossCor((2, 2), 3 => 4)
x = rand(Float32, 5, 5, 3, 2)
test_gradients(m, x, test_gpu=true, compare_finite_diff=false)
end

@testset "Restructure" begin
Expand Down Expand Up @@ -132,7 +133,7 @@ end
bn = BatchNorm(3, σ)
for nd in 1:3
x = rand(Float32, fill(2, nd - 1)..., 3, 4)
gpu_autodiff_test(bn, x; atol=1f-3, allow_nothing=true)
test_gradients(bn, x; test_gpu=true, compare_finite_diff=false)
end
end

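Throughout this file, the old `gpu_autodiff_test` calls become `test_gradients`, the helper defined in the repository's test utilities. A hedged sketch of how it is called; the keyword meanings below are inferred from the calls in this diff, not from the helper's definition:

```julia
# Illustrative only: `test_gradients` lives in Flux's test utilities
# (test/test_utils.jl in this repository), so this assumes those files
# have been included.
using Flux

m = Chain(Dense(10 => 5, tanh), Dense(5 => 2), softmax)
x = rand(Float32, 10, 10)

test_gradients(m, x;
    test_gpu = true,              # also run on the GPU and compare against CPU gradients
    compare_finite_diff = false,  # skip the finite-difference cross-check
    test_grad_f = true,           # check gradients w.r.t. the model's parameters
    test_grad_x = true,           # check gradients w.r.t. the input
    atol = 1e-4, rtol = 1e-4)     # tolerances used for the comparisons
```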
4 changes: 2 additions & 2 deletions test/ext_amdgpu/get_devices.jl
@@ -17,9 +17,9 @@ x = randn(Float32, 5, 5)
cx = x |> amdgpu_device
@test cx isa AMDGPU.ROCArray

# moving models to specific NVIDIA devices
# moving models to specific AMDGPU devices
for id in 0:(length(AMDGPU.devices()) - 1)
current_amdgpu_device = Flux.get_device("AMDGPU", id)
current_amdgpu_device = gpu_device(id+1)

global dense_model = dense_model |> current_amdgpu_device
@test dense_model.weight isa AMDGPU.ROCArray
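The device ids reported by `AMDGPU.devices()` are 0-based, while `gpu_device` takes a 1-based index, hence the `gpu_device(id + 1)` call above. A short sketch of the pattern, assuming a machine with a functional AMDGPU backend:

```julia
# Sketch, assuming AMDGPU.jl is loaded and functional on the host.
using Flux, AMDGPU

for id in 0:(length(AMDGPU.devices()) - 1)
    dev   = gpu_device(id + 1)        # gpu_device uses 1-based device indices
    model = Dense(2 => 3) |> dev
    @assert model.weight isa AMDGPU.ROCArray
end
```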
3 changes: 0 additions & 3 deletions test/ext_amdgpu/runtests.jl
@@ -2,9 +2,6 @@
@assert AMDGPU.functional()
AMDGPU.allowscalar(false)

include("../test_utils.jl")
include("test_utils.jl")

@testset "get_devices" begin
include("get_devices.jl")
end
15 changes: 0 additions & 15 deletions test/ext_amdgpu/test_utils.jl

This file was deleted.

9 changes: 6 additions & 3 deletions test/ext_cuda/get_devices.jl
@@ -8,9 +8,6 @@ dense_model = Dense(2 => 3) # initially lives on CPU
weight = copy(dense_model.weight) # store the weight
bias = copy(dense_model.bias) # store the bias

cuda_device = Flux.get_device()

@test typeof(cuda_device) <: Flux.CUDADevice

# correctness of data transfer
x = randn(5, 5)
@@ -30,6 +27,12 @@ for id in 0:(length(CUDA.devices()) - 1)
@test isequal(Flux.cpu(dense_model.weight), weight)
@test isequal(Flux.cpu(dense_model.bias), bias)
end

# gpu_device remembers the last device selected
# Therefore, we need to reset it to the current cuda device
@test gpu_device().device.handle == length(CUDA.devices()) - 1
gpu_device(CUDA.device().handle + 1)

# finally move to CPU, and see if things work
cdev = cpu_device()
dense_model = cdev(dense_model)
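The added lines document that `gpu_device()` caches the last device it was asked for, so after iterating over all CUDA devices the test resets the cache to the currently active one. A sketch of that behaviour, as described by the test's own comment; it assumes at least one functional CUDA device:

```julia
# Sketch of the caching behaviour described by the comment in the test above.
# Assumes at least one functional CUDA device. Device indices are 1-based,
# CUDA handles are 0-based.
using Flux, CUDA

gpu_device(length(CUDA.devices()))        # select the last device; the selection is cached
@assert gpu_device().device.handle == length(CUDA.devices()) - 1

gpu_device(CUDA.device().handle + 1)      # reset the cache to the currently active device
```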
126 changes: 32 additions & 94 deletions test/ext_cuda/layers.jl
@@ -10,73 +10,23 @@
@test gradient(x -> sum(cpu(x)), gpu(rand(3,3))) isa Tuple
end

# TODO: These layers get into scalar indexing issues.
const BROKEN_LAYERS = Union{}

const ACTIVATIONS = [identity, relu, tanh,
sigmoid, exp, softplus,
elu, selu]
const ACTIVATIONS = [identity, tanh]

function gpu_gradtest(name::String, layers::Vector, x_cpu = nothing, args...; test_cpu = true, test_mode = false)
isnothing(x_cpu) && error("Missing input to test the layers against.")
function gpu_gradtest(name::String, layers::Vector, x_cpu, args...;
test_mode=false, test_grad_x=true,
atol=1e-4, rtol=1e-4)
@testset "$name GPU grad tests" begin
for layer in layers
@testset "$layer Layer GPU grad test" begin

# compute output and grad of parameters
l_cpu = layer(args...)
l_gpu = l_cpu |> gpu
if test_mode
testmode!(l_cpu)
testmode!(l_gpu)
end

ps_cpu = Flux.params(l_cpu)
y_cpu, back_cpu = pullback(() -> sum(l_cpu(x_cpu)), ps_cpu)
gs_cpu = back_cpu(1f0)

x_gpu = gpu(x_cpu)
ps_gpu = Flux.params(l_gpu)

if typeof(l_gpu) <: BROKEN_LAYERS
@test_broken gradient(() -> sum(l_gpu(x_gpu)), ps_gpu) isa Flux.Zygote.Grads
else
y_gpu, back_gpu = pullback(() -> sum(l_gpu(x_gpu)), ps_gpu)
gs_gpu = back_gpu(1f0) # TODO many layers error out when backprop int 1, should fix

# compute grad of input
xg_cpu = gradient(x -> sum(l_cpu(x)), x_cpu)[1]
xg_gpu = gradient(x -> sum(l_gpu(x)), x_gpu)[1]

# test
if test_cpu
if layer === GroupedConvTranspose
@test y_gpu ≈ y_cpu rtol=1f-2 atol=1f-3
else
@test y_gpu ≈ y_cpu rtol=1f-3 atol=1f-3
end
if isnothing(xg_cpu)
@test isnothing(xg_gpu)
else
if layer === GroupedConvTranspose
@test Array(xg_gpu) ≈ xg_cpu rtol = 2f-2 atol = 1f-3
else
@test Array(xg_gpu) ≈ xg_cpu rtol = 1f-3 atol = 1f-3
end
end
end
@test gs_gpu isa Flux.Zygote.Grads
for (p_cpu, p_gpu) in zip(ps_cpu, ps_gpu)
if isnothing(gs_cpu[p_cpu])
@test isnothing(gs_gpu[p_gpu])
else
@test gs_gpu[p_gpu] isa CuArray
if test_cpu
@test Array(gs_gpu[p_gpu]) ≈ gs_cpu[p_cpu] rtol=1f-3 atol=1f-3
end
end
end
end
test_gradients(l_cpu, x_cpu; test_gpu=true, compare_finite_diff=false, test_grad_x, atol, rtol)
end
end
end
@@ -97,23 +47,24 @@ for act in ACTIVATIONS
ConvTranspose, ConvTransposeNoBias,
CrossCor, CrossCorNoBias,
DepthwiseConv, DepthwiseConvNoBias]
gpu_gradtest("Convolution with $act", conv_layers, r, (2,2), 1=>3, act, test_cpu = false)
gpu_gradtest("Convolution with $act", conv_layers, r, (2,2), 1=>3, act)

groupedconv = [GroupedConv, GroupedConvTranspose]
gpu_gradtest("GroupedConvolution with $act", groupedconv, rand(Float32, 28, 28, 100, 2), (3,3), 100 => 25, act, test_cpu = true)
gpu_gradtest("GroupedConvolution with $act", groupedconv, rand(Float32, 28, 28, 100, 2), (3,3), 100 => 25, act)

batch_norm = [BatchNorm, BatchNormNoTrackStats]
gpu_gradtest("BatchNorm 1 with $act", batch_norm, rand(Float32, 28,28,3,4), 3, act, test_cpu = false) #TODO fix errors
gpu_gradtest("BatchNorm 2 with $act", batch_norm, rand(Float32, 5,4), 5, act, test_cpu = true)
gpu_gradtest("BatchNorm 1 with $act", batch_norm, rand(Float32, 28,28,3,4), 3, act, atol=1e-3)
gpu_gradtest("BatchNorm 2 with $act", batch_norm, rand(Float32, 5,4), 5, act, atol=1e-3)

batch_norm = [BatchNormNoTrackStats]
gpu_gradtest("BatchNorm 3 with $act (test mode)", batch_norm, rand(Float32, 5,4), 5, act, test_cpu = true, test_mode = true)
gpu_gradtest("BatchNorm 3 with $act (test mode)", batch_norm, rand(Float32, 5,4), 5, act,
test_mode=true, atol=1e-3)

instancenorm = [InstanceNorm]
gpu_gradtest("InstanceNorm with $act", instancenorm, r, 1, act, test_cpu = false)
gpu_gradtest("InstanceNorm with $act", instancenorm, r, 1, act)

groupnorm = [GroupNorm]
gpu_gradtest("GroupNorm with $act", groupnorm, rand(Float32, 28,28,3,1), 3, 1, act, test_cpu = false)
gpu_gradtest("GroupNorm with $act", groupnorm, rand(Float32, 28,28,3,1), 3, 1, act)
end

r = rand(Float32, 28, 28, 1, 1)
@@ -122,13 +73,13 @@ pooling_layers = [MaxPool, MeanPool]
gpu_gradtest("Pooling", pooling_layers, r, (2,2))

adaptive_pooling_layers = [AdaptiveMaxPool, AdaptiveMeanPool]
gpu_gradtest("AdaptivePooling", adaptive_pooling_layers, r, (7,7), test_cpu = false)
gpu_gradtest("AdaptivePooling", adaptive_pooling_layers, r, (7,7))

dropout_layers = [Dropout, AlphaDropout]
gpu_gradtest("Dropout", dropout_layers, r, 0.5f0; test_cpu = false) # dropout is not deterministic
gpu_gradtest("Dropout", dropout_layers, r, 1e-6) # dropout is not deterministic

layer_norm = [LayerNorm]
gpu_gradtest("LayerNorm 1", layer_norm, rand(Float32, 28,28,3,4), 28, test_cpu = false) #TODO fix errors
gpu_gradtest("LayerNorm 1", layer_norm, rand(Float32, 28,28,3,4), 28)
gpu_gradtest("LayerNorm 2", layer_norm, rand(Float32, 5,4), 5)

upsample = [x -> Upsample(scale=x)]
@@ -140,32 +91,27 @@ gpu_gradtest("PixelShuffle 2d", pixelshuffle, rand(Float32, 3, 4, 18, 3), 3)
gpu_gradtest("PixelShuffle 1d", pixelshuffle, rand(Float32, 3, 18, 3), 3)

embedding = [Flux.Embedding]
gpu_gradtest("Embedding", embedding, [1,3,5], 5, 2)
gpu_gradtest("Embedding repeated indices", embedding, [1,3,5,3], 5, 2)
gpu_gradtest("Embedding integer index", embedding, 1, 5, 2)
gpu_gradtest("Embedding 2d index", embedding, [1 2; 3 4], 5, 2)
gpu_gradtest("Embedding OneHotVec index", embedding, OneHotVector(1, 5), 5, 2)
gpu_gradtest("Embedding OneHotMatrix index", embedding, OneHotMatrix([1,2,3], 5), 5, 2)
gpu_gradtest("Embedding OneHotMatrix repeated indices", embedding, OneHotMatrix([1,2,2], 5), 5, 2)
gpu_gradtest("Embedding", embedding, [1,3,5], 5, 2, test_grad_x=false)
gpu_gradtest("Embedding repeated indices", embedding, [1,3,5,3], 5, 2, test_grad_x=false)
gpu_gradtest("Embedding integer index", embedding, 1, 5, 2, test_grad_x=false)
gpu_gradtest("Embedding 2d index", embedding, [1 2; 3 4], 5, 2, test_grad_x=false)
gpu_gradtest("Embedding OneHotVec index", embedding, OneHotVector(1, 5), 5, 2, test_grad_x=false)
gpu_gradtest("Embedding OneHotMatrix index", embedding, OneHotMatrix([1,2,3], 5), 5, 2, test_grad_x=false)
gpu_gradtest("Embedding OneHotMatrix repeated indices", embedding, OneHotMatrix([1,2,2], 5), 5, 2, test_grad_x=false)

@testset "function layers" begin
x = rand(Float32, 3,3)
gpu_autodiff_test(x -> sum(Flux.normalise(x; dims=1)), x)
gpu_autodiff_test(x -> sum(Flux.normalise(x; dims=2)), x)
gpu_autodiff_test(x -> sum(Flux.normalise(x)), x)
x = rand(Float32, 3, 3)
test_gradients(x -> sum(Flux.normalise(x; dims=1)), x, test_gpu=true, compare_finite_diff=false)
test_gradients(x -> sum(Flux.normalise(x; dims=2)), x, test_gpu=true, compare_finite_diff=false)
test_gradients(x -> sum(Flux.normalise(x)), x, test_gpu=true, compare_finite_diff=false)
end

@testset "Zeros mapped for $cl" for cl in (Conv, ConvTranspose, CrossCor, DepthwiseConv)
l = cl((2,2), 1=>3, bias = false) |> gpu
ip = zeros(Float32, 28,28,1,1) |> gpu
if typeof(l) <: BROKEN_LAYERS
@test_broken sum(l(ip)) ≈ 0.f0
@test_broken gradient(() -> sum(l(ip)), Flux.params(l)) isa Flux.Zygote.Grads
else
@test sum(l(ip)) ≈ 0.f0
gs = gradient(() -> sum(l(ip)), Flux.params(l))
@test l.bias ∉ gs.params
end
@test sum(l(ip)) ≈ 0.f0
gs = gradient(() -> sum(l(ip)), Flux.params(l))
@test l.bias ∉ gs.params
end

@testset "Dense without bias" begin
@@ -366,14 +312,6 @@ end
@test Array(y_gpu) ≈ y_cpu atol=1e-4
@test Array(α_gpu) ≈ α_cpu atol=1e-4

gm_cpu, gx_cpu = gradient(mha_cpu, x_cpu) do mha, x
y, α = mha(x)
return sum(y.^2) + sum(α.^2)
end
gm_gpu, gx_gpu = gradient(mha_gpu, x_gpu) do mha, x
y, α = mha(x)
return sum(y.^2) + sum(α.^2)
end
check_grad(gm_gpu, gm_cpu)
check_grad(gx_gpu, gx_cpu)
test_gradients(mha_cpu, x_cpu, loss = o -> sum(o[1].^2) + sum(o[2].^2),
test_gpu=true, compare_finite_diff=false)
end
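For layers whose forward pass returns a tuple, such as `MultiHeadAttention` here, the manual `pullback`-and-compare block is replaced by one `test_gradients` call with a custom `loss` that reduces the tuple to a scalar. A sketch of the pattern; the layer sizes are made up for illustration, and the helper is assumed to be in scope from the test utilities:

```julia
# Sketch: `loss` maps the (output, attention scores) tuple to a scalar before
# the CPU and GPU gradients are compared. Sizes here are illustrative.
using Flux

mha = MultiHeadAttention(8; nheads = 2)
x   = rand(Float32, 8, 10, 2)             # (embed_dim, seq_len, batch)

test_gradients(mha, x;
    loss = o -> sum(o[1] .^ 2) + sum(o[2] .^ 2),
    test_gpu = true, compare_finite_diff = false)
```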
7 changes: 4 additions & 3 deletions test/ext_cuda/losses.jl
@@ -27,11 +27,12 @@ y = [1 0 0 0 1
@test focal_loss(x, y) ≈ focal_loss(gpu(x), gpu(y))

@testset "GPU: $loss" for loss in ALL_LOSSES
x = rand(Float32, 3,4)
y = rand(Float32, 3,4)
# let's stay far from the boundaries to avoid problems with finite differences gradients
x = 0.1f0 .+ 0.8f0 .* rand(Float32, 3, 4)
y = 0.1f0 .+ 0.8f0 .* rand(Float32, 3, 4)
@test loss(x, y) ≈ loss(gpu(x), gpu(y))

gpu_autodiff_test(loss, x, y)
test_gradients(loss, x, y, test_gpu=true, test_grad_f=false, compare_finite_diff=false)

# Float16 tests
@test loss(f16(x), f16(y)) ≈ loss(gpu(f16(x)), gpu(f16(y)))
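The loss inputs are now drawn from (0.1, 0.9) rather than (0, 1), keeping them away from domain boundaries where losses such as crossentropy involve `log(0)`. The rescaling is just an affine map of uniform samples:

```julia
# Rescale uniform samples from [0, 1) into [0.1, 0.9) so that boundary-sensitive
# losses stay finite and numerically stable.
x = 0.1f0 .+ 0.8f0 .* rand(Float32, 3, 4)
y = 0.1f0 .+ 0.8f0 .* rand(Float32, 3, 4)
@assert all(0.1f0 .<= x .< 0.9f0) && all(0.1f0 .<= y .< 0.9f0)
```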
4 changes: 0 additions & 4 deletions test/ext_cuda/runtests.jl
@@ -7,13 +7,9 @@ using Random, LinearAlgebra, Statistics
@assert CUDA.functional()
CUDA.allowscalar(false)

# include("../test_utils.jl")
include("test_utils.jl")

@testset "get_devices" begin
include("get_devices.jl")
end

@testset "cuda" begin
include("cuda.jl")
end