Tweaks - I
theabhirath committed Jun 27, 2022
1 parent a038ff8 commit de079bc
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/convnets/inception.jl
@@ -340,7 +340,7 @@ struct Inceptionv4
 end
 
 function Inceptionv4(; pretrain = false, inchannels = 3, drop_rate = 0.0, nclasses = 1000)
-    layers = inceptionv4(; inchannels, dropout, nclasses)
+    layers = inceptionv4(; inchannels, drop_rate, nclasses)
     pretrain && loadpretrain!(layers, "Inceptionv4")
     return Inceptionv4(layers)
 end
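
Note on the inception.jl change: the constructor's keyword is named drop_rate, but the body forwarded a name, dropout, that does not match any keyword of inceptionv4, so the supplied rate could not be passed through as intended. Julia's `; name` shorthand forwards a local variable as a keyword argument of the same name, so the names on both sides must match. A minimal sketch of that forwarding rule, with hypothetical stand-ins (inner_model, outer_model) for the Metalhead functions:

# Hypothetical names illustrating the keyword-forwarding shorthand used in the diff:
# `; inchannels, drop_rate, nclasses` passes each local variable as a keyword argument
# of the same name, so the forwarded names must match the callee's keywords exactly.
inner_model(; inchannels = 3, drop_rate = 0.0, nclasses = 1000) = (inchannels, drop_rate, nclasses)

function outer_model(; inchannels = 3, drop_rate = 0.0, nclasses = 1000)
    # Forwarding `drop_rate` (not a stale `dropout` name) keeps the rate flowing through.
    return inner_model(; inchannels, drop_rate, nclasses)
end

outer_model(; drop_rate = 0.2)  # (3, 0.2, 1000)
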
8 changes: 4 additions & 4 deletions src/layers/attention.jl
@@ -1,5 +1,5 @@
 """
-    MHAttention(nheads::Integer, qkv_layer, attn_drop, projection)
+    MHAttention(nheads::Integer, qkv_layer, attn_drop_rate, projection)
 
 Multi-head self-attention layer.
 
@@ -34,9 +34,9 @@ function MHAttention(planes::Integer, nheads::Integer = 8; qkv_bias::Bool = fals
                     attn_drop_rate = 0.0, proj_drop_rate = 0.0)
     @assert planes % nheads==0 "planes should be divisible by nheads"
     qkv_layer = Dense(planes, planes * 3; bias = qkv_bias)
-    attn_drop = Dropout(attn_drop_rate)
+    attn_drop_rate = Dropout(attn_drop_rate)
     proj = Chain(Dense(planes, planes), Dropout(proj_drop_rate))
-    return MHAttention(nheads, qkv_layer, attn_drop, proj)
+    return MHAttention(nheads, qkv_layer, attn_drop_rate, proj)
 end
 
 @functor MHAttention
@@ -52,7 +52,7 @@ function (m::MHAttention)(x::AbstractArray{T, 3}) where {T}
                            seq_len * batch_size)
     query_reshaped = reshape(permutedims(query, (1, 2, 3, 4)), nfeatures ÷ m.nheads,
                              m.nheads, seq_len * batch_size)
-    attention = m.attn_drop(softmax(batched_mul(query_reshaped, key_reshaped) .* scale))
+    attention = m.attn_drop_rate(softmax(batched_mul(query_reshaped, key_reshaped) .* scale))
     value_reshaped = reshape(permutedims(value, (1, 2, 3, 4)), nfeatures ÷ m.nheads,
                              m.nheads, seq_len * batch_size)
     pre_projection = reshape(batched_mul(attention, value_reshaped),
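
Note on the attention.jl hunks: the constructor stores a Dropout layer (built from attn_drop_rate) alongside the qkv Dense and the projection Chain, and the forward pass applies that Dropout to the softmax of the scaled, batched query-key product; the rename only changes which field name holds the layer. A self-contained, single-head sketch of that score pathway (illustration only, not the Metalhead implementation; toy_attention and its shapes are assumptions):

using Flux
using NNlib: batched_mul, batched_transpose

# Single-head sketch of "scaled scores -> softmax -> Dropout -> weighted values".
# Arrays use the (features, sequence length, batch) layout seen in the diff.
function toy_attention(q, k, v, attn_drop)
    scale = convert(eltype(q), inv(sqrt(size(q, 1))))
    # (keys, queries, batch) score map, normalised over the key dimension
    scores = softmax(batched_mul(batched_transpose(k), q) .* scale; dims = 1)
    # The Dropout layer only perturbs the scores in training mode; at inference it is a no-op.
    return batched_mul(v, attn_drop(scores))
end

attn_drop = Dropout(0.1)
q = k = v = rand(Float32, 64, 16, 2)
size(toy_attention(q, k, v, attn_drop))  # (64, 16, 2)

The version in the diff additionally splits the features into nheads chunks before the batched multiplications, which is what the nfeatures ÷ m.nheads reshapes above accomplish.
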
2 changes: 1 addition & 1 deletion src/layers/normalise.jl
@@ -24,4 +24,4 @@ function ChannelLayerNorm(sz::Integer, λ = identity; ϵ = 1.0f-5)
     return ChannelLayerNorm(diag, ϵ)
 end
 
-(m::ChannelLayerNorm)(x) = m.diag(MLUtils.normalise(x; dims = ndims(x) - 1, ϵ = m.ϵ))
+(m::ChannelLayerNorm)(x) = m.diag(Flux.normalise(x; dims = ndims(x) - 1, ϵ = m.ϵ))
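
Note on the normalise.jl change: Flux.normalise standardises an array along the requested dimensions, so normalising over ndims(x) - 1 (the channel dimension of a WHCN array) and then applying the learned diag gives the layer its channel-wise LayerNorm behaviour. A rough sketch of that computation under those assumptions, using plain Statistics calls and explicit scale/shift vectors in place of the layer's diag:

using Statistics  # mean, std

# Rough sketch (not the Metalhead layer): standardise over the channel dimension of a
# WHCN array, following the same dims = ndims(x) - 1 convention as above, then apply a
# per-channel scale γ and shift β standing in for the layer's learned `diag`.
function channel_layernorm_sketch(x::AbstractArray{T, 4}, γ, β; ϵ = T(1.0f-5)) where {T}
    dims = ndims(x) - 1                       # channel dimension for W x H x C x N input
    μ = mean(x; dims)
    σ = std(x; dims, mean = μ, corrected = false)
    xnorm = (x .- μ) ./ (σ .+ ϵ)
    return xnorm .* reshape(γ, 1, 1, :, 1) .+ reshape(β, 1, 1, :, 1)
end

x = rand(Float32, 7, 7, 32, 4)
size(channel_layernorm_sketch(x, ones(Float32, 32), zeros(Float32, 32)))  # (7, 7, 32, 4)
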
