New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ERROR: try/catch is not supported error when computing gradient of gradients on CUDA array #1450
Labels
Comments
mcabbott
added
CUDA
All things GPU
second order
zygote over zygote, or otherwise
labels
Aug 30, 2023
The same issue appears with Flux: using CUDA, Flux, Zygote, Optimisers
function mweFlux(dev)
D = Dense(5,1, relu) |> dev
ps, r = Optimisers.destructure(D)
x = rand(Float32,5, 2) |> dev
g(ps) = sum(abs2,only(gradient(x -> sum(r(ps)(x)),x)))
gradient(x->g(x),ps)
end

julia> mweFlux(cpu)
┌ Warning: second derivatives of Restructure may not work yet, sorry!
└ @ Optimisers ~/.julia/packages/Optimisers/TxzMn/src/destructure.jl:166
(Float32[1.233114, -0.5352969, -1.307327, 0.31797743, 0.41771483, 0.0],)
julia> mweFlux(gpu)
ERROR: try/catch is not supported.
Refer to the Zygote documentation for fixes.
https://fluxml.ai/Zygote.jl/latest/limitations
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] instrument(ir::IRTools.Inner.IR)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:128
[3] #Primal#31
@ ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:227 [inlined]
[4] Primal
@ ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:226 [inlined]
[5] Zygote.Adjoint(ir::IRTools.Inner.IR; varargs::Nothing, normalise::Bool)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:352
[6] _generate_pullback_via_decomposition(T::Type, world::Nothing)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/emit.jl:101
[7] _generate_pullback(::Type, ::Nothing, ::Type, ::Type, ::Vararg{Type})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:27
[8] #s86#1607
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:102 [inlined]
[9] var"#s86#1607"(::Any, ctx::Any, f::Any, args::Any)
@ Zygote ./none:0
[10] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
@ Core ./boot.jl:602
[11] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/compiler/execution.jl:310 [inlined]
[12] _pullback(::Zygote.Context{false}, ::typeof(cufunction), ::GPUArrays.var"#broadcast_kernel#26", ::Type{Tuple{CUDA.CuKernelContext, CuDeviceMatrix{Float32, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}}, Int64}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[13] macro expansion
@ ~/.julia/packages/CUDA/35NC6/src/compiler/execution.jl:104 [inlined]
[14] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/gpuarrays.jl:17 [inlined]
[15] _pullback(::Zygote.Context{false}, ::CUDA.var"##launch_heuristic#1080", ::Int64, ::Int64, ::typeof(GPUArrays.launch_heuristic), ::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#26", ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}}, ::Int64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[16] _apply(::Function, ::Vararg{Any})
@ Core ./boot.jl:838
[17] adjoint
@ ~/.julia/packages/Zygote/4rucm/src/lib/lib.jl:203 [inlined]
[18] adjoint(::Zygote.Context{false}, ::typeof(Core._apply_iterate), ::typeof(iterate), ::Function, ::Tuple{Int64, Int64, typeof(GPUArrays.launch_heuristic), CUDA.CuArrayBackend, GPUArrays.var"#broadcast_kernel#26"}, ::Tuple{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}}, Int64})
@ Zygote ./none:0
[19] _pullback
@ ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:66 [inlined]
[20] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/gpuarrays.jl:15 [inlined]
[21] _pullback(::Zygote.Context{false}, ::typeof(Core.kwcall), ::NamedTuple{(:elements, :elements_per_thread), Tuple{Int64, Int64}}, ::typeof(GPUArrays.launch_heuristic), ::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#26", ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}}, ::Int64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[22] _pullback
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:65 [inlined]
[23] _pullback(::Zygote.Context{false}, ::typeof(GPUArrays._copyto!), ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[24] _pullback
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:41 [inlined]
[25] _pullback
@ ./broadcast.jl:881 [inlined]
[26] _pullback(::Zygote.Context{false}, ::typeof(Base.Broadcast.materialize!), ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[27] _pullback
@ ./broadcast.jl:877 [inlined]
[28] _pullback
@ ~/.julia/packages/Zygote/4rucm/src/lib/broadcast.jl:369 [inlined]
[29] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#1453#1456"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, args::Float32)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[30] _pullback
@ ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:71 [inlined]
[31] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#4229#back#1457"{Zygote.var"#1453#1456"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, args::Float32)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[32] _pullback
@ ./REPL[8]:5 [inlined]
...
As the labels suggest, this is a problem with Zygote rules being hit while diffing GPU code which are not themselves differentiable. What's really needed is a MWE which doesn't use any libraries aside from Zygote and CUDA.
Here is a MWE: using Zygote,CUDA
function MWE(D,x)
g(D) = sum(only(gradient(x -> sum(D*x),x)))
gradient(x->g(x),D)
end

julia> D = randn(Float64, 1, 5);
julia> x = randn(Float64, 5);
julia> MWE(D,x)
([1.0 1.0 … 1.0 1.0],)
julia> D = CUDA.randn(Float64, 1, 5);
julia> x = CUDA.randn(Float64, 5);
julia> MWE(D,x)
ERROR: try/catch is not supported.
Refer to the Zygote documentation for fixes.
https://fluxml.ai/Zygote.jl/latest/limitations
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] instrument(ir::IRTools.Inner.IR)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:128
[3] #Primal#31
@ ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:227 [inlined]
[4] Primal
@ ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:226 [inlined]
[5] Zygote.Adjoint(ir::IRTools.Inner.IR; varargs::Nothing, normalise::Bool)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:352
[6] _generate_pullback_via_decomposition(T::Type, world::Nothing)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/emit.jl:101
[7] _generate_pullback(::Type, ::Nothing, ::Type, ::Type, ::Vararg{Type})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:27
[8] #s86#1607
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:102 [inlined]
[9] var"#s86#1607"(::Any, ctx::Any, f::Any, args::Any)
@ Zygote ./none:0
[10] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
@ Core ./boot.jl:602
[11] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/compiler/execution.jl:310 [inlined]
[12] _pullback(::Zygote.Context{false}, ::typeof(cufunction), ::GPUArrays.var"#broadcast_kernel#26", ::Type{Tuple{CUDA.CuKernelContext, CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}}, Int64}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[13] macro expansion
@ ~/.julia/packages/CUDA/35NC6/src/compiler/execution.jl:104 [inlined]
[14] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/gpuarrays.jl:17 [inlined]
[15] _pullback(::Zygote.Context{false}, ::CUDA.var"##launch_heuristic#1080", ::Int64, ::Int64, ::typeof(GPUArrays.launch_heuristic), ::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#26", ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}}, ::Int64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[16] _apply(::Function, ::Vararg{Any})
@ Core ./boot.jl:838
[17] adjoint
@ ~/.julia/packages/Zygote/4rucm/src/lib/lib.jl:203 [inlined]
[18] adjoint(::Zygote.Context{false}, ::typeof(Core._apply_iterate), ::typeof(iterate), ::Function, ::Tuple{Int64, Int64, typeof(GPUArrays.launch_heuristic), CUDA.CuArrayBackend, GPUArrays.var"#broadcast_kernel#26"}, ::Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}}, Int64})
@ Zygote ./none:0
[19] _pullback
@ ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:66 [inlined]
[20] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/gpuarrays.jl:15 [inlined]
[21] _pullback(::Zygote.Context{false}, ::typeof(Core.kwcall), ::NamedTuple{(:elements, :elements_per_thread), Tuple{Int64, Int64}}, ::typeof(GPUArrays.launch_heuristic), ::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#26", ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}}, ::Int64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[22] _pullback
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:65 [inlined]
[23] _pullback(::Zygote.Context{false}, ::typeof(GPUArrays._copyto!), ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[24] _pullback
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:41 [inlined]
[25] _pullback
@ ./broadcast.jl:881 [inlined]
[26] _pullback(::Zygote.Context{false}, ::typeof(Base.Broadcast.materialize!), ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[27] _pullback
@ ./broadcast.jl:877 [inlined]
[28] _pullback
@ ~/.julia/packages/Zygote/4rucm/src/lib/broadcast.jl:369 [inlined]
[29] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#1453#1456"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, args::Float64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[30] _pullback
@ ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:71 [inlined]
[31] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#4229#back#1457"{Zygote.var"#1453#1456"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, args::Float64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[32] _pullback
@ ./REPL[17]:2 [inlined]
[33] _pullback(ctx::Zygote.Context{false}, f::Zygote.Pullback{Tuple{var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, Tuple{Zygote.var"#2184#back#303"{Zygote.var"#back#302"{:D, Zygote.Context{false}, var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.ZBack{ChainRules.var"#times_pullback#1481"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Zygote.var"#4229#back#1457"{Zygote.var"#1453#1456"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}}, args::Float64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[34] _pullback
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:45 [inlined]
[35] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#75#76"{Zygote.Pullback{Tuple{var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, Tuple{Zygote.var"#2184#back#303"{Zygote.var"#back#302"{:D, Zygote.Context{false}, var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.ZBack{ChainRules.var"#times_pullback#1481"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Zygote.var"#4229#back#1457"{Zygote.var"#1453#1456"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}}}, args::Float64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[36] _pullback
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:97 [inlined]
[37] _pullback(::Zygote.Context{false}, ::typeof(gradient), ::var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[38] _pullback
@ ./REPL[17]:2 [inlined]
[39] _pullback(ctx::Zygote.Context{false}, f::var"#g#19"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, args::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[40] _pullback
@ ./REPL[17]:3 [inlined]
[41] _pullback(ctx::Zygote.Context{false}, f::var"#18#21"{var"#g#19"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, args::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[42] pullback(f::Function, cx::Zygote.Context{false}, args::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:44
[43] pullback
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:42 [inlined]
[44] gradient(f::Function, args::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:96
[45] MWE(D::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, x::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ Main ./REPL[17]:3
[46] top-level scope
@ REPL[23]:1
[47] top-level scope
@ ~/.julia/packages/CUDA/35NC6/src/initialization.jl:190
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
I'm trying to implement a gradient penalty with Lux. It is fine on CPU but raises a "try/catch" error on GPU (CUDA). It seems to be linked to the try/catch here, but I'm not able to figure out what the problem could be.
The text was updated successfully, but these errors were encountered: