Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2021f6c
datadeps: Fix views and implement remainder copies
jpsamaroo Aug 10, 2025
79814dc
Add type-stable spawn code paths
jpsamaroo Oct 16, 2025
23ae7ed
datadeps: Optimize ainfo aliasing lookups
jpsamaroo Oct 16, 2025
e3418d5
datadeps: Optimize remote ArgumentWrapper lookup
jpsamaroo Oct 16, 2025
61ca6ec
thunk: Remove unnecessary scope allocations
jpsamaroo Oct 16, 2025
926a36c
test/datadeps: Remove aliasing=false tests
jpsamaroo Nov 11, 2025
4583f37
datadeps: ainfo_arg must track ainfo -> multiple arg_w
jpsamaroo Nov 15, 2025
36f4998
datadeps: Fix broken ChunkView unwrapping
jpsamaroo Nov 15, 2025
7dc4277
datadeps: Signature fixups and small cleanups
jpsamaroo Nov 15, 2025
965b131
datadeps: Fix aliased object detection around Chunks
jpsamaroo Dec 9, 2025
b3cde43
datadeps: Validate ManyMemorySpan inner span lengths
jpsamaroo Dec 9, 2025
32ab74c
datadeps: Optimize RemainderAliasing move! copies
jpsamaroo Dec 9, 2025
99530ae
datadeps: Overhaul Datadeps tests
jpsamaroo Dec 9, 2025
2584ac5
datadeps: Validate further that RemainderAliasing is not empty
jpsamaroo Dec 10, 2025
b2b2659
datadeps: Fix aliasing for degenerate views
jpsamaroo Dec 12, 2025
e3af44b
datadeps: Fix GPU execution
jpsamaroo Sep 23, 2025
855c90e
Sch: Skip set_failed! store when result already set
jpsamaroo Dec 14, 2025
be936a1
scopes: Disallow constructing empty UnionScope
jpsamaroo Dec 14, 2025
1ab1b3e
datadeps: Consolidate aliasing rewrap code
jpsamaroo Dec 15, 2025
0d83f4d
HaloArray: Add aliasing methods
jpsamaroo Dec 15, 2025
cf194e3
CI: Extend CUDA job time
jpsamaroo Dec 15, 2025
943764b
datadeps: Make IntervalTree find_overlapping non-recursive
jpsamaroo Dec 16, 2025
8857b17
datadeps: Add TID to dagdebug statements
jpsamaroo Dec 16, 2025
bf510ba
datadeps: Fix split-brain in aliasing object cache
jpsamaroo Dec 31, 2025
978be90
datadeps: Reduce remainder restart distance
jpsamaroo Dec 31, 2025
9aacb83
datadeps: Properly handle nested structures for remainder copies
jpsamaroo Dec 31, 2025
a3f9d67
ManyPair: Add missing convert rule
jpsamaroo Jan 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ steps:
codecov: true

- label: Julia 1.11 (CUDA)
timeout_in_minutes: 20
timeout_in_minutes: 30
<<: *gputest
plugins:
- JuliaCI/julia#v1:
Expand Down
9 changes: 9 additions & 0 deletions ext/CUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ function Dagger.memory_space(x::CuArray)
device_uuid = CUDA.uuid(dev)
return CUDAVRAMMemorySpace(myid(), device_id, device_uuid)
end
# Aliasing descriptor for a `CuArray`: one contiguous span that starts at
# `pointer(x)`, is `sizeof(T) * length(x)` bytes long, and is tagged with the
# memory space reported by `Dagger.memory_space(x)`.
function Dagger.aliasing(x::CuArray{T}) where T
    space = Dagger.memory_space(x)
    # The pointer is stored as a raw integer address paired with its space.
    addr = UInt64(pointer(x))
    rptr = Dagger.RemotePtr{Cvoid}(addr, space)
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

Dagger.memory_spaces(proc::CuArrayDeviceProc) = Set([CUDAVRAMMemorySpace(proc.owner, proc.device, proc.device_uuid)])
Dagger.processors(space::CUDAVRAMMemorySpace) = Set([CuArrayDeviceProc(space.owner, space.device, space.device_uuid)])
Expand Down Expand Up @@ -75,6 +82,8 @@ function with_context!(space::CUDAVRAMMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
with_context!(space.device)
end
# Expose the extension's `with_context!` methods through `Dagger.with_context!`.
# NOTE(review): this relies on the unqualified `with_context!` below resolving to
# a function distinct from `Dagger.with_context!`; if both names referred to the
# same function these methods would call themselves — confirm.
function Dagger.with_context!(proc::CuArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::CUDAVRAMMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
old_ctx = context()
old_stream = stream()
Expand Down
9 changes: 9 additions & 0 deletions ext/IntelExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ function Dagger.memory_space(x::oneArray)
return IntelVRAMMemorySpace(myid(), device_id)
end
_device_id(dev::ZeDevice) = findfirst(other_dev->other_dev === dev, collect(oneAPI.devices()))
# Aliasing descriptor for a `oneArray`: one contiguous span that starts at
# `pointer(x)`, is `sizeof(T) * length(x)` bytes long, and is tagged with the
# memory space reported by `Dagger.memory_space(x)`.
function Dagger.aliasing(x::oneArray{T}) where T
    space = Dagger.memory_space(x)
    # The pointer is stored as a raw integer address paired with its space.
    addr = UInt64(pointer(x))
    rptr = Dagger.RemotePtr{Cvoid}(addr, space)
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

Dagger.memory_spaces(proc::oneArrayDeviceProc) = Set([IntelVRAMMemorySpace(proc.owner, proc.device_id)])
Dagger.processors(space::IntelVRAMMemorySpace) = Set([oneArrayDeviceProc(space.owner, space.device_id)])
Expand All @@ -68,6 +75,8 @@ function with_context!(space::IntelVRAMMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
with_context!(space.device_id)
end
# Expose the extension's `with_context!` methods through `Dagger.with_context!`.
# NOTE(review): this relies on the unqualified `with_context!` below resolving to
# a function distinct from `Dagger.with_context!`; if both names referred to the
# same function these methods would call themselves — confirm.
function Dagger.with_context!(proc::oneArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::IntelVRAMMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
old_drv = driver()
old_dev = device()
Expand Down
9 changes: 9 additions & 0 deletions ext/MetalExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ function Dagger.memory_space(x::MtlArray)
return MetalVRAMMemorySpace(myid(), device_id)
end
_device_id(dev::MtlDevice) = findfirst(other_dev->other_dev === dev, Metal.devices())
# Aliasing descriptor for an `MtlArray`: one contiguous span that starts at
# `pointer(x)`, is `sizeof(T) * length(x)` bytes long, and is tagged with the
# memory space reported by `Dagger.memory_space(x)`.
function Dagger.aliasing(x::MtlArray{T}) where T
    space = Dagger.memory_space(x)
    # The pointer is stored as a raw integer address paired with its space.
    addr = UInt64(pointer(x))
    rptr = Dagger.RemotePtr{Cvoid}(addr, space)
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

Dagger.memory_spaces(proc::MtlArrayDeviceProc) = Set([MetalVRAMMemorySpace(proc.owner, proc.device_id)])
Dagger.processors(space::MetalVRAMMemorySpace) = Set([MtlArrayDeviceProc(space.owner, space.device_id)])
Expand All @@ -66,6 +73,8 @@ end
# Context activation for a Metal memory space. The only work done here is an
# internal sanity check that the space's root worker is the current process
# (`myid()`); no device or context switch is performed in this method.
function with_context!(space::MetalVRAMMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
end
# Expose the extension's `with_context!` methods through `Dagger.with_context!`.
# NOTE(review): this relies on the unqualified `with_context!` below resolving to
# a function distinct from `Dagger.with_context!`; if both names referred to the
# same function these methods would call themselves — confirm.
function Dagger.with_context!(proc::MtlArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::MetalVRAMMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
with_context!(x)
return f()
Expand Down
9 changes: 9 additions & 0 deletions ext/OpenCLExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ function Dagger.memory_space(x::CLArray)
idx = findfirst(==(queue), QUEUES)
return CLMemorySpace(myid(), idx)
end
# Aliasing descriptor for a `CLArray`: one contiguous span that starts at
# `pointer(x)`, is `sizeof(T) * length(x)` bytes long, and is tagged with the
# memory space reported by `Dagger.memory_space(x)`.
function Dagger.aliasing(x::CLArray{T}) where T
    space = Dagger.memory_space(x)
    # The pointer is stored as a raw integer address paired with its space.
    addr = UInt64(pointer(x))
    rptr = Dagger.RemotePtr{Cvoid}(addr, space)
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

Dagger.memory_spaces(proc::CLArrayDeviceProc) = Set([CLMemorySpace(proc.owner, proc.device)])
Dagger.processors(space::CLMemorySpace) = Set([CLArrayDeviceProc(space.owner, space.device)])
Expand Down Expand Up @@ -71,6 +78,8 @@ function with_context!(space::CLMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
with_context!(space.device)
end
# Expose the extension's `with_context!` methods through `Dagger.with_context!`.
# NOTE(review): this relies on the unqualified `with_context!` below resolving to
# a function distinct from `Dagger.with_context!`; if both names referred to the
# same function these methods would call themselves — confirm.
function Dagger.with_context!(proc::CLArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::CLMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
old_ctx = cl.context()
old_queue = cl.queue()
Expand Down
9 changes: 9 additions & 0 deletions ext/ROCExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ end
Dagger.root_worker_id(space::ROCVRAMMemorySpace) = space.owner
Dagger.memory_space(x::ROCArray) =
ROCVRAMMemorySpace(myid(), AMDGPU.device(x).device_id)
# Aliasing descriptor for a `ROCArray`: one contiguous span that starts at
# `pointer(x)`, is `sizeof(T) * length(x)` bytes long, and is tagged with the
# memory space reported by `Dagger.memory_space(x)`.
function Dagger.aliasing(x::ROCArray{T}) where T
    space = Dagger.memory_space(x)
    # The pointer is stored as a raw integer address paired with its space.
    addr = UInt64(pointer(x))
    rptr = Dagger.RemotePtr{Cvoid}(addr, space)
    nbytes = sizeof(T) * length(x)
    span = Dagger.MemorySpan{typeof(space)}(rptr, nbytes)
    return Dagger.ContiguousAliasing(span)
end

Dagger.memory_spaces(proc::ROCArrayDeviceProc) = Set([ROCVRAMMemorySpace(proc.owner, proc.device_id)])
Dagger.processors(space::ROCVRAMMemorySpace) = Set([ROCArrayDeviceProc(space.owner, space.device_id)])
Expand Down Expand Up @@ -67,6 +74,8 @@ function with_context!(space::ROCVRAMMemorySpace)
@assert Dagger.root_worker_id(space) == myid()
with_context!(space.device_id)
end
# Expose the extension's `with_context!` methods through `Dagger.with_context!`.
# NOTE(review): this relies on the unqualified `with_context!` below resolving to
# a function distinct from `Dagger.with_context!`; if both names referred to the
# same function these methods would call themselves — confirm.
function Dagger.with_context!(proc::ROCArrayDeviceProc)
    return with_context!(proc)
end
function Dagger.with_context!(space::ROCVRAMMemorySpace)
    return with_context!(space)
end
function with_context(f, x)
old_ctx = context()
old_device = AMDGPU.device()
Expand Down
10 changes: 9 additions & 1 deletion src/Dagger.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ include("utils/fetch.jl")
include("utils/chunks.jl")
include("utils/logging.jl")
include("submission.jl")
# Root abstract type for memory spaces. It is declared here, ahead of the
# includes that follow — presumably because those files refer to `MemorySpace`
# before `memory-spaces.jl` is loaded; confirm before moving it.
abstract type MemorySpace end
include("utils/memory-span.jl")
include("utils/interval_tree.jl")
include("memory-spaces.jl")

# Task scheduling
Expand All @@ -83,7 +86,12 @@ include("utils/caching.jl")
include("sch/Sch.jl"); using .Sch

# Data dependency task queue
include("datadeps.jl")
include("datadeps/aliasing.jl")
include("datadeps/chunkview.jl")
include("datadeps/remainders.jl")
include("datadeps/queue.jl")

# Stencils
include("utils/haloarray.jl")
include("stencil.jl")

Expand Down
32 changes: 31 additions & 1 deletion src/argument.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ function pos_kw(pos::ArgPosition)
@assert pos.kw != :NULL
return pos.kw
end

mutable struct Argument
pos::ArgPosition
value
Expand All @@ -41,6 +42,35 @@ function Base.iterate(arg::Argument, state::Bool)
return nothing
end
end

# Copy an `Argument`: the position is rebuilt via `ArgPosition(arg.pos)`, while
# the value itself is carried over as-is (not copied).
function Base.copy(arg::Argument)
    position = ArgPosition(arg.pos)
    return Argument(position, arg.value)
end

# The chunk type of an argument is the chunk type of the value it wraps.
function chunktype(arg::Argument)
    return chunktype(value(arg))
end

# An argument whose value type is carried in the type parameter `T`, pairing an
# `ArgPosition` with a concretely-typed `value`.
mutable struct TypedArgument{T}
    pos::ArgPosition
    value::T
end

# Build a positional argument at index `pos`; the keyword slot is set to `:NULL`.
function TypedArgument(pos::Integer, value::T) where T
    position = ArgPosition(true, pos, :NULL)
    return TypedArgument{T}(position, value)
end

# Build a keyword argument named `kw`; the positional index slot is set to `0`.
function TypedArgument(kw::Symbol, value::T) where T
    position = ArgPosition(false, 0, kw)
    return TypedArgument{T}(position, value)
end
# Property assignment (`arg.name = value`) on a `TypedArgument` is rejected
# unconditionally with an `ArgumentError`.
function Base.setproperty!(arg::TypedArgument, name::Symbol, value::T) where T
    throw(ArgumentError("Cannot set properties of TypedArgument"))
end
# Position queries: each forwards to the same-named function on the wrapped
# `arg.pos`.
ispositional(arg::TypedArgument) = ispositional(arg.pos)
iskw(arg::TypedArgument) = iskw(arg.pos)
pos_idx(arg::TypedArgument) = pos_idx(arg.pos)
pos_kw(arg::TypedArgument) = pos_kw(arg.pos)
raw_position(arg::TypedArgument) = raw_position(arg.pos)
# Value access: `value` returns the stored value, `valuetype` returns the type
# parameter `T` without touching the instance's fields.
value(arg::TypedArgument) = arg.value
valuetype(arg::TypedArgument{T}) where T = T
# Iteration yields exactly two items, `arg.pos` then `arg.value`, so a
# `TypedArgument` can be destructured as `pos, value = arg`.
# The `Bool` state is `true` while the value is still pending.
function Base.iterate(arg::TypedArgument)
    return (arg.pos, true)
end
Base.iterate(arg::TypedArgument, state::Bool) = state ? (arg.value, false) : nothing
# Copy a `TypedArgument`, keeping its type parameter: the position is rebuilt
# via `ArgPosition(arg.pos)`, while the value is carried over as-is (not copied).
function Base.copy(arg::TypedArgument{T}) where T
    position = ArgPosition(arg.pos)
    return TypedArgument{T}(position, arg.value)
end

# The chunk type of a typed argument is the chunk type of the value it wraps.
function chunktype(arg::TypedArgument)
    return chunktype(value(arg))
end

# Convert a `TypedArgument` into an untyped `Argument`.
# NOTE(review): unlike `Base.copy`, this passes `arg.pos` through directly rather
# than building a fresh `ArgPosition`, so both objects share the same position
# object — confirm that sharing is intended.
function Argument(arg::TypedArgument)
    return Argument(arg.pos, arg.value)
end

# Either argument representation: the untyped `Argument` or the parametric
# `TypedArgument`.
const AnyArgument = Union{Argument, TypedArgument}
Loading
Loading