Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions compiler/rustc_codegen_llvm/src/back/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -710,8 +710,7 @@ pub(crate) unsafe fn llvm_optimize(
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
let cx =
SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
// For now we only support up to 10 kernels named kernel_0 ... kernel_9, a follow-up PR is
// introducing a proper offload intrinsic to solve this limitation.

for func in cx.get_functions() {
let offload_kernel = "offload-kernel";
if attributes::has_string_attr(func, offload_kernel) {
Expand Down
16 changes: 15 additions & 1 deletion compiler/rustc_codegen_llvm/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ use rustc_middle::dep_graph;
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrs, SanitizerFnAttrs};
use rustc_middle::mir::mono::Visibility;
use rustc_middle::ty::TyCtxt;
use rustc_session::config::DebugInfo;
use rustc_session::config::{DebugInfo, Offload};
use rustc_span::Symbol;
use rustc_target::spec::SanitizerSet;

use super::ModuleLlvm;
use crate::attributes;
use crate::builder::Builder;
use crate::builder::gpu_offload::OffloadGlobals;
use crate::context::CodegenCx;
use crate::llvm::{self, Value};

Expand Down Expand Up @@ -85,6 +86,19 @@ pub(crate) fn compile_codegen_unit(
let llvm_module = ModuleLlvm::new(tcx, cgu_name.as_str());
{
let mut cx = CodegenCx::new(tcx, cgu, &llvm_module);

// Declare and store globals shared by all offload kernels
//
// These globals are left in the LLVM-IR host module so all kernels can access them.
// They are necessary for correct offload execution. We do this here to simplify the
// `offload` intrinsic, avoiding the need for tracking whether it's the first
// intrinsic call or not.
if cx.sess().opts.unstable_opts.offload.contains(&Offload::Enable)
&& !cx.sess().target.is_like_gpu
{
cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx)));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'm a bit unsure about this location. we could also cache these globals and generate them on the first intrinsic call, but that felt like overloading intrinsic codegen a bit too much

i don't have a strong opinion though, so happy to go with whatever u think is best

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd also prefer if the intrinsics stay simple and don't need global context about whether they're the first intrinsic or not.

The location seems fine, but can you leave a comment explaining that we have to leave a bunch of globals in the LLVM-IR host module? Similar to e.g. objective-c below. Otherwise lgtm.

}

let mono_items = cx.codegen_unit.items_in_deterministic_order(cx.tcx);
for &(mono_item, data) in &mono_items {
mono_item.predefine::<Builder<'_, '_, '_>>(
Expand Down
Loading
Loading