Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/codegen-llvm/autodiff/abi_handling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ fn f1(x: &[f32; 2]) -> f32 {
// CHECK-NEXT: Function Attrs
// debug-NEXT: define internal { float, float }
// debug-SAME: (ptr %f, float %x, float %dret)
// release-NEXT: define internal fastcc float
// release-NEXT: define internal fastcc noundef float
// release-SAME: (float noundef %x)

// CHECK-LABEL: ; abi_handling::f2
Expand All @@ -77,7 +77,7 @@ fn f2(f: fn(f32) -> f32, x: f32) -> f32 {
// CHECK-NEXT: Function Attrs
// debug-NEXT: define internal { float, float }
// debug-SAME: (ptr align 4 %x, ptr align 4 %bx_0, ptr align 4 %y, ptr align 4 %by_0)
// release-NEXT: define internal fastcc { float, float }
// release-NEXT: define internal fastcc
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is its return type now?

// release-SAME: (float %x.0.val)

// CHECK-LABEL: ; abi_handling::f3
Expand Down
78 changes: 34 additions & 44 deletions tests/codegen-llvm/autodiff/batched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn square(x: &f32) -> f32 {
}

// d_square2
// CHECK: define internal [4 x float] @fwddiffe4square(ptr noalias noundef readonly align 4 captures(none) dereferenceable(4) %x, [4 x ptr] %"x'")
// CHECK: define internal {{[ a-z_]*}} [4 x float] @fwddiffe4square({{[ a-z0-9()_]*}} %x{{.*}}, [4 x ptr] %"x'")
// CHECK-NEXT: start:
// CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0
// CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4
Expand All @@ -33,28 +33,23 @@ fn square(x: &f32) -> f32 {
// CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4
// CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3
// CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4
// CHECK-NEXT: %_2 = load float, ptr %x, align 4
// CHECK-NEXT: %4 = fmul fast float %"_2'ipl", %_2
// CHECK-NEXT: %5 = fmul fast float %"_2'ipl1", %_2
// CHECK-NEXT: %6 = fmul fast float %"_2'ipl2", %_2
// CHECK-NEXT: %7 = fmul fast float %"_2'ipl3", %_2
// CHECK-NEXT: %8 = fmul fast float %"_2'ipl", %_2
// CHECK-NEXT: %9 = fmul fast float %"_2'ipl1", %_2
// CHECK-NEXT: %10 = fmul fast float %"_2'ipl2", %_2
// CHECK-NEXT: %11 = fmul fast float %"_2'ipl3", %_2
// CHECK-NEXT: %12 = fadd fast float %4, %8
// CHECK-NEXT: %13 = insertvalue [4 x float] undef, float %12, 0
// CHECK-NEXT: %14 = fadd fast float %5, %9
// CHECK-NEXT: %15 = insertvalue [4 x float] %13, float %14, 1
// CHECK-NEXT: %16 = fadd fast float %6, %10
// CHECK-NEXT: %17 = insertvalue [4 x float] %15, float %16, 2
// CHECK-NEXT: %18 = fadd fast float %7, %11
// CHECK-NEXT: %19 = insertvalue [4 x float] %17, float %18, 3
// CHECK-NEXT: ret [4 x float] %19
// CHECK-NEXT: }
// CHECK-NEXT: %4 = fadd fast float %"_2'ipl", %"_2'ipl"
// CHECK-NEXT: %5 = fmul fast float %4, %x.0.val
// CHECK-NEXT: %6 = insertvalue [4 x float] undef, float %5, 0
// CHECK-NEXT: %7 = fadd fast float %"_2'ipl1", %"_2'ipl1"
// CHECK-NEXT: %8 = fmul fast float %7, %x.0.val
// CHECK-NEXT: %9 = insertvalue [4 x float] %6, float %8, 1
// CHECK-NEXT: %10 = fadd fast float %"_2'ipl2", %"_2'ipl2"
// CHECK-NEXT: %11 = fmul fast float %10, %x.0.val
// CHECK-NEXT: %12 = insertvalue [4 x float] %9, float %11, 2
// CHECK-NEXT: %13 = fadd fast float %"_2'ipl3", %"_2'ipl3"
// CHECK-NEXT: %14 = fmul fast float %13, %x.0.val
Comment on lines +36 to +46
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless ZuseZ4 prefers this sort of exact matching, this could use CHECK-DAG so we can capture the operations we're actually concerned about: four insertvalues, and an fadd and fmul apiece, in any order.

// CHECK-NEXT: %[[RETVAR:[0-9]+]] = insertvalue [4 x float] %{{[0-9]+}}, float %{{[0-9]+}}, 3
// CHECK-NEXT: ret [4 x float] %[[RETVAR]]
// CHECK-NEXT: }

// d_square3, the extra float is the original return value (x * x)
// CHECK: define internal { float, [4 x float] } @fwddiffe4square.1(ptr noalias noundef readonly align 4 captures(none) dereferenceable(4) %x, [4 x ptr] %"x'")
// d_square1, the extra float is the original return value (x * x)
// CHECK: define internal fastcc { float, [4 x float] } @fwddiffe4square.{{[0-9]+}}(float %x.0.val, [4 x ptr] %"x'")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same idea as my comment on R26

// CHECK-NEXT: start:
// CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0
// CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4
Expand All @@ -64,28 +59,23 @@ fn square(x: &f32) -> f32 {
// CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4
// CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3
// CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4
// CHECK-NEXT: %_2 = load float, ptr %x, align 4
// CHECK-NEXT: %_0 = fmul float %_2, %_2
// CHECK-NEXT: %4 = fmul fast float %"_2'ipl", %_2
// CHECK-NEXT: %5 = fmul fast float %"_2'ipl1", %_2
// CHECK-NEXT: %6 = fmul fast float %"_2'ipl2", %_2
// CHECK-NEXT: %7 = fmul fast float %"_2'ipl3", %_2
// CHECK-NEXT: %8 = fmul fast float %"_2'ipl", %_2
// CHECK-NEXT: %9 = fmul fast float %"_2'ipl1", %_2
// CHECK-NEXT: %10 = fmul fast float %"_2'ipl2", %_2
// CHECK-NEXT: %11 = fmul fast float %"_2'ipl3", %_2
// CHECK-NEXT: %12 = fadd fast float %4, %8
// CHECK-NEXT: %13 = insertvalue [4 x float] undef, float %12, 0
// CHECK-NEXT: %14 = fadd fast float %5, %9
// CHECK-NEXT: %15 = insertvalue [4 x float] %13, float %14, 1
// CHECK-NEXT: %16 = fadd fast float %6, %10
// CHECK-NEXT: %17 = insertvalue [4 x float] %15, float %16, 2
// CHECK-NEXT: %18 = fadd fast float %7, %11
// CHECK-NEXT: %19 = insertvalue [4 x float] %17, float %18, 3
// CHECK-NEXT: %20 = insertvalue { float, [4 x float] } undef, float %_0, 0
// CHECK-NEXT: %21 = insertvalue { float, [4 x float] } %20, [4 x float] %19, 1
// CHECK-NEXT: ret { float, [4 x float] } %21
// CHECK-NEXT: }
// CHECK-NEXT: %_0 = fmul float %x.0.val, %x.0.val
// CHECK-NEXT: %4 = fadd fast float %"_2'ipl", %"_2'ipl"
// CHECK-NEXT: %5 = fmul fast float %4, %x.0.val
// CHECK-NEXT: %6 = insertvalue [4 x float] undef, float %5, 0
// CHECK-NEXT: %7 = fadd fast float %"_2'ipl1", %"_2'ipl1"
// CHECK-NEXT: %8 = fmul fast float %7, %x.0.val
// CHECK-NEXT: %9 = insertvalue [4 x float] %6, float %8, 1
// CHECK-NEXT: %10 = fadd fast float %"_2'ipl2", %"_2'ipl2"
// CHECK-NEXT: %11 = fmul fast float %10, %x.0.val
// CHECK-NEXT: %12 = insertvalue [4 x float] %9, float %11, 2
// CHECK-NEXT: %13 = fadd fast float %"_2'ipl3", %"_2'ipl3"
// CHECK-NEXT: %14 = fmul fast float %13, %x.0.val
// CHECK-NEXT: %15 = insertvalue [4 x float] %12, float %14, 3
// CHECK-NEXT: %16 = insertvalue { float, [4 x float] } undef, float %_0, 0
// CHECK-NEXT: %17 = insertvalue { float, [4 x float] } %16, [4 x float] %15, 1
// CHECK-NEXT: ret { float, [4 x float] } %17
// CHECK-NEXT: }
Comment on lines +62 to +78
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All I would say here is a reiteration of my previous comments.


fn main() {
let x = std::hint::black_box(3.0);
Expand Down
9 changes: 3 additions & 6 deletions tests/codegen-llvm/autodiff/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,9 @@ fn square<T: std::ops::Mul<Output = T> + Copy>(x: &T) -> T {

// Ensure that `d_square::<f64>` code is generated even if `square::<f64>` was never called
//
// CHECK: ; generic::square
// CHECK-NEXT: ; Function Attrs:
// CHECK-NEXT: define internal {{.*}} double
// CHECK-NEXT: start:
// CHECK-NOT: ret
// CHECK: fmul double
// CHECK: define internal{{.*}} @diffe_{{.*}}generic6square
// CHECK-SAME: double
// CHECK: f{{add|mul}} {{.*}} double
Comment on lines +25 to +27
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, this looks very different now?


fn main() {
let xf32: f32 = std::hint::black_box(3.0);
Expand Down
6 changes: 3 additions & 3 deletions tests/codegen-llvm/autodiff/identical_fnc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ fn square2(x: &f64) -> f64 {

// CHECK:; identical_fnc::main
// CHECK-NEXT:; Function Attrs:
// CHECK-NEXT:define internal void @_ZN13identical_fnc4main17h6009e4f751bf9407E()
// CHECK-NEXT:define internal void @{{[^ (]+identical_fnc4main[^ (]*}}()
// CHECK-NEXT:start:
// CHECK-NOT:br
// CHECK-NOT:ret
// CHECK:; call identical_fnc::d_square
// CHECK-NEXT:call fastcc void @_ZN13identical_fnc8d_square[[HASH:.+]](double %x.val, ptr noalias noundef align 8 dereferenceable(8) %dx1)
// CHECK-NEXT:{{(tail )?}}call fastcc void @[[DSQUARE:[^ (]+identical_fnc8d_square[^ (]*]](double %x.val, ptr noalias noundef align 8 dereferenceable(8) %dx1)
// CHECK:; call identical_fnc::d_square
// CHECK-NEXT:call fastcc void @_ZN13identical_fnc8d_square[[HASH]](double %x.val, ptr noalias noundef align 8 dereferenceable(8) %dx2)
// CHECK-NEXT:{{(tail )?}}call fastcc void @[[DSQUARE]](double %x.val, ptr noalias noundef align 8 dereferenceable(8) %dx2)

fn main() {
let x = std::hint::black_box(3.0);
Expand Down
Loading