# test1 -- compiler-emitted code (GAS/LLVM Intel syntax).
# In:   edi, esi  (each is copied into several slots of the stack area below)
# Out:  eax = dword loaded through the pointer kept at [rsp + 24]
# Calls: foo (through rcx), then whatever function pointer sits at the start
#        of the object that the global `frame2` points to.
# Stack: 168 bytes. 168 = 8 (mod 16), so if rsp = 8 (mod 16) on entry (SysV
#        assumption -- TODO confirm target ABI) rsp is 16-byte aligned at
#        both calls.
# NOTE(review): the stored values (function pointer at +0, small integer at
# +8, pointer slots at +16/+24/+32) look like a coroutine/async frame for foo
# at [rsp] with a nested frame at [rsp + 64]; that reading is inferred from
# how foo/bar/baz consume these offsets -- confirm against the front end.
test1:
sub rsp, 168
# rax = address of the dword slot at rsp + 40; stored at [rsp + 24] below and
# read back after the calls to fetch the return value.
lea rax, [rsp + 40]
# [rsp + 0] = address of foo.
mov qword ptr [rsp], offset foo
# rcx = address of foo, used as the call target further down.
mov ecx, offset foo
mov qword ptr [rsp + 24], rax
# Copies of the two incoming arguments at +44 / +48.
mov dword ptr [rsp + 44], edi
mov dword ptr [rsp + 48], esi
# rax = rsp + 64; this address is published to the global frame1 below.
lea rax, [rsp + 64]
# Second copy of the arguments at +128 / +132.
mov dword ptr [rsp + 128], edi
mov dword ptr [rsp + 132], esi
# rdx = address of the dword at rsp + 104; stored at [rsp + 88] below.
lea rdx, [rsp + 104]
mov qword ptr [rsp + 96], 0
# [rsp + 64] = address of bar.
mov qword ptr [rsp + 64], offset bar
mov qword ptr [rsp + 88], rdx
# Third copy of the arguments at +108 / +112.
mov dword ptr [rsp + 108], edi
mov dword ptr [rsp + 112], esi
# One 16-byte store: [rsp + 8] = 2 and [rsp + 16] = 0 (vmovq zeroes the upper
# half of xmm0).
mov edx, 2
vmovq xmm0, rdx
vmovdqu xmmword ptr [rsp + 8], xmm0
# frame1 = rsp + 64.
mov qword ptr [rip + frame1], rax
mov qword ptr [rsp + 72], 2
# [rsp + 104] = first argument (edi is still the incoming value here).
mov dword ptr [rsp + 104], edi
# rdi = rsp: base pointer passed to foo.
mov rdi, rsp
# [rsp + 80] = bitwise NOT of rsp.
mov rax, rdi
not rax
mov qword ptr [rsp + 80], rax
# rsi = -2 is passed along, but foo never reads its incoming rsi.
mov rsi, -2
call rcx
# Load the pointer held in the global frame2 and call the function pointer
# stored in its first qword, with rdi = that pointer and rsi = -1.
mov rdi, qword ptr [rip + frame2]
mov rsi, -1
call qword ptr [rdi]
# Return value: the dword behind the pointer stored at [rsp + 24].
mov rax, qword ptr [rsp + 24]
mov eax, dword ptr [rax]
add rsp, 168
ret
# foo -- compiler-emitted dispatcher over the object at rdi.
# In:   rdi = base pointer. The incoming rsi is never read here.
# Flow: the qword at [rdi + 8] selects one of three paths:
#         3         -> .LBB1_6 (combine two dwords, write the result out,
#                      optionally tail-jump to a stored continuation)
#         2         -> .LBB1_5 (arm the sub-object at rdi + 64 with baz)
#         otherwise -> fall-through (arm the sub-object at rdi + 64 with bar)
#       test1 stores 2 at offset 8 before its call, so it enters at .LBB1_5.
# Out:  nothing is returned in a register by the code visible here; results
#       are written to memory.
# Clobbers (as written): rax, rcx, rdx, rsi, r8, r9, flags; also rdi on the
#       tail-jump path.
# NOTE(review): this looks like a resume function for an async/coroutine
# frame with [rdi + 8] as the resume index -- inferred from the shape of the
# code, confirm against the front end that produced it.
foo:
mov rax, qword ptr [rdi + 8]
cmp rax, 3
je .LBB1_6
# r8 = rdi + 64, the address published through [rax] in .LBB1_3. lea leaves
# flags untouched, but the cmp that follows sets them afresh anyway.
lea r8, [rdi + 64]
cmp rax, 2
je .LBB1_5
# Selector is neither 2 nor 3.
# ecx/edx = the two dwords at +44 / +48; they stay live into .LBB1_3.
mov ecx, dword ptr [rdi + 44]
mov edx, dword ptr [rdi + 48]
mov dword ptr [rdi + 128], ecx
mov dword ptr [rdi + 132], edx
# r9 = address of the dword at rdi + 104, stored at [rdi + 88] in .LBB1_3.
lea r9, [rdi + 104]
mov qword ptr [rdi + 96], 0
# [rdi + 64] = address of bar.
mov qword ptr [rdi + 64], offset bar
# rax = address of the global frame1 (target of the store in .LBB1_3).
mov eax, offset frame1
# esi = 2: the next value for the selector at [rdi + 8].
mov esi, 2
jmp .LBB1_3
.LBB1_6:
# Selector == 3.
# rcx = pointer stored at [rdi + 24]; the result is written through it.
mov rcx, qword ptr [rdi + 24]
# eax = [rdi + 104] + [rdi + 136]; the first operand is also saved at +148 and
# the sum at +152.
mov eax, dword ptr [rdi + 104]
mov dword ptr [rdi + 148], eax
add eax, dword ptr [rdi + 136]
mov dword ptr [rdi + 152], eax
mov dword ptr [rcx], eax
# rax = qword at [rdi + 16]; the slot is overwritten with its bitwise NOT.
mov rax, qword ptr [rdi + 16]
mov rdx, rax
not rdx
mov qword ptr [rdi + 16], rdx
# If the old value was 0 there is nothing to jump to: plain return.
test rax, rax
je .LBB1_4
# If the pointer at [rdi + 32] is non-null, copy the result dword to it.
mov rdx, qword ptr [rdi + 32]
test rdx, rdx
je .LBB1_9
mov ecx, dword ptr [rcx]
mov dword ptr [rdx], ecx
.LBB1_9:
# Tail-jump to the function pointer in the first qword of the object at rax,
# with rdi = rax and rsi = -2.
mov rcx, qword ptr [rax]
mov rdi, rax
mov rsi, -2
jmp rcx
.LBB1_5:
# Selector == 2.
lea r9, [rdi + 104]
# Save the dword currently at +104 into +136 (it is added back in .LBB1_6).
mov eax, dword ptr [rdi + 104]
mov dword ptr [rdi + 136], eax
# ecx/edx = the two dwords at +44 / +48; copied to +140 / +144 and kept live
# into .LBB1_3.
mov ecx, dword ptr [rdi + 44]
mov edx, dword ptr [rdi + 48]
mov dword ptr [rdi + 140], ecx
mov dword ptr [rdi + 144], edx
mov qword ptr [rdi + 96], 0
# [rdi + 64] = address of baz.
mov qword ptr [rdi + 64], offset baz
# rax = address of the global frame2; esi = 3 is the next selector value.
mov eax, offset frame2
mov esi, 3
.LBB1_3:
# Shared tail. Live on entry: rax = &frame1 or &frame2, rsi = next selector,
# r8 = rdi + 64, r9 = rdi + 104, ecx/edx = the dwords from +44 / +48.
mov qword ptr [rdi + 80], rdi
mov qword ptr [rdi + 88], r9
mov dword ptr [rdi + 108], ecx
mov dword ptr [rdi + 112], edx
mov qword ptr [rdi + 8], rsi
# Publish rdi + 64 through the selected global.
mov qword ptr [rax], r8
mov qword ptr [rdi + 72], 2
.LBB1_4:
ret
# test2 -- leaf routine, no stack or memory access.
# In:   rdi, rsi
# Out:  eax = low 32 bits of (rdi + rsi); writing eax zeroes the upper half
#       of rax.
# lea performs the addition without modifying flags.
test2:
        lea     eax, [rsi + rdi]
        ret
# bar -- compiler-emitted routine operating on the object at rdi.
# In:   rdi = base pointer. The incoming rsi is never read here.
# Flow: if the qword at [rdi + 8] is 0, store rdi in the global frame1, set
#       [rdi + 8] = 2 and return. Otherwise write the dword at [rdi + 44]
#       through the pointer at [rdi + 24], then either return or tail-jump to
#       a stored continuation (details inline).
# Clobbers (as written): rax, rcx, rdx, flags; rdi and rsi on the tail-jump
#       path.
# NOTE(review): offsets +8/+16/+24/+32 look like resume index / awaiter /
# result pointer / result copy-out pointer of an async frame -- inferred,
# confirm against the front end.
bar:
cmp qword ptr [rdi + 8], 0
je .LBB3_5
# rcx = destination pointer from [rdi + 24]; store the dword from +44 there.
mov rcx, qword ptr [rdi + 24]
mov eax, dword ptr [rdi + 44]
mov dword ptr [rcx], eax
# rax = qword at [rdi + 16]; the slot is overwritten with its bitwise NOT.
mov rax, qword ptr [rdi + 16]
mov rdx, rax
not rdx
mov qword ptr [rdi + 16], rdx
# Old value 0: nothing to continue into, plain return.
test rax, rax
je .LBB3_6
# If the pointer at [rdi + 32] is non-null, copy the just-stored dword to it
# (it is reloaded through rcx rather than reused from eax, which now holds
# part of the [rdi + 16] value).
mov rdx, qword ptr [rdi + 32]
test rdx, rdx
je .LBB3_4
mov ecx, dword ptr [rcx]
mov dword ptr [rdx], ecx
.LBB3_4:
# Tail-jump to the function pointer in the first qword of the object at rax,
# with rdi = rax and rsi = -2.
mov rcx, qword ptr [rax]
mov rdi, rax
mov rsi, -2
jmp rcx
.LBB3_5:
# [rdi + 8] was 0: publish this object through frame1 and set [rdi + 8] = 2.
mov qword ptr [rip + frame1], rdi
mov qword ptr [rdi + 8], 2
.LBB3_6:
ret
# baz -- compiler-emitted routine operating on the object at rdi; same shape
# as bar, but it forwards the dword at [rdi + 48] and publishes to frame2.
# In:   rdi = base pointer. The incoming rsi is never read here.
# Flow: if the qword at [rdi + 8] is 0, store rdi in the global frame2, set
#       [rdi + 8] = 2 and return. Otherwise write the dword at [rdi + 48]
#       through the pointer at [rdi + 24], then either return or tail-jump to
#       a stored continuation (details inline).
# Clobbers (as written): rax, rcx, rdx, flags; rdi and rsi on the tail-jump
#       path.
# NOTE(review): offsets +8/+16/+24/+32 look like resume index / awaiter /
# result pointer / result copy-out pointer of an async frame -- inferred,
# confirm against the front end.
baz:
cmp qword ptr [rdi + 8], 0
je .LBB4_5
# rcx = destination pointer from [rdi + 24]; store the dword from +48 there.
mov rcx, qword ptr [rdi + 24]
mov eax, dword ptr [rdi + 48]
mov dword ptr [rcx], eax
# rax = qword at [rdi + 16]; the slot is overwritten with its bitwise NOT.
mov rax, qword ptr [rdi + 16]
mov rdx, rax
not rdx
mov qword ptr [rdi + 16], rdx
# Old value 0: nothing to continue into, plain return.
test rax, rax
je .LBB4_6
# If the pointer at [rdi + 32] is non-null, copy the just-stored dword to it.
mov rdx, qword ptr [rdi + 32]
test rdx, rdx
je .LBB4_4
mov ecx, dword ptr [rcx]
mov dword ptr [rdx], ecx
.LBB4_4:
# Tail-jump to the function pointer in the first qword of the object at rax,
# with rdi = rax and rsi = -2.
mov rcx, qword ptr [rax]
mov rdi, rax
mov rsi, -2
jmp rcx
.LBB4_5:
# [rdi + 8] was 0: publish this object through frame2 and set [rdi + 8] = 2.
mov qword ptr [rip + frame2], rdi
mov qword ptr [rdi + 8], 2
.LBB4_6:
ret
Here's a nice simple example:
https://godbolt.org/z/Ca9_Wd
With `--single-threaded`, `test1` and `test2` have identical semantics and should produce identical machine code. Instead: