I had below C program:
__int64_t fib_tr_const (__int64_t n, __int64_t n_1, __int64_t n_2){ if (n == 0x42 || n == 0x43 ) { return n_1 + n_2; } return 0x31 + fib_tr_const (n-1, n_2, n_1 + n_2);}void main (int argc, char ** argv){ __int64_t n = atoi (argv[1]); // printf("fib(%ld) = %ld\n", n, fib(n)); // printf("fib_tr(%ld) = %ld\n", n, fib_tr(n, 0, 1)); __int64_t volatile result = 0x4567; printf ("11111\n"); printf ("11111\n"); result = fib_tr_const(n, 0x123, 0x234); printf ("22222\n"); printf ("22222\n"); printf("fib_tr_const(%ld) = %ld\n", n, result);}
And I built it with -O2
.
gcc tail_call.c -O2 -o tail_call_const_O2
I disassembled the binary:
The main():
00000000000010a0 <main>: 10a0: f3 0f 1e fa endbr64 10a4: 41 54 push %r12 10a6: ba 0a 00 00 00 mov $0xa,%edx 10ab: 48 83 ec 10 sub $0x10,%rsp 10af: 48 8b 7e 08 mov 0x8(%rsi),%rdi 10b3: 31 f6 xor %esi,%esi 10b5: e8 c6 ff ff ff callq 1080 <strtol@plt> 10ba: 48 8d 3d 43 0f 00 00 lea 0xf43(%rip),%rdi # 2004 <_IO_stdin_used+0x4> 10c1: 48 c7 44 24 08 67 45 movq $0x4567,0x8(%rsp) 10c8: 00 00 10ca: 4c 63 e0 movslq %eax,%r12 10cd: e8 9e ff ff ff callq 1070 <puts@plt> 10d2: 48 8d 3d 2b 0f 00 00 lea 0xf2b(%rip),%rdi # 2004 <_IO_stdin_used+0x4> 10d9: e8 92 ff ff ff callq 1070 <puts@plt>------------------START------------------- 10de: 49 8d 44 24 ff lea -0x1(%r12),%rax 10e3: 48 83 f8 01 cmp $0x1,%rax 10e7: 76 75 jbe 115e <main+0xbe> 10e9: 4c 89 e0 mov %r12,%rax 10ec: 31 c9 xor %ecx,%ecx 10ee: ba 01 00 00 00 mov $0x1,%edx 10f3: 31 ff xor %edi,%edi 10f5: eb 0c jmp 1103 <main+0x63> 10f7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 10fe: 00 00 1100: 48 89 f2 mov %rsi,%rdx 1103: 48 8d 34 17 lea (%rdi,%rdx,1),%rsi 1107: 48 89 c7 mov %rax,%rdi 110a: 48 83 e8 01 sub $0x1,%rax 110e: 48 01 f9 add %rdi,%rcx 1111: 48 89 d7 mov %rdx,%rdi 1114: 48 83 f8 02 cmp $0x2,%rax 1118: 75 e6 jne 1100 <main+0x60> 111a: 48 01 f2 add %rsi,%rdx 111d: 48 01 d1 add %rdx,%rcx 1120: 48 8d 3d e3 0e 00 00 lea 0xee3(%rip),%rdi # 200a <_IO_stdin_used+0xa> 1127: 48 89 4c 24 08 mov %rcx,0x8(%rsp)------------------END------------------- 112c: e8 3f ff ff ff callq 1070 <puts@plt> 1131: 48 8d 3d d2 0e 00 00 lea 0xed2(%rip),%rdi # 200a <_IO_stdin_used+0xa> 1138: e8 33 ff ff ff callq 1070 <puts@plt> 113d: 48 8b 4c 24 08 mov 0x8(%rsp),%rcx 1142: 48 83 c4 10 add $0x10,%rsp 1146: 31 c0 xor %eax,%eax 1148: 4c 89 e2 mov %r12,%rdx 114b: 48 8d 35 be 0e 00 00 lea 0xebe(%rip),%rsi # 2010 <_IO_stdin_used+0x10> 1152: bf 01 00 00 00 mov $0x1,%edi 1157: 41 5c pop %r12 1159: e9 32 ff ff ff jmpq 1090 <__printf_chk@plt> 115e: ba 01 00 00 00 mov $0x1,%edx 1163: 31 c9 xor %ecx,%ecx 1165: eb b6 jmp 111d <main+0x7d> 1167: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 116e: 00 00
The fib_tr_const():
00000000000012e0 <fib_tr_const>: 12e0: f3 0f 1e fa endbr64 12e4: 48 8d 47 be lea -0x42(%rdi),%rax 12e8: 48 89 f9 mov %rdi,%rcx 12eb: 48 83 f8 01 cmp $0x1,%rax 12ef: 77 0a ja 12fb <fib_tr_const+0x1b> 12f1: eb 35 jmp 1328 <fib_tr_const+0x48> 12f3: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 12f8: 48 89 c2 mov %rax,%rdx 12fb: 48 83 ef 01 sub $0x1,%rdi 12ff: 48 8d 04 16 lea (%rsi,%rdx,1),%rax 1303: 48 89 d6 mov %rdx,%rsi 1306: 48 83 ff 43 cmp $0x43,%rdi 130a: 75 ec jne 12f8 <fib_tr_const+0x18> 130c: 48 8d 34 49 lea (%rcx,%rcx,2),%rsi 1310: 48 01 c2 add %rax,%rdx 1313: 48 c1 e6 04 shl $0x4,%rsi 1317: 48 8d 8c 31 2d f3 ff lea -0xcd3(%rcx,%rsi,1),%rcx 131e: ff 131f: 48 8d 04 0a lea (%rdx,%rcx,1),%rax 1323: c3 retq 1324: 0f 1f 40 00 nopl 0x0(%rax) 1328: 48 89 d0 mov %rdx,%rax 132b: 48 89 f2 mov %rsi,%rdx 132e: 31 c9 xor %ecx,%ecx 1330: 48 01 c2 add %rax,%rdx 1333: 48 8d 04 0a lea (%rdx,%rcx,1),%rax 1337: c3 retq 1338: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1) 133f: 00
With the landmarks printf ("11111\n");
and printf ("22222\n");
, and thus puts
in assembly. I believe the part between START
and END
should be where the main()
calls fib_tr_const()
.
I don't see any call
or jmp
instructions that branch from main() to the fib_tr_const(). So how is this function called?
I compared the assembly between the marked points with the assembly of the fib_tr_const(). The asm code looks to be similar. It seems the fib_tr_const() function is inlined.
Another question arises - if the function is inlined, why does the code still exist in the final binary? It would save space if it were removed, so why doesn't this happen?