Channel: Active questions tagged gcc - Stack Overflow

↧

Intel Compiler vs GCC code generation differences

August 27, 2024, 12:53 pm

≫ Next: Why is `std::find_if` implemented by loop unrolling but `std::for_each` isn't?

≪ Previous: Pack two binaries(executables) into one file

I'm learning about x64 programming, the differences between the Intel C++ compiler and GCC, and how each of them optimises instructions.

Questions:

What's the best way to tell the Intel compiler to dump the assembly code (similar to gcc -S)? Right now I debug and disassemble in Visual Studio to see the instructions.
The disassembled Intel-compiled psum1 doesn't follow the convention of passing arguments in registers rdi, rsi, rdx, rcx, r8, r9, as seen in the GCC assembler output. What am I missing here?

For some reason, the Intel compiler doesn't optimize away the memory access. What settings do I need to change?

            //intel compiler /Ox output            p[i] = p[i-1] + a[i];            000000013F79118B  movss       xmm1,dword ptr [rcx+rax*4+8]            000000013F791191  addss       xmm0,dword ptr [rcx+rax*4+4]            000000013F791197  movss       dword ptr [rdx+rax*4+4],xmm0            000000013F79119D  addss       xmm0,xmm1            000000013F7911A1  movss       dword ptr [rdx+rax*4+8],xmm0//GCC -O3 ouputLBB1_3:decq    %rdxLBB1_2:addq    $4, %rsi        addq    $4, %rdi        addss   (%rdi), %xmm0        movss   %xmm0, (%rsi)testq   %rdx, %rdx        jne LBB1_3        LBB1_4:

Original C code

void psum1( float a[], float p[], long int n ) {    long int i;    p[0] = a[0];    for (i=1; i<n; i++) {        p[i] = p[i-1] + a[i];    }}

Disassembly from Intel C++ Compiler 2013 on Visual Studio 2010:

Full optimisation /Ox
Enable Intrinsic function /Oi

Favor speed /Ot

void psum1( float a[], float p[], long int n ) { long int i;

p[0] = a[0];000000013F791156  movss       xmm0,dword ptr [rcx]000000013F79115A  mov         dword ptr [rdx],eaxfor( i=1; i<n; i++ ) {    000000013F79115C  jle         psum1+7Ah (13F7911CAh)    000000013F79115E  mov         eax,1    000000013F791163  lea         r10d,[r8-1]    000000013F791167  mov         r11d,r10d    000000013F79116A  xor         r9d,r9d    000000013F79116D  shr         r11d,1Fh    000000013F791171  lea         r8d,[r11+r8-1]    000000013F791176  sar         r8d,1    000000013F791179  test        r8d,r8d    000000013F79117C  jbe         psum1+5Eh (13F7911AEh)    p[i] = p[i-1] + a[i];    000000013F79117E  lea         eax,[r9+r9]    for( i=1; i<n; i++ ) {        000000013F791182  inc         r9d        p[i] = p[i-1] + a[i];        000000013F791185  movsxd      rax,eax        for( i=1; i<n; i++ ) {            000000013F791188  cmp         r9d,r8d            p[i] = p[i-1] + a[i];            000000013F79118B  movss       xmm1,dword ptr [rcx+rax*4+8]            000000013F791191  addss       xmm0,dword ptr [rcx+rax*4+4]            000000013F791197  movss       dword ptr [rdx+rax*4+4],xmm0            000000013F79119D  addss       xmm0,xmm1            000000013F7911A1  movss       dword ptr [rdx+rax*4+8],xmm0            for( i=1; i<n; i++ ) {                000000013F7911A7  jb          psum1+2Eh (13F79117Eh)                000000013F7911A9  lea         eax,[r9+r9+1]                000000013F7911AE  lea         r8d,[rax-1]                000000013F7911B2  cmp         r10d,r8d                000000013F7911B5  jbe         psum1+7Ah (13F7911CAh)                p[i] = p[i-1] + a[i];                000000013F7911B7  movsxd      rax,eax                000000013F7911BA  movss       xmm0,dword ptr [rdx+rax*4-4]                000000013F7911C0  addss       xmm0,dword ptr [rcx+rax*4]                000000013F7911C5  movss       dword ptr [rdx+rax*4],xmm0            }        }        000000013F7911CA  ret        000000013F7911CB  nop         dword ptr [rax+rax]

GCC Assembly output with full optimisation -O3

# GCC -O3 output for psum1 (AT&T syntax, Mach-O section and label naming).
# Register use visible in this listing:
#   %rdi  - walks a[]          %rsi  - walks p[]
#   %rdx  - n on entry, then a countdown of remaining iterations
#   %xmm0 - running sum, kept in a register for the whole loop
.section    __TEXT,__text,regular,pure_instructions
.globl  _psum1
.align  4, 0x90
_psum1:
Leh_func_begin1:
        pushq   %rbp
Ltmp0:
        movq    %rsp, %rbp
Ltmp1:
        movss   (%rdi), %xmm0           # xmm0 = a[0]
        movss   %xmm0, (%rsi)           # p[0] = a[0]
        cmpq    $2, %rdx
        jl  LBB1_4                      # n < 2 (signed): skip the loop entirely
        addq    $-2, %rdx               # rdx = n - 2: passes left after the first one
        jmp LBB1_2
.align  4, 0x90
LBB1_3:
        decq    %rdx
LBB1_2:
        addq    $4, %rsi                # step p forward by 4 bytes (one float)
        addq    $4, %rdi                # step a forward by 4 bytes (one float)
        addss   (%rdi), %xmm0           # running sum += a[i]; p[i-1] is not reloaded
        movss   %xmm0, (%rsi)           # p[i] = running sum
        testq   %rdx, %rdx
        jne LBB1_3                      # repeat while the countdown is non-zero
LBB1_4:
        popq    %rbp
        ret
Leh_func_end1:

↧

Trending Articles

Bath man appears in court charged with attempted murder of a man...

March 16, 2015, 7:37 am

MACLEAN, Allan

July 30, 2019, 6:00 am

Black Angus Grilled Artichokes

July 16, 2016, 4:37 pm

Practice Sheet of Right form of verbs for HSC Students

September 22, 2019, 11:40 pm

Police blotter for Jan. 12

January 12, 2018, 3:30 am

99 God Status for Whatsapp, Facebook

June 5, 2016, 11:46 pm

Rajasthan Board 12th Science Result 2018 name wise- RBSE 12th commerce result...

May 26, 2018, 9:35 pm

Notorious Naushad of Ippa gang nabbed

July 19, 2019, 6:37 am

Child Kidnapping: Amy McNeil was kidnapped on her way to school by 5 adults;...

February 5, 2017, 10:40 am

Sonible Smartlimit v1.1.5-R2R

April 16, 2024, 7:10 am

NCERT Solutions for Class 9th Sanskrit Chapter 3 पाथेयम्

December 22, 2016, 3:50 am

मतलबी दोस्त स्टेट्स | Matlabi Dost Status in Hindi – Selfish Friends Status

February 13, 2020, 3:12 am

Arrow Flash 2 – Sinhala Dubbed – Episode 23 – 20th March 2016

March 20, 2016, 9:39 am

[GET] AI Traffic Goldmine

July 6, 2025, 4:23 am

[E² Plugin] HDF-Radio

January 26, 2025, 9:02 am

Universal Multi-Patch v1.3 By RADIXX11

January 29, 2018, 2:45 pm

IWAN – Thanks and Praise ( Throw Back Thursday )

March 9, 2016, 11:43 pm

RONALD P SONDERGAARD Arrested by Miami-Dade County Corrections on Mar 03, 2017

March 3, 2017, 6:25 am

मुख मैथुन से उठाएं सेक्स का भरपूर मज़ा, जानें क्या है इसका सही तरीकामुख मैथुन...

May 17, 2020, 2:04 pm

HSSC Excise & Taxation Inspector Result 2017 Scorecard/ Category Wise Merit List

July 29, 2017, 2:44 am

© 2025 //www.rssing.com