Quantcast
Channel: Active questions tagged gcc - Stack Overflow
Viewing all articles
Browse latest Browse all 22011

How to check for TSX support?

$
0
0

My current attempt:

/**simplified from * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family **/
#include <stdio.h>
#include <stdint.h>
#if defined(_MSC_VER)
#   include <intrin.h>
#endif

/**
 * Execute the CPUID instruction.
 *
 * @param eax   CPUID leaf (value loaded into EAX before the instruction).
 * @param ecx   CPUID sub-leaf (value loaded into ECX before the instruction).
 * @param abcd  Output array of four words; receives EAX, EBX, ECX, EDX
 *              (in that order) as they are after the instruction.
 *
 * NOTE: the values are whatever the execution environment reports. When
 * running as a guest, the hypervisor may filter individual feature bits, so
 * a cleared bit does not necessarily describe the physical CPU.
 */
void get_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd)
{
#if defined(_MSC_VER)
    /* __cpuidex is declared with int parameters; convert explicitly. */
    int regs[4];
    int k;

    __cpuidex(regs, (int)eax, (int)ecx);
    for (k = 0; k < 4; k++) {
        abcd[k] = (uint32_t)regs[k];
    }
#else
    uint32_t ebx = 0;
    uint32_t edx = 0;

#if defined(__i386__) && defined(__PIC__)
    /*
     * In case of PIC, under 32-bit EBX cannot be clobbered: park the
     * caller's EBX in EDI, run CPUID, then swap so that EBX is restored and
     * EDI carries CPUID's EBX result out of the asm.
     */
    __asm__ volatile("movl %%ebx, %%edi\n\t"
                     "cpuid\n\t"
                     "xchgl %%ebx, %%edi"
                     : "=D"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx));
#else
    /* EBX and EDX are pure outputs; EAX and ECX are both read and written. */
    __asm__ volatile("cpuid"
                     : "=b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx));
#endif

    abcd[0] = eax;
    abcd[1] = ebx;
    abcd[2] = ecx;
    abcd[3] = edx;
#endif
}

/**
 * Report whether CPUID advertises RTM.
 *
 * @return 1 if CPUID.07H(ECX=0).EBX bit 11 is set, 0 otherwise (including
 *         when leaf 07H is not available at all).
 */
int has_RTM_support(void)
{
    uint32_t abcd[4];

    /* Leaf 0 returns the highest supported basic leaf in EAX; only query
     * leaf 7 when it actually exists. */
    get_cpuid(0x0, 0x0, abcd);
    if (abcd[0] < 0x7) {
        return 0;
    }

    /*processor supports RTM execution if CPUID.07H.EBX.RTM [bit 11] = 1*/
    get_cpuid(0x7, 0x0, abcd);
    return (abcd[1] & (UINT32_C(1) << 11)) != 0;
}

/** Print whether CPUID reports RTM support. Command-line arguments are unused. */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    if (has_RTM_support()) {
        printf("This CPU supports RTM.");
    } else {
        printf("This CPU does NOT support RTM.");
    }
    return 0;
}

I have an Intel® Core™ i7-7600U (cpuinfo below), and according to its Intel ARK entry, it is supposed to support TSX-NI.

Still, the above check returns:

This CPU does NOT support RTM.

And the has_tsx implementation from the tsx-tools agrees:

RTM: No

HLE: No

Yet at the same time, I can execute this snippet just fine...

#include <stdio.h>int main(){    volatile int i = 0;    while (i < 100000000) {        __asm__ ("xbegin ABORT");        i++;        __asm__ ("xend");        __asm__ ("ABORT:");    }    printf("%d\n", i);    return 0;}

My understanding was that these asm instructions "will generate a #UD exception when used on a processor that does not support RTM" — at least, that is what the Intel manual says on the matter (page 387).

I checked the asm code, too, and these instructions are still there (see below for the content of the .s file).

So since these instructions appear to be executed, are these checks simply wrong?

If so, how would you properly test for RTM support?

ASM Code of the snippet

    .file   "rtm_simple.c"# GNU C11 (Ubuntu 6.3.0-12ubuntu2) version 6.3.0 20170406 (x86_64-linux-gnu)#   compiled by GNU C version 6.3.0 20170406, GMP version 6.1.2, MPFR version 3.1.5, MPC version 1.0.3, isl version 0.15# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072# options passed:  -imultiarch x86_64-linux-gnu rtm_simple.c -mtune=generic# -march=x86-64 -fverbose-asm -fstack-protector-strong -Wformat# -Wformat-security# options enabled:  -fPIC -fPIE -faggressive-loop-optimizations# -fasynchronous-unwind-tables -fauto-inc-dec -fchkp-check-incomplete-type# -fchkp-check-read -fchkp-check-write -fchkp-instrument-calls# -fchkp-narrow-bounds -fchkp-optimize -fchkp-store-bounds# -fchkp-use-static-bounds -fchkp-use-static-const-bounds# -fchkp-use-wrappers -fcommon -fdelete-null-pointer-checks# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types# -ffunction-cse -fgcse-lm -fgnu-runtime -fgnu-unique -fident# -finline-atomics -fira-hoist-pressure -fira-share-save-slots# -fira-share-spill-slots -fivopts -fkeep-static-consts# -fleading-underscore -flifetime-dse -flto-odr-type-merging -fmath-errno# -fmerge-debug-strings -fpeephole -fplt -fprefetch-loop-arrays# -freg-struct-return -fsched-critical-path-heuristic# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-fusion# -fsemantic-interposition -fshow-column -fsigned-zeros# -fsplit-ivs-in-unroller -fssa-backprop -fstack-protector-strong# -fstdarg-opt -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math# -ftree-cselim -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=# -ftree-phiprop -ftree-reassoc -ftree-scev-cprop -funit-at-a-time# -funwind-tables -fverbose-asm -fzero-initialized-in-bss# -m128bit-long-double -m64 -m80387 -malign-stringops# 
-mavx256-split-unaligned-load -mavx256-split-unaligned-store# -mfancy-math-387 -mfp-ret-in-387 -mfxsr -mglibc -mieee-fp# -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone -msse -msse2# -mstv -mtls-direct-seg-refs -mvzeroupper    .section    .rodata.LC0:    .string "%d\n"    .text    .globl  main    .type   main, @functionmain:.LFB0:    .cfi_startproc    pushq   %rbp    #    .cfi_def_cfa_offset 16    .cfi_offset 6, -16    movq    %rsp, %rbp  #,    .cfi_def_cfa_register 6    subq    $16, %rsp   #,    movl    $0, -4(%rbp)    #, i    jmp .L2 #.L3:#APP# 7 "rtm_simple.c" 1    xbegin ABORT# 0 "" 2#NO_APP    movl    -4(%rbp), %eax  # i, i.0_5    addl    $1, %eax    #, i.1_6    movl    %eax, -4(%rbp)  # i.1_6, i#APP# 9 "rtm_simple.c" 1    xend# 0 "" 2# 10 "rtm_simple.c" 1    ABORT:# 0 "" 2#NO_APP.L2:    movl    -4(%rbp), %eax  # i, i.2_4    cmpl    $99999999, %eax #, i.2_4    jle .L3 #,    movl    -4(%rbp), %eax  # i, i.3_8    movl    %eax, %esi  # i.3_8,    leaq    .LC0(%rip), %rdi    #,    movl    $0, %eax    #,    call    printf@PLT  #    movl    $0, %eax    #, _10    leave    .cfi_def_cfa 7, 8    ret    .cfi_endproc.LFE0:    .size   main, .-main    .ident  "GCC: (Ubuntu 6.3.0-12ubuntu2) 6.3.0 20170406"    .section    .note.GNU-stack,"",@progbits

CPUINFO

processor       : 0vendor_id       : GenuineIntelcpu family      : 6model           : 142model name      : Intel(R) Core(TM) i7-7600U CPU @ 2.80GHzstepping        : 9cpu MHz         : 2904.004cache size      : 4096 KBphysical id     : 0siblings        : 2core id         : 0cpu cores       : 2apicid          : 0initial apicid  : 0fpu             : yesfpu_exception   : yescpuid level     : 22wp              : yesflags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch rdseed clflushoptbugs            :bogomips        : 5808.00clflush size    : 64cache_alignment : 64address sizes   : 39 bits physical, 48 bits virtualpower management:processor       : 1vendor_id       : GenuineIntelcpu family      : 6model           : 142model name      : Intel(R) Core(TM) i7-7600U CPU @ 2.80GHzstepping        : 9cpu MHz         : 2904.004cache size      : 4096 KBphysical id     : 0siblings        : 2core id         : 1cpu cores       : 2apicid          : 1initial apicid  : 1fpu             : yesfpu_exception   : yescpuid level     : 22wp              : yesflags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm 3dnowprefetch rdseed clflushoptbugs            :bogomips        : 5808.00clflush size    : 64cache_alignment : 64address sizes   : 39 bits physical, 48 bits virtualpower management:

Viewing all articles
Browse latest Browse all 22011

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>