Quantcast
Channel: Active questions tagged gcc - Stack Overflow
Viewing all articles
Browse latest Browse all 22113

GCC inline asm constraints for XBurst (MXU2) SIMD registers?

$
0
0

We are rewriting the gemmlowp MIPS SIMD code from MSA to MXU2, which is a different SIMD implementation from MSA.

In the original MSA code, the SIMD registers w0-w31 alias the floating-point registers f0-f31, so vector operands can be passed to inline asm with the "f" constraint:

// Stores the two vector registers of `src` to memory as four rows of eight
// bytes each, the rows being `stride` bytes apart starting at `dst_ptr`.
//
//   src     - register block; src.buf.reg[0] and src.buf.reg[1] are passed to
//             the asm with the "f" constraint and printed with the %w operand
//             modifier so the assembler sees them as MSA vector registers.
//   dst_ptr - address of the first row; it need not be word-aligned, since
//             every word is written with an swr/swl pair.
//   stride  - distance in bytes between consecutive rows.
//
// Layout written by the asm below:
//   row 0 (dst_ptr0) <- src0 words 0,1      row 1 (dst_ptr1) <- src0 words 2,3
//   row 2 (dst_ptr2) <- src1 words 0,1      row 3 (dst_ptr3) <- src1 words 2,3
inline void MipsMsaStore4x8(const RegBlockUint8<8, 4>& src,
                            std::uint8_t* dst_ptr, int stride) {
  // Assembly temporaries that will be handily referred to by their names.
  std::uint8_t *dst_ptr1, *dst_ptr2, *dst_ptr3;
  int tmp0, tmp1, tmp2, tmp3;
  asm volatile(
    // Row addresses. NOTE(review): GEMMLOWP_MIPS_XADDU / GEMMLOWP_MIPS_XLSA
    // are defined outside this snippet; presumably pointer-width "add" and
    // "shift-left-and-add", giving dst_ptr1 = dst_ptr0 + stride,
    // dst_ptr2 = dst_ptr0 + 2 * stride, dst_ptr3 = dst_ptr1 + 2 * stride.
    GEMMLOWP_MIPS_XADDU " %[dst_ptr1], %[dst_ptr0], %[stride]\n"
    GEMMLOWP_MIPS_XLSA  " %[dst_ptr2], %[stride], %[dst_ptr0], 1\n"
    GEMMLOWP_MIPS_XLSA  " %[dst_ptr3], %[stride], %[dst_ptr1], 1\n"
    // Move the four 32-bit elements of src0 into general-purpose registers.
    "copy_s.w             %[tmp0], %w[src0][0]\n"
    "copy_s.w             %[tmp1], %w[src0][1]\n"
    "copy_s.w             %[tmp2], %w[src0][2]\n"
    "copy_s.w             %[tmp3], %w[src0][3]\n"
    // Rows 0 and 1: each swr/swl pair is an unaligned 32-bit store.
    "swr                  %[tmp0], 0(%[dst_ptr0])\n"
    "swl                  %[tmp0], 3(%[dst_ptr0])\n"
    "swr                  %[tmp1], 4(%[dst_ptr0])\n"
    "swl                  %[tmp1], 7(%[dst_ptr0])\n"
    "swr                  %[tmp2], 0(%[dst_ptr1])\n"
    "swl                  %[tmp2], 3(%[dst_ptr1])\n"
    "swr                  %[tmp3], 4(%[dst_ptr1])\n"
    "swl                  %[tmp3], 7(%[dst_ptr1])\n"
    // Same again for src1, which fills rows 2 and 3.
    "copy_s.w             %[tmp0], %w[src1][0]\n"
    "copy_s.w             %[tmp1], %w[src1][1]\n"
    "copy_s.w             %[tmp2], %w[src1][2]\n"
    "copy_s.w             %[tmp3], %w[src1][3]\n"
    "swr                  %[tmp0], 0(%[dst_ptr2])\n"
    "swl                  %[tmp0], 3(%[dst_ptr2])\n"
    "swr                  %[tmp1], 4(%[dst_ptr2])\n"
    "swl                  %[tmp1], 7(%[dst_ptr2])\n"
    "swr                  %[tmp2], 0(%[dst_ptr3])\n"
    "swl                  %[tmp2], 3(%[dst_ptr3])\n"
    "swr                  %[tmp3], 4(%[dst_ptr3])\n"
    "swl                  %[tmp3], 7(%[dst_ptr3])\n"
    :
    // Outputs. Every temporary is early-clobber ("=&r") because it is written
    // before the last input has been read.
    [dst_ptr0] "+r"(dst_ptr), [dst_ptr1] "=&r"(dst_ptr1),
    [dst_ptr2] "=&r"(dst_ptr2), [dst_ptr3] "=&r"(dst_ptr3), [tmp0] "=&r"(tmp0),
    [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3)
    :
    // Inputs.
    [src0] "f"(src.buf.reg[0]), [src1] "f"(src.buf.reg[1]),
    [stride] "r"(stride)
    :
    // Clobbers. "memory" because the stores go through pointers the compiler
    // cannot see being dereferenced.
    "memory");
}

However, MXU2 uses an independent register file, vr0-vr31, instead. So our code looks like this:

 asm volatile(
    GEMMLOWP_MIPS_XADDU " %[dst_ptr1], %[dst_ptr0], %[stride]\n"
    GEMMLOWP_MIPS_XADDU " %[dst_ptr2], %[dst_ptr1], %[stride]\n"
    GEMMLOWP_MIPS_XADDU " %[dst_ptr3], %[dst_ptr2], %[stride]\n""mtcpusw              %[tmp0], %vr[src0][0]\n""mtcpusw              %[tmp1], %vr[src0][1]\n""mtcpusw              %[tmp2], %vr[src0][2]\n""mtcpusw              %[tmp3], %vr[src0][3]\n""swr                  %[tmp0], 0(%[dst_ptr0])\n""swl                  %[tmp0], 3(%[dst_ptr0])\n"
...
    "swr                  %[tmp3], 4(%[dst_ptr3])\n""swl                  %[tmp3], 7(%[dst_ptr3])\n"
    :
    // Outputs.
    [dst_ptr0] "+r"(dst_ptr),
    [dst_ptr1] "=&r"(dst_ptr1),
    [dst_ptr2] "=&r"(dst_ptr2),
    [dst_ptr3] "=&r"(dst_ptr3),
    [tmp0] "=&r"(tmp0),
    [tmp1] "=&r"(tmp1),
    [tmp2] "=&r"(tmp2),
    [tmp3] "=&r"(tmp3)
    :
    // Inputs.
    [stride] "r"(stride),
    [src0] "vr"(src.buf.reg[0]),
    [src1] "vr"(src.buf.reg[1])
    :
    // Clobbers.
    "memory");

But GCC does not take 'vr' as a valid constraint:

error: inconsistent operand constraints in an 'asm'

Now what should we do?


Viewing all articles
Browse latest Browse all 22113

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>