Quantcast
Channel: Active questions tagged gcc - Stack Overflow
Viewing all articles
Browse latest Browse all 22171

Prevent GCC from optimizing away a looped write to memory mapped address

$
0
0

I've got an address which points to a control port like so (for context I'm working on a Sega/Megadrive game):

volatile u32 * vdp_ctrl = (u32 *) 0x00C00004;

And an array of initial values I want to set:

const u8 initial_vdp_vals[24] = {
  0x20,
  0x74,
  0x30,
  ..etc
};

With a loop:

typedef struct {
  u16 upper;
  u8 reg;
  u8 val;
} bitset;

typedef union {
  bitset b;
  u32 as_u32;
} u_bitset;

static inline void init_vdp() {
  u_bitset cmd = {{0x00008000}};
  for(int i = 0; i < 24; i++) {
     cmd.b.val = initial_vdp_vals[i];
     *vdp_ctrl = cmd.as_u32;
     cmd.b.reg += 1;
  }
}

The problem is that gcc (with O2 at least) optimizes this away and only writes the last value to the *vdp_ctrl pointer. I've managed to get around this by setting one of the properties in the bitset struct to volatile, but that doesn't seem like an intuitive solution, and the assembly it generates as a result is very lengthy.

So my question is two fold:

  1. What's the "correct" way to suggest to gcc that my vdp_ctrl pointer needs to accept multiple writes over time. I'm going to be using this pattern often (writing const/static data to control/data addresses in a loop), and marking random fields as volatile seem unintuitive.
  2. The "standard of quality" I'm using for the generated asm looks like the following (not gcc generated):
    move.l #initial_vdp_vals, a0
    move.l #24, d0
    move.l #0x00008000, d1

.copy:
    move.b (a0)+, d1
    move.w d1, 0x00C00004
    add.w #0x0100, d1
    dbra d0, .copy

Which is very nice and succinct. So my other question might be (as a complete C novice): Is there a better approach to my C solution to get me closer to the assembly above? I'm not even sure if my code is correct to be honest as I'm just trying to get past this loop optimization issue first since I know it's going to be a constant issue moving forward.

Runnable example to produce my issue:

volatile unsigned long * vdp_ctrl = (unsigned long *) 0x00C00004;

const unsigned char initial_vdp_vals[24] = {
  0x20,
  0x74,
  0x30,
  0x40,
  0x05,
  0x70,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x08,
  0x81,
  0x34,
  0x00,
  0x00,
  0x01,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00,
  0x00
};

typedef struct {
  unsigned int upper;
  unsigned char reg;
  unsigned char val;
} bitset;

typedef union {
  bitset b;
  unsigned long as_u32;
} u_bitset;

static inline void init_vdp() {
  u_bitset cmd = {{0x00008000}};
  for(int i = 0; i < 24; i++) {
     cmd.b.val = initial_vdp_vals[i];
     *vdp_ctrl = cmd.as_u32;
  }
}

void init() {
  init_vdp();
  for(;;) {
  }
}

With m68k-linux-gnu-gcc -ffreestanding -O2 -S -c test.c -o test.s. Generates the following:

#NO_APP
    .file   "test.c"
    .text
    .align  2
    .globl  init
    .type   init, @function
init:
    link.w %fp,#0
    move.l vdp_ctrl,%a0
    moveq #24,%d0
    move.l #32768,%d1
.L2:
    move.l %d1,(%a0)
    subq.l #1,%d0
    jne .L2
.L3:
    jra .L3
    .size   init, .-init
    .globl  initial_vdp_vals
    .section    .rodata
    .type   initial_vdp_vals, @object
    .size   initial_vdp_vals, 24
initial_vdp_vals:
    .byte   32
    .byte   116
    .byte   48
    .byte   64
    .byte   5
    .byte   112
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   8
    .byte   -127
    .byte   52
    .byte   0
    .byte   0
    .byte   1
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .byte   0
    .globl  vdp_ctrl
    .data
    .align  2
    .type   vdp_ctrl, @object
    .size   vdp_ctrl, 4
vdp_ctrl:
    .long   12582916
    .ident  "GCC: (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0"
    .section    .note.GNU-stack,"",@progbits
gcc version 7.4.0 (Ubuntu 7.4.0-1ubuntu1~18.04.1)

Note: It does appear the size of my array determines if it gets optimized out or not. When I made it only two elements it did not optimize out.


Viewing all articles
Browse latest Browse all 22171


<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>