/*
 * combine3 - Direct access to vector data.
 *
 * Combines the elements of v with OP, starting from IDENT, and leaves the
 * result in *dest.  The element count and the pointer to the first element
 * are fetched once, before the loop, via vec_length() and get_vec_start().
 *
 * The running result is kept in *dest itself, so every iteration reads
 * *dest and writes it back.
 *
 * v:    vector to combine (project-defined type).
 * dest: location that receives the result; set to IDENT before the loop,
 *       so an empty vector yields IDENT.
 */
void combine3(vec_ptr v, data_t *dest)
{
    long i;
    long length = vec_length(v);
    data_t *data = get_vec_start(v);

    *dest = IDENT;
    for (i = 0; i < length; i++) {
        /* Accumulate through the pointer: one load and one store per step. */
        *dest = *dest OP data[i];
    }
}
3. Eliminating Unneeded Memory References
The assembly code for the inner loop of combine3 is shown below:
1 2 3 4 5 6 7 8 9 10
# Inner loop of combine3. data_t = double, OP = *
# dest in %rbx, data+i in %rdx, data+length in %rax
.L17:                                   # loop:
    vmovsd  (%rbx), %xmm0               # Read product from dest
    vmulsd  (%rdx), %xmm0, %xmm0        # Multiply product by data[i]
    vmovsd  %xmm0, (%rbx)               # Store product at dest
    addq    $8, %rdx                    # Increment data+i
    cmpq    %rax, %rdx                  # Compare to data+length
    jne     .L17                        # If !=, goto loop
We can see that the accumulated value is read from and written to
memory on each iteration. We can eliminate this needless reading and
writing of memory by accumulating the result in a local variable and
storing it to dest only once, after the loop, in the style of
combine4:
1 2 3 4 5 6 7 8 9 10 11
/*
 * combine4 - Accumulate the result in a local variable.
 *
 * Combines the elements of v with OP, starting from IDENT.  The running
 * result lives in the local acc, and *dest is written exactly once, after
 * the loop has finished; *dest is never read.
 *
 * v:    vector to combine (project-defined type).
 * dest: location that receives the final result; receives IDENT when the
 *       loop body never runs (length <= 0).
 */
void combine4(vec_ptr v, data_t *dest)
{
    long i;
    long length = vec_length(v);
    data_t *data = get_vec_start(v);
    data_t acc = IDENT;

    for (i = 0; i < length; i++) {
        acc = acc OP data[i];
    }
    /* Single store of the accumulated value. */
    *dest = acc;
}
With the following assembly code:
1 2 3 4 5 6 7
# Inner loop of combine4. data_t = double, OP = *
# acc in %xmm0, data+i in %rdx, data+length in %rax
.L25:                                   # loop:
    vmulsd  (%rdx), %xmm0, %xmm0        # Multiply acc by data[i]
    addq    $8, %rdx                    # Increment data+i
    cmpq    %rax, %rdx                  # Compare to data+length
    jne     .L25                        # If !=, goto loop
The key is to dereference the pointer as few times as
possible.
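However, combine4 can produce a different result from combine3 when
there is memory aliasing, i.e. when dest points into the vector's own
data: combine3 stores to dest on every iteration, so a later read of the
aliased element sees the partial result, whereas combine4 stores only once
at the end. A version that still updates dest on each iteration is: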
/*
 * combine3w - Make sure dest is updated on each iteration.
 *
 * Combines the elements of v with OP, starting from IDENT.  The running
 * result is kept in the local acc (so *dest is never read), but it is also
 * stored to *dest after every step, so a later read of data[i] that happens
 * to alias dest observes the partial result.
 *
 * *dest is additionally set to IDENT before the loop.  Without that store an
 * empty vector would leave *dest untouched, and data[0] would be read before
 * any write to dest when dest aliases it.
 *
 * v:    vector to combine (project-defined type).
 * dest: location that receives the result; may alias an element of v's data.
 */
void combine3w(vec_ptr v, data_t *dest)
{
    long i;
    long length = vec_length(v);
    data_t *data = get_vec_start(v);
    data_t acc = IDENT;

    /* Initial store: covers length == 0 and is visible to aliased reads. */
    *dest = acc;
    for (i = 0; i < length; i++) {
        acc = acc OP data[i];
        /* Store only; the next iteration continues from acc, not *dest. */
        *dest = acc;
    }
}
with the following assembly code:
1 2 3 4 5 6 7 8 9
# Inner loop of combine3. data_t = double, OP = *. Compiled -O1
# dest in %rbx, data+i in %rdx, data+length in %rax
# NOTE(review): this is the same loop as the combine3 listing and still loads
# the product from dest on every iteration, whereas combine3w keeps it in acc
# and only stores to dest. Confirm whether a different listing (e.g. one built
# at a higher optimization level, without the load) was intended here.
.L17:                                   # loop:
    vmovsd  (%rbx), %xmm0               # Read product from dest
    vmulsd  (%rdx), %xmm0, %xmm0        # Multiply product by data[i]
    vmovsd  %xmm0, (%rbx)               # Store product at dest
    addq    $8, %rdx                    # Increment data+i
    cmpq    %rax, %rdx                  # Compare to data+length
    jne     .L17                        # If !=, goto loop
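This version dereferences dest less often (inside the loop dest is only
written, never read) while still updating it on every iteration to account
for possible memory aliasing.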