[<<][meta][>>][..]
Wed Dec 28 16:31:00 EST 2011

Full stack?

The following compiles without warnings.

  /* Record holding a single float. */
  struct tuple_f0 {
      float m0;
  };

  /* Record holding two floats; used by fun() as its carried state. */
  struct tuple_f0_f0 {
      float m0;
      float m1;
  };

  /*
   * Running double accumulation over an array of single-float records.
   *
   * a0: in/out state.  a0[0].m0 and a0[0].m1 are read once on entry and
   *     overwritten with the final values on exit.
   * a1: input records, read at indices 0 .. a3.
   * a2: output records, written at indices 0 .. a3.
   * a3: last index to process.
   *
   * For each index i the second state value is advanced by a1[i].m0, the
   * first state value is advanced by the new second value, and the new
   * first value is stored to a2[i].m0.
   *
   * Always returns 0.
   *
   * NOTE(review): the bound test runs after the body, so index 0 is
   * processed even when a3 <= 0 and a3 + 1 elements are touched in
   * total -- confirm that this inclusive bound is intended.
   */
  int fun(struct tuple_f0_f0 * a0,
          struct tuple_f0 * a1,
          struct tuple_f0 * a2,
          int a3)
  {
      /* Load the carried state up front, before anything is stored. */
      float outer = a0[0].m0;
      float inner = a0[0].m1;
      int i = 0;

      for (;;) {
          /* Read the input before writing the output at the same index. */
          const float sample = a1[i].m0;

          inner = inner + sample;
          outer = outer + inner;
          a2[i].m0 = outer;

          /* Test before incrementing so i never advances past a3. */
          if (!(i < a3)) {
              break;
          }
          i = i + 1;
      }

      /* Write the final state back for the next call. */
      a0[0].m0 = outer;
      a0[0].m1 = inner;
      return 0;
  }

Compiling with "gcc -O3 -c test.c" and disassembling the result gives the
following asm output.  It looks pretty good, although it can probably still be
improved a bit if the loop size is fixed.


0000000000000000 <fun>:
   0:	f3 0f 10 47 04       	movss  0x4(%rdi),%xmm0
   5:	31 c0                	xor    %eax,%eax
   7:	f3 0f 58 06          	addss  (%rsi),%xmm0
   b:	f3 0f 10 0f          	movss  (%rdi),%xmm1
   f:	85 c9                	test   %ecx,%ecx
  11:	f3 0f 58 c8          	addss  %xmm0,%xmm1
  15:	f3 0f 11 0a          	movss  %xmm1,(%rdx)
  19:	7e 1d                	jle    38 <fun+0x38>
  1b:	0f 1f 44 00 00       	nopl   0x0(%rax,%rax,1)

  20:	f3 0f 58 44 86 04    	addss  0x4(%rsi,%rax,4),%xmm0
  26:	f3 0f 58 c8          	addss  %xmm0,%xmm1
  2a:	f3 0f 11 4c 82 04    	movss  %xmm1,0x4(%rdx,%rax,4)
  30:	48 83 c0 01          	add    $0x1,%rax
  34:	39 c1                	cmp    %eax,%ecx
  36:	7f e8                	jg     20 <fun+0x20>

  38:	f3 0f 11 0f          	movss  %xmm1,(%rdi)
  3c:	31 c0                	xor    %eax,%eax
  3e:	f3 0f 11 47 04       	movss  %xmm0,0x4(%rdi)
  43:	c3                   	retq 



[Reply][About]
[<<][meta][>>][..]