c++ - Why is the compiler generating different instructions for my SSE class and the intrinsic in debug? -


(Using Visual C++ 19.00.23918, compiling in debug with optimizations turned off.)

I created a class wrapper around the __m128 intrinsic type, like so:

class alignas(16) v4xm { public:      inline constexpr v4xm()         : _mfoo({0}){     }      inline v4xm(float x, float y, float z, float w)         : _mfoo(_mm_setr_ps(x, y, z, w)) {     }      inline v4xm(const v4xm& other) :         _mfoo(other._mfoo) {     }      inline v4xm(const __m128& intrinsic) :         _mfoo(intrinsic) {     }      inline v4xm operator*(const v4xm& other) const {         return(v4xm(_mm_mul_ps(_mfoo, other._mfoo)));     }  private:     __m128 _mfoo; }; 

I then tried to compare simple cases in debug, to see the difference in the assembly code the compiler generates for the inline class and for a raw __m128, like this:

// Multiplies a wrapped vector by itself through v4xm::operator*.
v4xm bar(const v4xm& a) {
    return a * a;
}

// Same operation on a raw __m128, calling _mm_mul_ps directly.
__m128 bar(const __m128& a) {
    return _mm_mul_ps(a, a);
}

// Runs the same construct-then-multiply sequence once through the class
// wrapper and once through the raw intrinsic type, so the code generated
// for the two paths can be compared side by side.
int main() {
    // Class path.
    v4xm a(1, 1, 1, 1);
    a = bar(a);

    // Raw intrinsic path.
    __m128 b(_mm_setr_ps(1, 1, 1, 1));
    b = bar(b);

    return 0;
}

This is what was generated for the class first:

    v4xm a(1, 1, 1, 1);
00a11a6a  push        ecx
00a11a6b  movss       xmm0,dword ptr [__real@3f800000 (0a16b30h)]
00a11a73  movss       dword ptr [esp],xmm0
00a11a78  push        ecx
00a11a79  movss       xmm0,dword ptr [__real@3f800000 (0a16b30h)]
00a11a81  movss       dword ptr [esp],xmm0
00a11a86  push        ecx
00a11a87  movss       xmm0,dword ptr [__real@3f800000 (0a16b30h)]
00a11a8f  movss       dword ptr [esp],xmm0
00a11a94  push        ecx
00a11a95  movss       xmm0,dword ptr [__real@3f800000 (0a16b30h)]
00a11a9d  movss       dword ptr [esp],xmm0
00a11aa2  lea         ecx,[a]
00a11aa5  call        v4xm::v4xm (0a11325h)
    a = bar(a);
00a11aaa  lea         eax,[a]
00a11aad  push        eax
00a11aae  lea         ecx,[ebp-120h]
00a11ab4  push        ecx
00a11ab5  call        bar (0a112e4h)
00a11aba  add         esp,8
00a11abd  mov         edx,dword ptr [eax]
00a11abf  mov         dword ptr [a],edx
00a11ac2  mov         ecx,dword ptr [eax+4]
00a11ac5  mov         dword ptr [ebp-1ch],ecx
00a11ac8  mov         edx,dword ptr [eax+8]
00a11acb  mov         dword ptr [ebp-18h],edx
00a11ace  mov         eax,dword ptr [eax+0ch]
00a11ad1  mov         dword ptr [ebp-14h],eax

And far fewer instructions were generated for the intrinsic:

    __m128 b(_mm_setr_ps(1, 1, 1, 1));
00a11ad4  movaps      xmm0,xmmword ptr [__xmm@3f8000003f8000003f8000003f800000 (0a16b40h)]
00a11adb  movaps      xmmword ptr [ebp-140h],xmm0
00a11ae2  movaps      xmm0,xmmword ptr [ebp-140h]
00a11ae9  movaps      xmmword ptr [b],xmm0
    b = bar(b);
00a11aed  lea         eax,[b]
00a11af0  push        eax
00a11af1  call        bar (0a1102dh)
00a11af6  add         esp,4
00a11af9  movaps      xmmword ptr [ebp-160h],xmm0
00a11b00  movaps      xmm0,xmmword ptr [ebp-160h]
00a11b07  movaps      xmmword ptr [b],xmm0

I'm curious why there is such a huge difference in what the compiler generates for these two things in debug: the class and the intrinsic. To my eyes, the class should be treated the same as the intrinsic. Why is this happening?

Also, please note that this is exclusive to compiling in debug with optimizations turned off. When I tried other tests in release, the compiler generated identical instructions for both cases.


Comments