/*
 * Test program for SSE, by kaz @ kobe1995.net
 *
 *   % c++ -g loop.C && ./a.out 999999
 *
 * update:
 *   2001/05/13  added check of kernel support for SSE
 *   2001/05/27  align 16 (*8 = 128 bit) for movaps
 *   2001/07/09  "movaps (%0),%%xmm0::g(A)" -> "movaps %0,%%xmm0::g(*A)"
 *
 * Reports the MMX / SSE / 3DNow! CPUID feature bits, then times n
 * element-wise multiplies of two 4-float vectors twice: once with the SSE
 * mulps instruction and once as plain C arithmetic.  Each run is measured
 * with gettimeofday() (microseconds) and RDTSC (time-stamp counter ticks).
 */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#if !defined(__i386__) && !defined(__x86_64__)
#error "this program uses CPUID, RDTSC and SSE inline assembly; x86 only"
#endif

enum {
    DEFAULT_ITERATIONS = 1000000, /* used when no count is given */
    VEC_LEN = 4                   /* floats per 128-bit vector */
};

/* Feature bits in EDX of CPUID leaf 1. */
#define CPUID_EDX_MMX        (1u << 23)
#define CPUID_EDX_SSE        (1u << 25)
/* Feature bit in EDX of CPUID extended leaf 0x80000001. */
#define CPUID_EXT_EDX_3DNOW  (1u << 31)

#define CPUID_EXT_BASE       0x80000000u
#define CPUID_EXT_FEATURES   0x80000001u

/*
 * Execute CPUID for `leaf` (sub-leaf 0) and store EAX, EBX, ECX, EDX in
 * regs[0..3].  All four registers are declared as asm operands so the
 * compiler knows they are overwritten; no manual push/pop is needed.
 */
static void run_cpuid(unsigned int leaf, unsigned int regs[4])
{
    unsigned int a = leaf, b, c = 0, d;

    __asm__ __volatile__("cpuid"
                         : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
    regs[0] = a;
    regs[1] = b;
    regs[2] = c;
    regs[3] = d;
}

/* Return 1 if CPUID reports MMX (leaf 1, EDX bit 23), else 0. */
int MMXcheck(void)
{
    unsigned int regs[4];

    run_cpuid(1, regs);
    return (regs[3] & CPUID_EDX_MMX) != 0;
}

/* Return 1 if CPUID reports SSE (leaf 1, EDX bit 25), else 0. */
int SSEcheck(void)
{
    unsigned int regs[4];

    run_cpuid(1, regs);
    return (regs[3] & CPUID_EDX_SSE) != 0;
}

/*
 * 3D Now! check.
 * Return 1 if CPUID reports 3DNow! (leaf 0x80000001, EDX bit 31), else 0.
 * Leaf 0x80000000 is queried first: its EAX is the highest extended leaf
 * the CPU implements, and the feature leaf is only read when it exists.
 */
int TDNcheck(void)
{
    unsigned int regs[4];

    run_cpuid(CPUID_EXT_BASE, regs);
    if (regs[0] < CPUID_EXT_FEATURES) {
        return 0;
    }
    run_cpuid(CPUID_EXT_FEATURES, regs);
    return (regs[3] & CPUID_EXT_EDX_3DNOW) != 0;
}

/*
 * Read the time-stamp counter as one 64-bit value.  EAX/EDX are asm outputs
 * of the same statement that executes RDTSC, so the compiler cannot reuse
 * them in between.  The "memory" clobber keeps the compiler from moving
 * memory accesses of the timed code across the read.
 */
static uint64_t read_tsc(void)
{
    unsigned int lo, hi;

    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi) : : "memory");
    return ((uint64_t)hi << 32) | lo;
}

/*
 * out = a * b * b * ... (n packed multiplies by b) using SSE.
 *
 * a, b and out must each point to 4 floats aligned to 16 bytes (movaps
 * faults otherwise).  n <= 0 performs no multiply and copies a to out.
 *
 * The load, the loop and the store live in ONE asm statement with xmm0 and
 * xmm1 listed as clobbers: the accumulator never has to survive a function
 * call or compiler-generated code while it sits in a register.  The xmm
 * registers are named explicitly (rather than using an "x" constraint) so
 * the file still builds without -msse on 32-bit targets.
 */
static void sse_mul_loop(const float *a, const float *b, float *out, int n)
{
    __asm__ __volatile__(
        "movaps (%[a]), %%xmm0\n\t"
        "movaps (%[b]), %%xmm1\n\t"
        "test %[cnt], %[cnt]\n\t"
        "jle 2f\n"
        "1:\n\t"
        "mulps %%xmm1, %%xmm0\n\t"
        "dec %[cnt]\n\t"
        "jnz 1b\n"
        "2:\n\t"
        "movaps %%xmm0, (%[out])"
        : [cnt] "+r"(n)
        : [a] "r"(a), [b] "r"(b), [out] "r"(out)
        : "xmm0", "xmm1", "memory", "cc");
}

/* Microseconds elapsed from *from to *to. */
static long long elapsed_us(const struct timeval *from,
                            const struct timeval *to)
{
    return 1000000LL * ((long long)to->tv_sec - (long long)from->tv_sec)
           + ((long long)to->tv_usec - (long long)from->tv_usec);
}

/* Print "float NAME=(v0,v1,v2,v3)" followed by a newline. */
static void print_vec(const char *name, const float *v)
{
    int i;

    printf("float %s=(", name);
    for (i = 0; i < VEC_LEN; i++) {
        printf("%s%f", i ? "," : "", v[i]);
    }
    printf(")\n");
}

/* Print the buffer addresses, the result vector and both timings. */
static void report(const float *a, const float *b, const float *c,
                   long long usec, uint64_t ticks)
{
    printf("A:%p B:%p C:%p\n",
           (const void *)a, (const void *)b, (const void *)c);
    print_vec("C", c);
    printf("time: %lld(us) clock: %.01f\n", usec, (double)ticks);
}

/*
 * Parse a decimal iteration count.  Returns 0 and stores the value in *out
 * on success; returns -1 if text is not a complete integer or does not fit
 * in an int.
 */
static int parse_iterations(const char *text, int *out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE
        || value < INT_MIN || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * usage: a.out N
 * Without exactly one argument the usage line is printed and the run
 * continues with DEFAULT_ITERATIONS.  A non-positive N runs zero iterations.
 */
int main(int argc, char **argv)
{
    /* 16-byte alignment is required by movaps. */
    static float A[VEC_LEN] __attribute__((aligned(16))) = {1.0f, 1.0f, 1.0f, 1.0f};
    static float B[VEC_LEN] __attribute__((aligned(16))) = {0.0f, 1.0f, 2.0f, 3.0f};
    static float C[VEC_LEN] __attribute__((aligned(16)));
    struct timeval t0, t1;
    uint64_t tsc0, tsc1;
    int n = DEFAULT_ITERATIONS;
    int i;

    printf("MMX %d SSE %d 3D Now! %d\n", MMXcheck(), SSEcheck(), TDNcheck());

    if (argc != 2) {
        fprintf(stderr, "usage: %s #\n", argv[0]);
    } else if (parse_iterations(argv[1], &n) != 0) {
        fprintf(stderr, "%s: invalid iteration count '%s'\n", argv[0], argv[1]);
        return EXIT_FAILURE;
    }

    printf("A:%p B:%p C:%p n=%d\n", (void *)A, (void *)B, (void *)C, n);
    print_vec("A", A);
    print_vec("B", B);

    /* SSE run: only attempted when CPUID says the instructions exist. */
    if (SSEcheck()) {
        /*
         * The shell exits with whatever the sysctl prints, so system()
         * returns 0 only when the reported value is 0, i.e. the kernel has
         * SSE support switched off.
         */
        if (!system("exit `/sbin/sysctl -n hw.instruction_sse 2>/dev/null`")) {
            fprintf(stderr, "lack of kernel support for SSE\n");
            exit(1);
        }

        gettimeofday(&t0, NULL);
        tsc0 = read_tsc();
        sse_mul_loop(A, B, C, n);
        tsc1 = read_tsc();
        gettimeofday(&t1, NULL);

        report(A, B, C, elapsed_us(&t0, &t1), tsc1 - tsc0);
    }

    /*
     * Scalar run: the same arithmetic in plain C, instruction selection left
     * to the compiler.  No SSE instruction is issued by hand here, so this
     * part is safe on CPUs for which SSEcheck() returned 0.  A is untouched
     * by the SSE run above, so both runs start from the same values.
     */
    gettimeofday(&t0, NULL);
    tsc0 = read_tsc();
    for (i = n; i > 0; i--) {
        A[0] *= B[0];
        A[1] *= B[1];
        A[2] *= B[2];
        A[3] *= B[3];
    }
    tsc1 = read_tsc();
    gettimeofday(&t1, NULL);

    for (i = 0; i < VEC_LEN; i++) {
        C[i] = A[i];
    }
    report(A, B, C, elapsed_us(&t0, &t1), tsc1 - tsc0);

    return 0;
}