News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

Use of MMX regs

Started by Magnum, March 02, 2013, 04:16:22 AM

Previous topic - Next topic

Magnum

If I have this written correctly, then XMM is quite a bit slower than MMX.

I am going to run it in safe mode to see if there is more consistency.

Andy


;
;
;
INCLUDE    \masm32\include\masm32rt.inc
.686p
.MMX
.XMM

INCLUDE    \masm32\macros\timers.asm

LOOP_COUNT = 1000000 ;try to choose a value so each run takes about 0.5 seconds

.DATA

.DATA?

.CODE

;-----------------------------------------------------------------------
; _main - console entry point of the benchmark (does not return; ends
;         through the `exit` macro).
;
; Syntax: MASM (Intel operand order), 32-bit flat model, MASM32 SDK.
;
; Purpose:
;   Times the same five-instruction sequence twice, once routed through
;   XMM0 and once through MM0:
;       clear EAX, clear the vector register, load TEST_VALUE into EBX,
;       copy EBX into the vector register, copy it back out to EDX.
;   Each test prints RUN_COUNT samples on one line, separated by ", ".
;   The printed number is whatever counter_end leaves in EAX
;   (presumably clock cycles per pass of the timed body - confirm
;   against timers.asm).
;
; Notes:
;   * Both tests clear their register with PXOR. The XMM test used to
;     use XORPS, a floating-point instruction, which made the two
;     sequences differ by more than the register file and can add a
;     domain-crossing delay on some cores.
;   * Both tests unpack into EDX so the integer side is identical.
;   * EMMS is issued after the MMX measurement, outside the timed
;     region, so the x87/MMX state is clean again without distorting
;     the reported figure.
;   * EBX is overwritten and not restored. That is tolerable only
;     because this procedure is the process entry point and never
;     returns to a caller.
;-----------------------------------------------------------------------
RUN_COUNT    = 10               ; samples printed per test
UNROLL_COUNT = 8                ; copies of the sequence per timed pass
TEST_VALUE   = 7                ; arbitrary dword pushed through the register

_main   PROC

        ; Restrict the process (not just this thread) to CPU 0 and give
        ; the system a moment to settle before measuring.
        INVOKE  GetCurrentProcess
        INVOKE  SetProcessAffinityMask, eax, 1
        INVOKE  Sleep, 300

        ;---------------------------------------------------------------
        ; Test 1: GPR -> XMM0 -> GPR
        ;---------------------------------------------------------------
        print   "XMM instructions in HIGH_PRIORITY_CLASS",13,10
        print   "                                           ",13,10

        mov     ecx, RUN_COUNT

xmm_run:
        ; Park the sample counter on the stack: ECX is volatile across
        ; the calls made by print, and is assumed not to survive the
        ; timing macros either.
        push    ecx

        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS

        REPEAT  UNROLL_COUNT
            xor     eax, eax            ; kept from the original workload
            pxor    xmm0, xmm0          ; clear XMM0 (integer form, matches MMX test)
            mov     ebx, TEST_VALUE
            movd    xmm0, ebx           ; xmm0[31:0] = ebx, upper bits zeroed
            movd    edx, xmm0           ; edx = xmm0[31:0]
        ENDM

        counter_end

        print   ustr$(eax),44,32        ; sample followed by ", "
        pop     ecx
        dec     ecx
        jnz     xmm_run

        print   chr$(13,10)

        ;---------------------------------------------------------------
        ; Test 2: GPR -> MM0 -> GPR
        ;---------------------------------------------------------------
        print   "                                           ",13,10
        print   "MM0 Instructions in HIGH_PRIORITY_CLASS",13,10
        print   "                                           ",13,10

        mov     ecx, RUN_COUNT

mmx_run:
        push    ecx                     ; same reason as in xmm_run

        counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS

        REPEAT  UNROLL_COUNT
            xor     eax, eax            ; kept from the original workload
            pxor    mm0, mm0            ; clear MM0
            mov     ebx, TEST_VALUE
            movd    mm0, ebx            ; mm0[31:0] = ebx, upper bits zeroed
            movd    edx, mm0            ; edx = mm0[31:0]
        ENDM

        counter_end

        ; MMX registers alias the x87 stack; EMMS marks them empty again.
        ; It does not touch EAX, so the sample below is unaffected.
        emms

        print   ustr$(eax),44,32        ; sample followed by ", "
        pop     ecx
        dec     ecx
        jnz     mmx_run

        print   chr$(13,10)

        inkey                           ; wait for a key so the console stays open
        exit

_main   ENDP

END     _main


Take care,
                   Andy

Ubuntu-mate-18.04-desktop-amd64

http://www.goodnewsnetwork.org

dedndave

it may run differently on different CPUs
that's why we run things in the laboratory sub-forum
so we can pick "what is best overall", rather than just "what is best on my CPU"

safe mode will probably make them both slower - just a guess   :biggrin:

frktons

On my CORE Duo the MMX and XMM registers perform more or less
at the same speed:
Quote
XMM instructions in HIGH_PRIORITY_CLASS

9, 8, 8, 8, 8, 7, 8, 8, 8, 8,

MM0 Instructions in HIGH_PRIORITY_CLASS

8, 8, 8, 8, 8, 9, 8, 9, 8, 8,
Press any key to continue ...

There are only two days a year when you can't do anything: one is called yesterday, the other is called tomorrow, so today is the right day to love, believe, do and, above all, live.

Dalai Lama

Magnum

Thanks frktons.

Dave,

I can't see how safe mode would have any negative effect.

XMM and MMX are built into the chip.

If anything, I would think they would be faster in safe mode because of minimal drivers and preloaded programs.

But I may be wrong.

Andy
Take care,
                   Andy

Ubuntu-mate-18.04-desktop-amd64

http://www.goodnewsnetwork.org

kode54

Quote
Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz (SSE4)
loop overhead is approx. 195/100 cycles

??      cycles for 100 * mov
84      cycles for 100 * movd mm
81      cycles for 100 * movd xmm

8       cycles for 100 * mov
81      cycles for 100 * movd mm
81      cycles for 100 * movd xmm

13      cycles for 100 * mov
87      cycles for 100 * movd mm
86      cycles for 100 * movd xmm

2       bytes for mov
4       bytes for movd mm
4       bytes for movd xmm
Just thought I'd pop in to remind everyone about Ivy Bridge and its mov reg32, reg32 renaming optimization.

Gunther

Hi kode54,

Quote from: kode54 on March 17, 2013, 06:25:22 AM
Just thought I'd pop in to remind everyone about Ivy Bridge and its mov reg32, reg32 renaming optimization.

that's right, but Sandy Bridge and Ivy Bridge are not very widespread at the present time. So a lot of forum members can take note of that, but cannot make use of it yet.

Gunther
You have to know the facts before you can distort them.