News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

AVX test piece.

Started by hutch--, November 13, 2017, 04:07:51 PM

Previous topic - Next topic

hutch--

Test piece to try out a few things: a segment for YMM-sized data that shows up in the PE header, an aligned procedure stack that allows YMM-sized local variables, and some simple test code to show that all of the aligned data works OK. The segment definition may not be politically correct, but it works OK on 64-bit Windows 10.

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    include \masm32\include64\masm64rt.inc

    .avxdata SEGMENT align(64)                  ; separate data segment, base aligned to 64 bytes
      ; Eight uninitialised YMMWORD (32-byte) variables declared back to back.
      ; testproc stores ymm8..ymm15 into them and reloads them afterwards.
      avx2a YMMWORD ?                           ; receives ymm8
      avx2b YMMWORD ?                           ; receives ymm9
      avx2c YMMWORD ?                           ; receives ymm10
      avx2d YMMWORD ?                           ; receives ymm11
      avx2e YMMWORD ?                           ; receives ymm12
      avx2f YMMWORD ?                           ; receives ymm13
      avx2g YMMWORD ?                           ; receives ymm14
      avx2h YMMWORD ?                           ; receives ymm15
    .avxdata ENDS

    .code

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

YMMSTACK                                        ; YMM-aligned stack frame for the proc that follows

; -------------------------------------------------------------------------
; entry_point
;
; Program entry. Allocates a page-aligned buffer, moves 32 bytes from it
; through a YMM register and a YMM-sized local using aligned AVX moves
; (which fault if either address is not 32-byte aligned), calls testproc,
; waits for a key, frees the buffer and exits.
;
; In:   nothing
; Out:  does not return; exit code 0 on success, 1 if the allocation fails
; -------------------------------------------------------------------------
entry_point proc

  ; -------------------------------------------
  ; YMM-sized locals first, so they start on the
  ; aligned base of the frame
  ; -------------------------------------------
    LOCAL avx1  :YMMWORD
    LOCAL avx2  :YMMWORD
    LOCAL avx3  :YMMWORD
    LOCAL avx4  :YMMWORD
  ; -------------------------------------------
  ; smaller data sizes AFTER YMM aligned locals
  ; -------------------------------------------
    LOCAL pMem  :QWORD                          ; raw allocation, the pointer handed to mfree
    LOCAL aMem  :QWORD                          ; aligned pointer returned by aalloc

    mov aMem, aalloc(pMem,1024*1024*64,4096)    ; 64 MB, aligned to a 4096-byte boundary

  ; NOTE(review): assumes aalloc leaves pMem = 0 when the allocation fails -
  ; confirm against the macro definition in the SDK.
    cmp pMem, 0
    je alloc_failed                             ; never dereference a failed allocation

    mov rax, aMem                               ; rax = aligned buffer base

  ; Plain aligned moves: the local is reloaded immediately, so a
  ; cache-bypassing non-temporal store is the wrong tool here, and
  ; vmovdqa needs only AVX where the YMM form of vmovntdqa needs AVX2.
    vmovdqa ymm0, YMMWORD PTR [rax]             ; load the first 32 bytes of the buffer
    vmovdqa avx1, ymm0                          ; store them to the aligned local
    vmovdqa ymm1, avx1                          ; read the aligned local back

    call testproc

    vzeroupper                                  ; last AVX use is behind us; clear the upper
                                                ; YMM halves before running library code
    waitkey

    mfree pMem                                  ; release original allocation

    .exit 0

  alloc_failed:
    conout "Memory allocation failed.",lf
    .exit 1

entry_point endp

STACKFRAME                                      ; presumably restores the SDK's default stack frame
                                                ; for the procs below - verify in the SDK macros

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

; -------------------------------------------------------------------------
; testproc
;
; Saves ymm8..ymm15 to the global slots avx2a..avx2h, prints a message,
; then reloads the registers from the same slots. The aligned moves fault
; if a slot is not 32-byte aligned, so a clean run confirms the alignment
; of the .avxdata variables.
;
; In:    nothing
; Out:   ymm8..ymm15 hold the values they had on entry
; Clobb: upper halves of ymm0..ymm7 (vzeroupper), plus whatever conout
;        clobbers (not visible from this file)
; Note:  uses fixed global storage, so it is not reentrant.
; -------------------------------------------------------------------------
testproc proc

  ; Cached aligned stores: the slots are read back a few instructions
  ; later, so non-temporal (cache-bypassing) stores would only add a
  ; round trip to memory.
    vmovdqa avx2a, ymm8
    vmovdqa avx2b, ymm9
    vmovdqa avx2c, ymm10
    vmovdqa avx2d, ymm11
    vmovdqa avx2e, ymm12
    vmovdqa avx2f, ymm13
    vmovdqa avx2g, ymm14
    vmovdqa avx2h, ymm15

    vzeroupper                                  ; full registers are saved above; clear the
                                                ; upper halves before running library code
    conout "Save and restore AVX2 registers.",lf,lf

  ; vmovdqa needs only AVX; the YMM form of vmovntdqa requires AVX2.
    vmovdqa ymm8, avx2a
    vmovdqa ymm9, avx2b
    vmovdqa ymm10, avx2c
    vmovdqa ymm11, avx2d
    vmovdqa ymm12, avx2e
    vmovdqa ymm13, avx2f
    vmovdqa ymm14, avx2g
    vmovdqa ymm15, avx2h

    ret

testproc endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    end