The MASM Forum

Microsoft 64 bit MASM => MASM64 SDK => Topic started by: hutch-- on May 05, 2018, 10:51:33 PM

Title: Different segments for different data sizes.
Post by: hutch-- on May 05, 2018, 10:51:33 PM
I have been playing with the idea of isolating the larger data types in their own segments so that each one keeps its correct alignment. The ones that matter the most are the AVX and AVX2 sizes, but it was easy enough to add the SSE alignment as well. It means that, on the fly inside a procedure, you can declare an initialised value at a correctly aligned data location with no melodrama.

The stackframe macros can already align the LOCAL data in procedures so that the first item is correctly aligned to the larger data sizes. This requires the discipline of putting the larger data sizes first and adding any others in descending size order to maintain alignment but it is simple enough to do.

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    include \masm32\include64\masm64rt.inc

    ; ------------------------------------------------------------------
    ; Text equates that open a dedicated segment for each large data size.
    ; Writing one of these names on a line of its own expands to the
    ; SEGMENT directive between the angle brackets, so the data that
    ; follows is placed in a segment with the stated align(N) value:
    ;   .sse_data -> SEG16, align(16)
    ;   .ymm_data -> SEG32, align(32)
    ;   .zmm_data -> SEG64, align(64)
    ; ------------------------------------------------------------------
    .sse_data equ <SEG16 SEGMENT align(16)>
    .ymm_data equ <SEG32 SEGMENT align(32)>
    .zmm_data equ <SEG64 SEGMENT align(64)>

    ; Upper-case aliases; each one expands to the lower-case name above.
    .SSE_DATA equ <.sse_data>
    .YMM_DATA equ <.ymm_data>
    .ZMM_DATA equ <.zmm_data>

    ; ------------------------------------------------------------------
    ; Open the align(32) SEG32 segment and reserve eight uninitialised
    ; YMMWORD slots. testproc stores ymm8-ymm15 into avx2a-avx2h and
    ; reloads them afterwards. Each slot is 32 bytes, so consecutive
    ; slots keep the same alignment as the first one.
    ; ------------------------------------------------------------------
    .ymm_data
      avx2a YMMWORD ?
      avx2b YMMWORD ?
      avx2c YMMWORD ?
      avx2d YMMWORD ?
      avx2e YMMWORD ?
      avx2f YMMWORD ?
      avx2g YMMWORD ?
      avx2h YMMWORD ?

    ; switch back to the code segment for the procedures that follow
    .code

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

YMMSTACK                                        ; YMM reg alignment

; ----------------------------------------------------------------------
; entry_point
;   Program entry. Demonstrates that YMMWORD LOCALs declared first in a
;   procedure assembled under YMMSTACK can be used with alignment-checked
;   256-bit moves, and that memory returned by aalloc with a 4096 byte
;   alignment can be read the same way.
;
;   In:    nothing
;   Out:   does not return; the process ends through ".exit 0"
;   Notes: vmovdqa faults on a memory operand that is not 32-byte
;          aligned, so running to completion confirms the alignment.
;          The 32 bytes read from the fresh allocation are whatever the
;          allocator left there; only the access itself is of interest.
; ----------------------------------------------------------------------
entry_point proc

    LOCAL avx1  :YMMWORD                        ; YMM aligned at proc start
    LOCAL avx2  :YMMWORD                        ; avx2-avx4 are not referenced below; they
    LOCAL avx3  :YMMWORD                        ; show further YMM sized locals following
    LOCAL avx4  :YMMWORD                        ; the first aligned one
  ; -------------------------------------------
  ; smaller data sizes AFTER YMM aligned locals
  ; -------------------------------------------
    LOCAL pMem  :QWORD                          ; allocated memory pointer
    LOCAL aMem  :QWORD                          ; aligned memory pointer

    mov aMem, aalloc(pMem,1024*1024*64,4096)    ; 64 MB, aligned allocate to page boundary

    mov rax, aMem                               ; address is already in rax from aalloc

  ; ---------------------------------------------------------------
  ; Plain aligned moves replace the non-temporal forms: the local is
  ; read back immediately, so a cache-bypassing store only costs time,
  ; and the 256-bit non-temporal load would need AVX2 where vmovdqa
  ; needs AVX only. The alignment check is the same.
  ; ---------------------------------------------------------------
    vmovdqa ymm0, YMMWORD PTR [rax]             ; load 32 bytes from the aligned block
    vmovdqa avx1, ymm0                          ; copy register to aligned local
    vmovdqa ymm1, avx1                          ; copy aligned local to YMM reg

    call testproc

    vzeroupper                                  ; YMM values are finished with; clear the upper
                                                ; halves before running non-AVX library code

    waitkey

    mfree pMem                                  ; release original allocation

    .exit 0

entry_point endp

STACKFRAME

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

; ----------------------------------------------------------------------
; testproc
;   Saves ymm8-ymm15 into the 32-byte aligned globals avx2a-avx2h,
;   declares two more aligned items inline in the SEG32 segment, prints
;   a message, then reloads ymm8-ymm15 from the saved copies.
;
;   In:    nothing
;   Out:   nothing; ymm8-ymm15 hold the values they had on entry
;   Notes: vmovdqa faults on a memory operand that is not 32-byte
;          aligned, so every move below also checks the alignment of
;          its data item. Registers changed by the conout macro are not
;          visible from this file.
; ----------------------------------------------------------------------
testproc proc

  ; save the eight registers; the copies are read back below, so ordinary
  ; aligned stores are used rather than cache-bypassing non-temporal ones
    vmovdqa avx2a, ymm8
    vmovdqa avx2b, ymm9
    vmovdqa avx2c, ymm10
    vmovdqa avx2d, ymm11
    vmovdqa avx2e, ymm12
    vmovdqa avx2f, ymm13
    vmovdqa avx2g, ymm14
    vmovdqa avx2h, ymm15

  ; data declared on the fly: switch to the aligned segment, define the
  ; items, then switch back to the code segment
    .ymm_data
      inited YMMWORD 1234567890.0               ; initialised item in the aligned segment
      uninit YMMWORD ?                          ; uninitialised
    .code

    vmovdqa inited, ymm15                       ; overwrite the initialised item with ymm15

    vzeroupper                                  ; everything needed is saved in memory; clear the
                                                ; upper halves before the non-AVX output code runs

    conout "Save and restore AVX2 registers.",lf,lf

    vmovdqa ymm15, inited                       ; read the inline item back

  ; restore the eight registers from their saved copies
    vmovdqa ymm8, avx2a
    vmovdqa ymm9, avx2b
    vmovdqa ymm10, avx2c
    vmovdqa ymm11, avx2d
    vmovdqa ymm12, avx2e
    vmovdqa ymm13, avx2f
    vmovdqa ymm14, avx2g
    vmovdqa ymm15, avx2h

    ret

testproc endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    end