Author Topic: JWasm212pre reloaded  (Read 12290 times)

habran

  • Member
  • *****
  • Posts: 1206
    • uasm
Re: JWasm212pre reloaded
« Reply #15 on: August 28, 2014, 05:49:40 AM »
Hi Gunther,
Yes, and I also changed the one compiled with CodeBlocks
The JWasm under the first post was compiled with MSVC13 Express, which is free for commercial use,
so feel free to work with it
Cod-Father

Gunther

  • Member
  • *****
  • Posts: 3585
  • Forgive your enemies, but never forget their names
Re: JWasm212pre reloaded
« Reply #16 on: August 29, 2014, 02:03:55 AM »
Hi habran,

Yes, and I also changed the one compiled with CodeBlocks
The JWasm under the first post was compiled with MSVC13 Expres , which is free for commercial use
so fill free to work with it

good to know, thank you.  :t

Another point. In that thread qWord did note that jWasm's current AVX implementation is buggy. Did you fix that?

Gunther
Get your facts first, and then you can distort them.

habran

  • Member
  • *****
  • Posts: 1206
    • uasm
Re: JWasm212pre reloaded
« Reply #17 on: August 29, 2014, 06:13:11 AM »
Thanks Gunther :biggrin:

I didn't fix AVX yet, I was hoping that Japhet will return (my laptop doesn't support AVX2)

I will look if I can fix it and if I can, I'll upload it here

Cod-Father

Gunther

  • Member
  • *****
  • Posts: 3585
  • Forgive your enemies, but never forget their names
Re: JWasm212pre reloaded
« Reply #18 on: August 29, 2014, 05:51:06 PM »
Hi habran,

I didn't fix AVX yet, I was hoping that Japhet will return (my laptop doesn't support AVX2)

I will look if I can fix it and if I can, I'll upload it here

that would be great. Are you in contact with Andreas (Japhet)? We miss him.

Gunther
Get your facts first, and then you can distort them.

habran

  • Member
  • *****
  • Posts: 1206
    • uasm
Re: JWasm212pre reloaded
« Reply #19 on: August 30, 2014, 05:52:33 AM »
Unfortunately I am not in contact with Japhet
As usual bad news never comes alone:
AFAIK JWasm doesn't support AVX2 yet :(
Cod-Father

Gunther

  • Member
  • *****
  • Posts: 3585
  • Forgive your enemies, but never forget their names
Re: JWasm212pre reloaded
« Reply #20 on: August 31, 2014, 09:11:35 AM »
Hi habran,

Unfortunately I am not in contact with Japhet
As usual bad news never comes alone:
AFAIK JWasm doesn't support AVX2 yet :(

not so bad, because AVX2 is only supported by Haswell, and AVX512 doesn't exist in hardware at the moment.

Gunther
Get your facts first, and then you can distort them.

habran

  • Member
  • *****
  • Posts: 1206
    • uasm
Re: JWasm212pre reloaded
« Reply #21 on: August 31, 2014, 10:33:29 AM »
If I knew that Japhet would not return to JWasm soon, I would try to add AVX2.
However, I would need to change the whole concept in JWasm, because JWasm doesn't support 4 parameters yet.
I would probably be able to do that with a lot of sweat, because I would have to study how to implement it,
while for Japhet it would be a chickenshit :biggrin:
Even if I succeeded in doing that, Japhet could refuse to accept it; you know him, he is very fussy ;)
Cod-Father

Gunther

  • Member
  • *****
  • Posts: 3585
  • Forgive your enemies, but never forget their names
Re: JWasm212pre reloaded
« Reply #22 on: August 31, 2014, 07:39:46 PM »
Hi habran,

Even if I succeed to do that Japhet could refuse to accept it, you know him, he is very fussy ;)

yes he is.  :lol:

Gunther
Get your facts first, and then you can distort them.

habran

  • Member
  • *****
  • Posts: 1206
    • uasm
Re: JWasm212pre reloaded
« Reply #23 on: September 23, 2014, 10:54:06 PM »
I have uploaded a new version of JWasm at the top of this thread.
The reason is that I redesigned the .for loop.
Now it works faster, and it is better than ever and also better than the MSVC version

here you can see the difference:
Code: [Select]
this is from before, which is the same design as MSVC
    .for (rax=rcx¦BYTE PTR[rax]¦rax++)
000000013F884A38 48 8B C1             mov         rax,rcx 
000000013F884A3B EB 03                jmp         LSKIP (013F884A40h) 
000000013F884A3D 48 FF C0     LSTART: inc         rax 
000000013F884A40 80 38 00     LSKIP:  cmp         byte ptr [rax],0 
000000013F884A43 74 02                je          LEXIT (013F884A47h) 
    .endfor
000000013F884A45 EB F6                jmp         LSTART (013F884A3Dh) 
000000013F884A47              LEXIT:

this is the new redesigned version:
    .for (rax=rcx¦BYTE PTR[rax]¦rax++)
000000013F03119E 48 8B C1             mov         rax,rcx 
000000013F0311A1 EB 03                jmp         LSKIP (013F0311A6h) 
    .endfor
000000013F0311A3 48 FF C0    LSTART:  inc         rax 
000000013F0311A6 80 38 00    LSKIP:   cmp         byte ptr [rax],0 
000000013F0311A9 75 F8                jne         xstrlenA+0Dh (013F0311A3h) 
000000013F0311AB 


the .CONTINUE has got an additional label to jump to

here is an example:
Code: [Select]
  .for (¦WORD PTR[rcx] >= 48 && WORD PTR[rcx] <= 57¦rcx+=2)
000000013F1111A5 EB 23                jmp         LSKIP    ;(013F1111CAh) 
  movzx eax, WPTR [rcx]
000000013F1111A7 0F B7 01    LSTART:  movzx       eax,word ptr [rcx] 
    sub eax, 48
000000013F1111AA 83 E8 30             sub         eax,30h 
    .if (WORD PTR[rcx] == 43)
000000013F1111AD 66 83 39 2B          cmp         word ptr [rcx],2Bh 
000000013F1111B1 75 08                jne         main+25h (013F1111BBh) 
    mov r10d,TRUE
000000013F1111B3 41 BA 01 00 00 00    mov         r10d,1 
      .continue
000000013F1111B9 EB 0B                jmp         LCONT       ;(013F1111C6h) 
    .endif
    movsxd rdx, eax
000000013F1111BB 48 63 D0             movsxd      rdx,eax 
    lea rax, QWORD PTR [r8+r8*4]
000000013F1111BE 4B 8D 04 80          lea         rax,[r8+r8*4] 
    lea r8, QWORD PTR [rdx+rax*2]
000000013F1111C2 4C 8D 04 42          lea         r8,[rdx+rax*2] 
  .endfor
000000013F1111C6 48 83 C1 02  LCONT:  add         rcx,2 
000000013F1111CA 66 83 39 30  LSKIP:  cmp         word ptr [rcx],30h 
000000013F1111CE 72 06                jb          main+40h (013F1111D6h) 
000000013F1111D0 66 83 39 39          cmp         word ptr [rcx],39h 
000000013F1111D4 76 D1                jbe         main+11h (013F1111A7h) 
000000013F1111D6

and here is one function to see a complete work of this version of JWasm:
Code: [Select]
;-----------------------------------------------------------------------
; UTF16toUTF32 - decode UTF-16 code units into UTF-32 code points.
;
; In:   rcx        = pSource      source UTF-16 words
;       rdx        = nSourceLen   number of source words
;       r8         = nSourceDone  optional (may be 0); receives the number
;                                 of source words consumed
;       r9         = szTarget     optional (may be 0); destination DWORDs
;       nTargetMax = capacity of szTarget in DWORDs (stack argument)
; Out:  rax        = number of DWORDs produced; when szTarget is 0 nothing
;                    is stored and rax is the number that would be produced
;
; szTarget == 0 and nTargetMax == 0 : count with no output limit.
; szTarget == 0 and nTargetMax != 0 : count at most nTargetMax code points.
;
; Register roles inside the loop:
;   rsi = current source pointer
;   r13 = end of source (pSource + nSourceLen * 2)
;   rdi = current destination pointer (or running byte count when counting)
;   r12 = end of destination
;   ebx = code point being assembled
;
; NOTE: the disassembly listing that follows this procedure in the post was
; generated from the earlier revision (store after pre-increment, 2-byte
; count step) and differs from this source in the store/count section.
;-----------------------------------------------------------------------
UTF16toUTF32 PROC FRAME USES rbx rbp rdi rsi r12 r13 pSource:PTR WORD,nSourceLen:UINT_PTR,nSourceDone:PTR UINT_PTR,szTarget:PTR DWORD,nTargetMax:UINT_PTR

  mov rsi,rcx            ;rsi = pSource
  lea r13,[rsi+rdx*2]    ;r13 = pSource + nSourceLen * 2
  mov rbp,nTargetMax     ;nTargetMax in rbp
  .if (!r9 && !rbp)      ;!szTarget && !nTargetMax
    xor edi,edi          ;rdi = 0, pure counting mode
    mov r12,MAXUINT_PTR  ;all-ones limit (0FFFFFFFFFFFFFFFFh): effectively unbounded
  .else
    mov rdi,r9           ;destination = szTarget
    lea r12,[rdi+rbp*4]  ;end of DWORD destination
  .endif
  .for (¦rsi < r13 && rdi < r12¦rsi+=2)
    movzx ebx,WORD PTR[rsi]
    ;//Surrogate pair. High surrogate.
    .if (ebx >= 0d800h && ebx <= 0dbffh)
      add rsi,2
      .if (rsi >= r13)
        sub rsi,2          ;truncated pair: leave the high surrogate unconsumed
        .break
      .endif
      ;//Low surrogate
      .if (WORD PTR[rsi] >= 0dc00h && WORD PTR[rsi] <= 0dfffh)
        ;//ebx = 10000h + ((high - 0d800h) << 10) + (low - 0dc00h)
        ;//10000h - 0dc00h folds into the single displacement 2400h
        sub ebx,0d800h
        shl ebx,10
        movzx rax,WORD PTR[rsi]
        lea ebx, DWORD PTR [rbx+rax+2400h]
      .else
        ;//NOTE(review): .continue runs the loop increment (rsi+=2), so the
        ;//code unit that follows an unpaired high surrogate is skipped too.
        ;//Confirm this is intended; 'sub rsi,2' before .continue would
        ;//re-examine that unit instead.
        .continue
      .endif
    .endif
    .if (r9)               ;szTarget present: store into the current slot
      mov [rdi],ebx
    .endif
    add rdi,4              ;advance one DWORD in both store and count modes
  .endfor
  .if (r8)                 ;nSourceDone requested
    mov rax,rsi
    sub rax,rcx            ;rcx still holds pSource (never modified above)
    sar rax,1              ;bytes -> WORD count
    mov [r8],rax
  .endif
  mov rax,rdi
  sub rax,r9
  sar rax,2                ;bytes -> DWORD count
  ret
UTF16toUTF32 ENDP
produces this:
Code: [Select]
UTF16toUTF32 PROC FRAME USES rbx rbp rdi rsi r12 r13 pSource:PTR WORD,nSourceLen:UINT_PTR,nSourceDone:PTR UINT_PTR,szTarget:PTR DWORD,nTargetMax:UINT_PTR
000000013F62B41E 48 89 5C 24 10       mov         qword ptr [rsp+10h],rbx 
000000013F62B423 48 89 6C 24 18       mov         qword ptr [rsp+18h],rbp 
000000013F62B428 48 89 7C 24 20       mov         qword ptr [pSource],rdi 
000000013F62B42D 56                   push        rsi 
000000013F62B42E 41 54                push        r12 
000000013F62B430 41 55                push        r13 
  ;//rsi=pSource
  ;//r13 =PTR to WORD pSource + nSourceLen * 2 (end of source)
  ;//rdi = pointer to DWORD destinu
  ;//r12 = end of DWORD destinu
  ;//ebx = DWORD cahr
 
  mov rsi,rcx            ;rsi=pSource
000000013F62B432 48 8B F1             mov         rsi,rcx 
  lea r13,[rsi+rdx*2]    ;r13=pSource + nSourceLen * 2
000000013F62B435 4C 8D 2C 56          lea         r13,[rsi+rdx*2] 
  mov rbp,nTargetMax     ;nTargetMax in rbp
000000013F62B439 48 8B 6C 24 40       mov         rbp,qword ptr [nTargetMax] 
  .if (!r9 && !rbp)      ;!szTarget && !nTargetMax
000000013F62B43E 4D 85 C9             test        r9,r9 
000000013F62B441 75 10                jne         UTF16toUTF32+35h (013F62B453h) 
000000013F62B443 48 85 ED             test        rbp,rbp 
000000013F62B446 75 0B                jne         UTF16toUTF32+35h (013F62B453h) 
    xor edi,edi          ;clear rdi
000000013F62B448 33 FF                xor         edi,edi 
    mov r12,MAXUINT_PTR  ;0ffffffffh
000000013F62B44A 49 C7 C4 FF FF FF FF mov         r12,0FFFFFFFFFFFFFFFFh 
  .else
000000013F62B451 EB 07                jmp         UTF16toUTF32+3Ch (013F62B45Ah) 
    mov rdi,r9           ;destinu = szTarget
000000013F62B453 49 8B F9             mov         rdi,r9 
    lea r12,[rdi+rbp*4]  ;end of DWORD destinu
000000013F62B456 4C 8D 24 AF          lea         r12,[rdi+rbp*4] 
  .endif
  .for (¦rsi < r13 && rdi < r12¦rsi+=2)
000000013F62B45A EB 5D                jmp         UTF16toUTF32+9Bh (013F62B4B9h) 
    movzx ebx,WORD PTR[rsi]
000000013F62B45C 0F B7 1E             movzx       ebx,word ptr [rsi] 
    ;//Surrogate pair. High surrogate.
    .if (ebx >= 0d800h && ebx <= 0dbffh)
000000013F62B45F 81 FB 00 D8 00 00    cmp         ebx,0D800h 
000000013F62B465 72 3D                jb          UTF16toUTF32+86h (013F62B4A4h) 
000000013F62B467 81 FB FF DB 00 00    cmp         ebx,0DBFFh 
000000013F62B46D 77 35                ja          UTF16toUTF32+86h (013F62B4A4h) 
      add rsi,2
000000013F62B46F 48 83 C6 02          add         rsi,2 
      .if (rsi >= r13)
000000013F62B473 49 3B F5             cmp         rsi,r13 
000000013F62B476 72 06                jb          UTF16toUTF32+60h (013F62B47Eh) 
        sub rsi,2
000000013F62B478 48 83 EE 02          sub         rsi,2 
        .break
000000013F62B47C EB 45                jmp         UTF16toUTF32+0A5h (013F62B4C3h) 
      .endif
      ;//Low surrogate
      .if (WORD PTR[rsi] >= 0dc00h && WORD PTR[rsi] <= 0dfffh)
000000013F62B47E 66 81 3E 00 DC       cmp         word ptr [rsi],0DC00h 
000000013F62B483 72 1D                jb          UTF16toUTF32+84h (013F62B4A2h) 
000000013F62B485 66 81 3E FF DF       cmp         word ptr [rsi],0DFFFh 
000000013F62B48A 77 16                ja          UTF16toUTF32+84h (013F62B4A2h) 
        sub ebx,0d800h
000000013F62B48C 81 EB 00 D8 00 00    sub         ebx,0D800h 
        shl ebx,10
000000013F62B492 C1 E3 0A             shl         ebx,0Ah 
        movzx rax,WORD PTR[rsi]
000000013F62B495 48 0F B7 06          movzx       rax,word ptr [rsi] 
        lea ebx, DWORD PTR [rbx+rax+2400h]
000000013F62B499 8D 9C 18 00 24 00 00 lea         ebx,[rax+rbx+2400h] 
      .else
000000013F62B4A0 EB 02                jmp         UTF16toUTF32+86h (013F62B4A4h) 
        .continue
000000013F62B4A2 EB 11                jmp         UTF16toUTF32+97h (013F62B4B5h) 
      .endif
    .endif
    .if (r9)               ;szTarget
000000013F62B4A4 4D 85 C9             test        r9,r9 
000000013F62B4A7 74 08                je          UTF16toUTF32+93h (013F62B4B1h) 
      add rdi,4
000000013F62B4A9 48 83 C7 04          add         rdi,4 
      mov [rdi],ebx
000000013F62B4AD 89 1F                mov         dword ptr [rdi],ebx 
    .else
000000013F62B4AF EB 04                jmp         UTF16toUTF32+97h (013F62B4B5h) 
      add rdi,2
000000013F62B4B1 48 83 C7 02          add         rdi,2 
    .endif
  .endfor
000000013F62B4B5 48 83 C6 02          add         rsi,2 
000000013F62B4B9 49 3B F5             cmp         rsi,r13 
000000013F62B4BC 73 05                jae         UTF16toUTF32+0A5h (013F62B4C3h) 
000000013F62B4BE 49 3B FC             cmp         rdi,r12 
000000013F62B4C1 72 99                jb          UTF16toUTF32+3Eh (013F62B45Ch) 
  .if (r8)
000000013F62B4C3 4D 85 C0             test        r8,r8 
000000013F62B4C6 74 0C                je          UTF16toUTF32+0B6h (013F62B4D4h) 
    mov rax,rsi
000000013F62B4C8 48 8B C6             mov         rax,rsi 
    sub rax,rcx
000000013F62B4CB 48 2B C1             sub         rax,rcx 
    sar rax,1
000000013F62B4CE 48 D1 F8             sar         rax,1 
    mov [r8],rax
000000013F62B4D1 49 89 00             mov         qword ptr [r8],rax 
  .endif
  mov rax,rdi
000000013F62B4D4 48 8B C7             mov         rax,rdi 
  sub rax,r9
000000013F62B4D7 49 2B C1             sub         rax,r9 
  sar rax,2
000000013F62B4DA 48 C1 F8 02          sar         rax,2 
  ret
000000013F62B4DE 41 5D                pop         r13 
000000013F62B4E0 41 5C                pop         r12 
000000013F62B4E2 5E                   pop         rsi 
000000013F62B4E3 48 8B 5C 24 10       mov         rbx,qword ptr [rsp+10h] 
000000013F62B4E8 48 8B 6C 24 18       mov         rbp,qword ptr [rsp+18h] 
000000013F62B4ED 48 8B 7C 24 20       mov         rdi,qword ptr [pSource] 
000000013F62B4F2 C3                   ret 
UTF16toUTF32 ENDP


Cod-Father