I have uploaded a new version of JWasm at the top of this thread.
The reason is that I redesigned the .for loop.
It now runs faster, is better than ever, and is also better than the MSVC version.
Here you can see the difference.
This is the previous version, which uses the same design as MSVC:
.for (rax=rcx¦BYTE PTR[rax]¦rax++)
000000013F884A38 48 8B C1 mov rax,rcx
000000013F884A3B EB 03 jmp LSKIP (013F884A40h)
000000013F884A3D 48 FF C0 LSTART: inc rax
000000013F884A40 80 38 00 LSKIP: cmp byte ptr [rax],0
000000013F884A43 74 02 je LEXIT (013F884A47h)
.endfor
000000013F884A45 EB F6 jmp LSTART (013F884A3Dh)
000000013F884A47 LEXIT:
This is the new, redesigned version:
.for (rax=rcx¦BYTE PTR[rax]¦rax++)
000000013F03119E 48 8B C1 mov rax,rcx
000000013F0311A1 EB 03 jmp LSKIP (013F0311A6h)
.endfor
000000013F0311A3 48 FF C0 LSTART: inc rax
000000013F0311A6 80 38 00 LSKIP: cmp byte ptr [rax],0
000000013F0311A9 75 F8 jne xstrlenA+0Dh (013F0311A3h)
000000013F0311AB
The .CONTINUE directive now has an additional label to jump to.
Here is an example:
.for (¦WORD PTR[rcx] >= 48 && WORD PTR[rcx] <= 57¦rcx+=2)
000000013F1111A5 EB 23 jmp LSKIP ;(013F1111CAh)
movzx eax, WPTR [rcx]
000000013F1111A7 0F B7 01 LSTART: movzx eax,word ptr [rcx]
sub eax, 48
000000013F1111AA 83 E8 30 sub eax,30h
.if (WORD PTR[rcx] == 43)
000000013F1111AD 66 83 39 2B cmp word ptr [rcx],2Bh
000000013F1111B1 75 08 jne main+25h (013F1111BBh)
mov r10d,TRUE
000000013F1111B3 41 BA 01 00 00 00 mov r10d,1
.continue
000000013F1111B9 EB 0B jmp LCONT ;(013F1111C6h)
.endif
movsxd rdx, eax
000000013F1111BB 48 63 D0 movsxd rdx,eax
lea rax, QWORD PTR [r8+r8*4]
000000013F1111BE 4B 8D 04 80 lea rax,[r8+r8*4]
lea r8, QWORD PTR [rdx+rax*2]
000000013F1111C2 4C 8D 04 42 lea r8,[rdx+rax*2]
.endfor
000000013F1111C6 48 83 C1 02 LCONT: add rcx,2
000000013F1111CA 66 83 39 30 LSKIP: cmp word ptr [rcx],30h
000000013F1111CE 72 06 jb main+40h (013F1111D6h)
000000013F1111D0 66 83 39 39 cmp word ptr [rcx],39h
000000013F1111D4 76 D1 jbe main+11h (013F1111A7h)
000000013F1111D6
And here is a complete function that shows this version of JWasm at work:
;-----------------------------------------------------------------------
; UTF16toUTF32 - convert UTF-16 code units to UTF-32 code points.
;
; ABI:   Microsoft x64 (first four arguments arrive in rcx, rdx, r8, r9;
;        nTargetMax is read from its stack slot).
; In:    pSource     (rcx)   source buffer of UTF-16 WORDs
;        nSourceLen  (rdx)   number of WORDs in pSource
;        nSourceDone (r8)    optional; receives the number of WORDs consumed
;        szTarget    (r9)    optional; destination buffer of DWORDs
;        nTargetMax  (stack) capacity of szTarget, in DWORDs
; Out:   rax = number of code points produced. When szTarget is NULL
;        nothing is stored and the code points are only counted; when
;        nTargetMax is also 0 the count is not limited.
;
; Register roles inside the procedure:
;        rsi = current source position      r13 = end of source
;        rdi = current destination position r12 = end of destination
;        ebx = code point being assembled   rbp = nTargetMax
;        rcx is left untouched so that it still holds pSource when the
;        consumed length is computed at the end.
;
; Behaviour on malformed input:
;        - a high surrogate that is the last WORD of the input is left
;          unconsumed (not counted in nSourceDone);
;        - a high surrogate that is not followed by a low surrogate is
;          dropped, and the following WORD is then processed normally.
;-----------------------------------------------------------------------
UTF16toUTF32 PROC FRAME USES rbx rbp rdi rsi r12 r13 pSource:PTR WORD,nSourceLen:UINT_PTR,nSourceDone:PTR UINT_PTR,szTarget:PTR DWORD,nTargetMax:UINT_PTR
    mov rsi,rcx                         ;rsi = pSource
    lea r13,[rsi+rdx*2]                 ;r13 = pSource + nSourceLen WORDs
    mov rbp,nTargetMax
    .if (!r9 && !rbp)                   ;!szTarget && !nTargetMax: count only, no limit
        xor edi,edi                     ;destination "pointer" is a plain byte counter
        mov r12,MAXUINT_PTR             ;all bits set, so rdi < r12 never ends the loop
    .else
        mov rdi,r9                      ;rdi = szTarget
        lea r12,[rdi+rbp*4]             ;r12 = szTarget + nTargetMax DWORDs
    .endif
    .for (¦rsi < r13 && rdi < r12¦rsi+=2)
        movzx ebx,WORD PTR [rsi]
        ;//Surrogate pair. High surrogate.
        .if (ebx >= 0d800h && ebx <= 0dbffh)
            add rsi,2
            .if (rsi >= r13)
                sub rsi,2               ;dangling high surrogate: leave it unconsumed
                .break
            .endif
            movzx eax,WORD PTR [rsi]    ;load the candidate low surrogate once
            ;//Low surrogate
            .if (eax >= 0dc00h && eax <= 0dfffh)
                ;code point = 10000h + ((hi - 0D800h) << 10) + (lo - 0DC00h)
                ;10000h - 0DC00h folds into the single displacement 2400h
                sub ebx,0d800h
                shl ebx,10
                lea ebx,[rbx+rax+2400h]
            .else
                ;Unpaired high surrogate: drop it, but step back so that the
                ;loop increment lands on the WORD just examined instead of
                ;skipping it.
                sub rsi,2
                .continue
            .endif
        .endif
        .if (r9)                        ;szTarget present: store before advancing
            mov [rdi],ebx
        .endif
        add rdi,4                       ;one DWORD per code point, stored or only counted
    .endfor
    .if (r8)                            ;nSourceDone requested
        mov rax,rsi
        sub rax,rcx                     ;bytes consumed (rcx still equals pSource)
        sar rax,1                       ;bytes -> WORDs
        mov [r8],rax
    .endif
    mov rax,rdi
    sub rax,r9                          ;bytes produced (r9 is 0 in count-only mode)
    sar rax,2                           ;bytes -> DWORDs
    ret
UTF16toUTF32 ENDP
It produces this:
UTF16toUTF32 PROC FRAME USES rbx rbp rdi rsi r12 r13 pSource:PTR WORD,nSourceLen:UINT_PTR,nSourceDone:PTR UINT_PTR,szTarget:PTR DWORD,nTargetMax:UINT_PTR
000000013F62B41E 48 89 5C 24 10 mov qword ptr [rsp+10h],rbx
000000013F62B423 48 89 6C 24 18 mov qword ptr [rsp+18h],rbp
000000013F62B428 48 89 7C 24 20 mov qword ptr [pSource],rdi
000000013F62B42D 56 push rsi
000000013F62B42E 41 54 push r12
000000013F62B430 41 55 push r13
;//rsi=pSource
;//r13 =PTR to WORD pSource + nSourceLen * 2 (end of source)
;//rdi = pointer to DWORD destinu
;//r12 = end of DWORD destinu
;//ebx = DWORD cahr
mov rsi,rcx ;rsi=pSource
000000013F62B432 48 8B F1 mov rsi,rcx
lea r13,[rsi+rdx*2] ;r13=pSource + nSourceLen * 2
000000013F62B435 4C 8D 2C 56 lea r13,[rsi+rdx*2]
mov rbp,nTargetMax ;nTargetMax in rbp
000000013F62B439 48 8B 6C 24 40 mov rbp,qword ptr [nTargetMax]
.if (!r9 && !rbp) ;!szTarget && !nTargetMax
000000013F62B43E 4D 85 C9 test r9,r9
000000013F62B441 75 10 jne UTF16toUTF32+35h (013F62B453h)
000000013F62B443 48 85 ED test rbp,rbp
000000013F62B446 75 0B jne UTF16toUTF32+35h (013F62B453h)
xor edi,edi ;clear rdi
000000013F62B448 33 FF xor edi,edi
mov r12,MAXUINT_PTR ;0ffffffffh
000000013F62B44A 49 C7 C4 FF FF FF FF mov r12,0FFFFFFFFFFFFFFFFh
.else
000000013F62B451 EB 07 jmp UTF16toUTF32+3Ch (013F62B45Ah)
mov rdi,r9 ;destinu = szTarget
000000013F62B453 49 8B F9 mov rdi,r9
lea r12,[rdi+rbp*4] ;end of DWORD destinu
000000013F62B456 4C 8D 24 AF lea r12,[rdi+rbp*4]
.endif
.for (¦rsi < r13 && rdi < r12¦rsi+=2)
000000013F62B45A EB 5D jmp UTF16toUTF32+9Bh (013F62B4B9h)
movzx ebx,WORD PTR[rsi]
000000013F62B45C 0F B7 1E movzx ebx,word ptr [rsi]
;//Surrogate pair. High surrogate.
.if (ebx >= 0d800h && ebx <= 0dbffh)
000000013F62B45F 81 FB 00 D8 00 00 cmp ebx,0D800h
000000013F62B465 72 3D jb UTF16toUTF32+86h (013F62B4A4h)
000000013F62B467 81 FB FF DB 00 00 cmp ebx,0DBFFh
000000013F62B46D 77 35 ja UTF16toUTF32+86h (013F62B4A4h)
add rsi,2
000000013F62B46F 48 83 C6 02 add rsi,2
.if (rsi >= r13)
000000013F62B473 49 3B F5 cmp rsi,r13
000000013F62B476 72 06 jb UTF16toUTF32+60h (013F62B47Eh)
sub rsi,2
000000013F62B478 48 83 EE 02 sub rsi,2
.break
000000013F62B47C EB 45 jmp UTF16toUTF32+0A5h (013F62B4C3h)
.endif
;//Low surrogate
.if (WORD PTR[rsi] >= 0dc00h && WORD PTR[rsi] <= 0dfffh)
000000013F62B47E 66 81 3E 00 DC cmp word ptr [rsi],0DC00h
000000013F62B483 72 1D jb UTF16toUTF32+84h (013F62B4A2h)
000000013F62B485 66 81 3E FF DF cmp word ptr [rsi],0DFFFh
000000013F62B48A 77 16 ja UTF16toUTF32+84h (013F62B4A2h)
sub ebx,0d800h
000000013F62B48C 81 EB 00 D8 00 00 sub ebx,0D800h
shl ebx,10
000000013F62B492 C1 E3 0A shl ebx,0Ah
movzx rax,WORD PTR[rsi]
000000013F62B495 48 0F B7 06 movzx rax,word ptr [rsi]
lea ebx, DWORD PTR [rbx+rax+2400h]
000000013F62B499 8D 9C 18 00 24 00 00 lea ebx,[rax+rbx+2400h]
.else
000000013F62B4A0 EB 02 jmp UTF16toUTF32+86h (013F62B4A4h)
.continue
000000013F62B4A2 EB 11 jmp UTF16toUTF32+97h (013F62B4B5h)
.endif
.endif
.if (r9) ;szTarget
000000013F62B4A4 4D 85 C9 test r9,r9
000000013F62B4A7 74 08 je UTF16toUTF32+93h (013F62B4B1h)
add rdi,4
000000013F62B4A9 48 83 C7 04 add rdi,4
mov [rdi],ebx
000000013F62B4AD 89 1F mov dword ptr [rdi],ebx
.else
000000013F62B4AF EB 04 jmp UTF16toUTF32+97h (013F62B4B5h)
add rdi,2
000000013F62B4B1 48 83 C7 02 add rdi,2
.endif
.endfor
000000013F62B4B5 48 83 C6 02 add rsi,2
000000013F62B4B9 49 3B F5 cmp rsi,r13
000000013F62B4BC 73 05 jae UTF16toUTF32+0A5h (013F62B4C3h)
000000013F62B4BE 49 3B FC cmp rdi,r12
000000013F62B4C1 72 99 jb UTF16toUTF32+3Eh (013F62B45Ch)
.if (r8)
000000013F62B4C3 4D 85 C0 test r8,r8
000000013F62B4C6 74 0C je UTF16toUTF32+0B6h (013F62B4D4h)
mov rax,rsi
000000013F62B4C8 48 8B C6 mov rax,rsi
sub rax,rcx
000000013F62B4CB 48 2B C1 sub rax,rcx
sar rax,1
000000013F62B4CE 48 D1 F8 sar rax,1
mov [r8],rax
000000013F62B4D1 49 89 00 mov qword ptr [r8],rax
.endif
mov rax,rdi
000000013F62B4D4 48 8B C7 mov rax,rdi
sub rax,r9
000000013F62B4D7 49 2B C1 sub rax,r9
sar rax,2
000000013F62B4DA 48 C1 F8 02 sar rax,2
ret
000000013F62B4DE 41 5D pop r13
000000013F62B4E0 41 5C pop r12
000000013F62B4E2 5E pop rsi
000000013F62B4E3 48 8B 5C 24 10 mov rbx,qword ptr [rsp+10h]
000000013F62B4E8 48 8B 6C 24 18 mov rbp,qword ptr [rsp+18h]
000000013F62B4ED 48 8B 7C 24 20 mov rdi,qword ptr [pSource]
000000013F62B4F2 C3 ret
UTF16toUTF32 ENDP