For large strings this should be faster because it reverses 16 bytes at a time whenever possible:
include \masm32\include\masm32rt.inc
; Enable assembly of XMM (SSE-family) instructions used by main.
.xmm
.data
; Zero-terminated ASCII message that main reverses. The three db lines are
; contiguous in memory and form a single string.
crapBasicMsg db "Since the old days of QuickBASIC fast string manipulation has been a main focus. When it comes to "
db "reversing a (ascii) string, the simplest way would be to iterate through the string, and create a copy in "
db "reverse order. But is it possible to reverse the string without doing a copy? During my QuickBASIC time, I chose to simply swap the characters while iterating to the middle. Is there a faster way?",0
; Length of crapBasicMsg in bytes (without the terminator); set by main
; from crt_strlen.
msgLen dd ?
; msgLen mod 16: leading bytes that main copies one at a time.
remaining dd ?
; msgLen / 16: number of 16-byte chunks that main reverses with a shuffle.
itCounter dd ?
; Destination buffer for the reversed string. It is zero-filled, and it is
; 16-byte aligned because main writes to it with aligned 16-byte stores.
align 16
result db 1024 dup (0)
; Byte-shuffle control mask, 16-byte aligned because main loads it with an
; aligned 16-byte load. The dwords are stored little-endian, so the bytes in
; memory are 0Fh,0Eh,...,01h,00h: destination byte i takes source byte 15-i,
; which reverses the 16 bytes of a register.
align 16
shflmask dd 0C0D0E0Fh,08090A0Bh, 04050607h,00010203h
.code
;-----------------------------------------------------------------------
; main - program entry point (32-bit, flat model, MASM32 runtime).
;
; Builds a reversed copy of crapBasicMsg in result and prints it:
;   1. The trailing (msgLen / 16) * 16 bytes are consumed back to front
;      in 16-byte chunks: unaligned load, byte reversal with PSHUFB,
;      aligned store.
;   2. The leading (msgLen mod 16) bytes are then copied one at a time,
;      also back to front.
;
; Registers: esi  = read cursor, walks down from the string terminator
;            edi  = write cursor, walks up from result
;            ecx  = loop counter
;            xmm2 = byte-reversal mask (shflmask)
; Requires:  SSSE3 (pshufb).
; Exit code: 0 on success, 1 if the message does not fit in result.
; Does not return; the process ends through ExitProcess.
;-----------------------------------------------------------------------
main proc
    invoke crt_strlen, addr crapBasicMsg

    ; Refuse input that would not fit in result together with its
    ; terminating zero, instead of writing past the end of the buffer.
    .if eax >= sizeof result
        invoke ExitProcess, 1
    .endif

    mov msgLen, eax
    mov edx, eax
    and edx, 15                     ; remaining = msgLen mod 16
    shr eax, 4                      ; itCounter = msgLen / 16
    mov remaining, edx
    mov itCounter, eax

    mov esi, offset crapBasicMsg
    add esi, msgLen                 ; esi -> terminating zero of the message
    mov edi, offset result
    mov eax, offset shflmask
    movdqa xmm2, [eax]              ; shflmask is 16-byte aligned

    ; Whole 16-byte chunks, last chunk of the source first.
    xor ecx, ecx
    .while ecx < itCounter
        sub esi, 16
        movdqu xmm0, [esi]          ; source chunk has no alignment guarantee
        pshufb xmm0, xmm2           ; reverse the 16 bytes in place
        movdqa [edi], xmm0          ; result + 16*k is always 16-byte aligned
        add edi, 16
        inc ecx
    .endw

    ; Leftover bytes at the start of the source, copied back to front.
    xor ecx, ecx
    .while ecx < remaining
        dec esi
        mov al, [esi]
        mov [edi], al
        inc edi
        inc ecx
    .endw

    ; Terminate explicitly rather than relying on result being pre-zeroed.
    mov byte ptr [edi], 0

    print addr result,13,10
    invoke ExitProcess,0
main endp
; Name main as the entry point so the linker has a start address.
end main