; The CRT functions don't bloat your executable -- the program below is 1536 bytes:
include \masm32\include\masm32rt.inc
; ----------------------------------------------------------------------
; Shows a single message box and then exits.
;
; MsgBox, exit, crt_memcpy, MB_OK and the xxx$() conversion macros are
; not defined in this file; presumably they all come from masm32rt.inc
; (TODO confirm against the MASM32 SDK).
; ----------------------------------------------------------------------
.code

; Both strings are emitted into the code section, ahead of the entry
; label. NOTE(review): presumably done to avoid a separate data section
; and keep the image small -- confirm before moving them.
AppName     db      "Masm32:", 0            ; 3rd MsgBox argument (caption, going by the name)
hw          db      "Hello World", 0        ; 2nd MsgBox argument (message text)

; Program entry point (named on the closing `end` directive).
start:
            MsgBox  0, addr hw, addr AppName, MB_OK
            exit

; Everything below follows `exit`. Assuming `exit` terminates the process
; (TODO confirm), none of it is ever executed; note that no arguments are
; set up for crt_memcpy here. NOTE(review): these statements look like
; they exist only so the CRT import and the conversion macros get
; assembled into the image -- verify before removing.
            call    crt_memcpy

            ; byte-sized sources
            mov     eax, ubyte$(al)
            mov     eax, sbyte$(cl)
            mov     eax, xbyte$(dl)

            ; word-sized sources
            mov     eax, uword$(ax)
            mov     eax, sword$(ax)
            mov     eax, xword$(ax)

            ; dword-sized sources
            mov     eax, udword$(eax)
            mov     eax, sdword$(eax)
            mov     eax, xdword$(eax)

            ; qword-sized sources, passed as the edx::eax pair
            mov     eax, uqword$(edx::eax)
            mov     eax, sqword$(edx::eax)
            mov     eax, xqword$(edx::eax)

            ; floating-point sources
            mov     eax, real4$(eax)
            mov     eax, real8$(edx::eax)

end start