Here is a test run with your (marginally modified) 1.asm ... 4.asm and other StrLen algos.

The 3.asm algo returns a wrong value (14 instead of 100; see the `eax` results below), and I have not found out why.

`Intel(R) Core(TM) i5-2450M CPU @ 2.50GHz (SSE4)`

6931 cycles for 100 * CRT strlen

5372 cycles for 100 * Masm32 StrLen

19701 cycles for 100 * Windows lstrlen

2006 cycles for 100 * MasmBasic Len

1579 cycles for 100 * Algo1

1712 cycles for 100 * Algo2

1103 cycles for 100 * Algo3

3209 cycles for 100 * Algo4

6941 cycles for 100 * CRT strlen

5393 cycles for 100 * Masm32 StrLen

19722 cycles for 100 * Windows lstrlen

1994 cycles for 100 * MasmBasic Len

1587 cycles for 100 * Algo1

1722 cycles for 100 * Algo2

1102 cycles for 100 * Algo3

3227 cycles for 100 * Algo4

6938 cycles for 100 * CRT strlen

5380 cycles for 100 * Masm32 StrLen

19692 cycles for 100 * Windows lstrlen

2011 cycles for 100 * MasmBasic Len

1580 cycles for 100 * Algo1

1713 cycles for 100 * Algo2

1101 cycles for 100 * Algo3

3205 cycles for 100 * Algo4

6970 cycles for 100 * CRT strlen

5404 cycles for 100 * Masm32 StrLen

19710 cycles for 100 * Windows lstrlen

2010 cycles for 100 * MasmBasic Len

1589 cycles for 100 * Algo1

1710 cycles for 100 * Algo2

1104 cycles for 100 * Algo3

3204 cycles for 100 * Algo4

14 bytes for CRT strlen

10 bytes for Masm32 StrLen

10 bytes for Windows lstrlen

10 bytes for MasmBasic Len

82 bytes for Algo1

114 bytes for Algo2

690 bytes for Algo3

761 bytes for Algo4

100 = eax CRT strlen

100 = eax Masm32 StrLen

100 = eax Windows lstrlen

100 = eax MasmBasic Len

100 = eax Algo1

100 = eax Algo2

14 = eax Algo3

100 = eax Algo4

Among the algos that return the correct length, this is the clear winner:

`; include 1j.asm`

;-----------------------------------------------------------------------
; Algo1 - length of a zero-terminated byte string (SSE2, 32-bit MASM)
;
; In:    [esp+4] = pointer to the string (any alignment)
; Out:   eax     = number of bytes before the terminating zero
; Stack: the callee removes the 4-byte argument (retn 4)
; Clobb: ecx, edx, xmm0, xmm1, flags
;
; The string is scanned in aligned 16-byte blocks. The first block may
; begin before the string, so compare hits that lie in front of the
; string start are masked out with edx.
;
; xmm0 must hold zero for the whole scan. The first block is therefore
; compared in xmm1; comparing it in xmm0 leaves 0FFh in every byte
; position where a zero byte preceded the string, and the loop would
; then miss a terminator (or falsely match a 0FFh byte) in those
; positions.
;-----------------------------------------------------------------------
Algo1 proc

        mov     eax, [esp+4]            ; eax = string pointer
        mov     ecx, eax                ; register copy instead of a second load from [esp+4]
        and     eax, -16                ; eax = start of the aligned block containing the string
        and     ecx, 16-1               ; ecx = offset of the string inside that block (0..15)

        or      edx, -1                 ; edx = all ones
        shl     edx, cl                 ; clear the bits for bytes in front of the string

        xorps   xmm0, xmm0              ; xmm0 = 0, never written again
        movaps  xmm1, [eax]             ; first (aligned) block
        pcmpeqb xmm1, xmm0              ; 0FFh in every byte position that holds a zero
        add     eax, 16                 ; eax = address of the next block
        pmovmskb ecx, xmm1              ; one bit per byte position
        and     ecx, edx                ; ignore zero bytes located before the string
        jnz     L2                      ; terminator is already in the first block

L1:
        movaps  xmm1, [eax]             ; next aligned block
        pcmpeqb xmm1, xmm0              ; compare against the preserved zero register
        pmovmskb ecx, xmm1
        add     eax, 16                 ; eax always points one block past the one just tested
        test    ecx, ecx
        jz      L1                      ; no zero byte in this block, keep scanning

L2:
        bsf     ecx, ecx                ; index of the first zero byte inside the block
        lea     eax, [eax+ecx-16]       ; address of the terminator (undo the look-ahead add)
        sub     eax, [esp+4]            ; length = terminator address - string start
        retn    4

Algo1 endp