Another approach, no idea how fast it is:
include \masm32\MasmBasic\MasmBasic.inc
.data
; 128-bit (OWORD) demo value written as a binary literal (trailing `y` = binary radix in MASM).
; The literal reads most-significant bit first; RotateLeft128 below is applied to it in place.
Number OWORD 10000110111111101111111011111110100011101111111011111110111111101001111011111110111111101111111010111110111111101111111011111110y
.code
RotateLeft128 proc pNumber
; -----------------------------------------------------------------------
; Rotate the 128-bit value at pNumber left by ONE bit, in place.
;
; In:    pNumber = address of the 16-byte value (alignment not required,
;                  the unaligned movups form is used for load and store)
; Out:   eax     = pNumber; memory at pNumber holds the rotated value
; Uses:  eax, edx, xmm0, flags
;
; psllq shifts the two qwords independently, so the bits shifted out of
; each half (bit 63 and bit 127 of the original) are lost and must be
; patched back in by hand:
;   original bit 63  -> bit 0 of the high qword (byte 8)
;   original bit 127 -> bit 0 of the low qword  (byte 0), the wrap-around
; -----------------------------------------------------------------------
    mov     eax, pNumber
    movsx   edx, byte ptr [eax+7]       ; edx negative <=> original bit 63 set
    movups  xmm0, OWORD ptr [eax]
    psllq   xmm0, 1                     ; both qwords << 1, bit 0 of each is now 0
    test    byte ptr [eax+15], 80h      ; SF = original bit 127; must be read before the store
    movups  OWORD ptr [eax], xmm0       ; SSE store does not modify EFLAGS
    jns     @F                          ; bit 127 clear: nothing to wrap around
    or      byte ptr [eax], 1           ; wrap bit 127 into bit 0 of the low qword
@@:
    test    edx, edx
    jns     @F                          ; bit 63 clear: no carry into the high qword
    or      byte ptr [eax+8], 1         ; carry bit 63 into bit 0 of the high qword
@@:
    ret
RotateLeft128 endp
; Demo driver: display Number, rotate it left by one bit, display it again.
; Init, Cls, deb and EndOfCode are not defined in this file - presumably macros
; supplied by the included MasmBasic.inc (TODO confirm their exact semantics).
Init
Cls
deb 4, "in", b:Number:4 ; show number as binary, 4 dwords
invoke RotateLeft128, offset Number ; Number is passed by address and modified in place
deb 4, "out", b:Number:4 ; same binary display after the rotate, for comparison with "in"
EndOfCode
Output:
in b:Number:4 10000110111111101111111011111110 10001110111111101111111011111110 10011110111111101111111011111110 10111110111111101111111011111110
out b:Number:4 00001101111111011111110111111101 00011101111111011111110111111101 00111101111111011111110111111101 01111101111111011111110111111101