i knew it was something like that, Hutch - lol
sinsi has the right idea, i think...
; Timed snippet: for each group of four dwords in Source, take the byte at
; offset 0 of each dword and pack the four bytes into one dword of Dest.
;
; Register roles inside the loop:
;   ebx = read pointer into Source  (advances 16 bytes per iteration)
;   eax = write pointer into Dest   (advances  4 bytes per iteration)
;   ecx = iterations remaining      (starts at 4096/4 = 1024)
;   edx = dword being assembled
;
; Over the full run the loop writes 1024 * 4 = 4096 bytes to Dest and reads
; bytes from Source at offsets 0, 4, 8, ... up to 1023*16 + 12 = 16380.
;
; NOTE(review): counter_begin / counter_end are defined outside this snippet;
; presumably they are timing macros that measure the enclosed code - confirm.
; NOTE(review): ebx is modified here and not restored within the snippet;
; confirm the surrounding code saves it if the calling convention requires.
counter_begin LOOP_COUNT, HIGH_PRIORITY_CLASS
mov ebx, offset Source
mov eax, offset Dest
mov ecx, 4096/4
; --- build the upper half of the result in dx, then shift it into place ---
@@: mov dh,[ebx+12]         ; byte from 4th dword -> ends up in bits 24-31
mov dl,[ebx+8]              ; byte from 3rd dword -> ends up in bits 16-23
shl edx,16                  ; move dh:dl to the high word; low word is now zero
; --- fill the lower half ---
mov dh,[ebx+4]              ; byte from 2nd dword -> bits 8-15
mov dl,[ebx]                ; byte from 1st dword -> bits 0-7
add ebx,16                  ; next group of four source dwords
mov [eax],edx               ; store the packed dword
dec ecx                     ; sets ZF when the last iteration is done
lea eax,[eax+4]             ; advance Dest pointer; lea leaves the flags from dec intact
jnz @B                      ; loop while ecx != 0
counter_end
It would help if the destination array were 4-byte aligned, and maybe the source, too.