All right, I have attached what you asked for, but you can probably see the problem right here:
004010EF CC int 3
mov ebx, 0FFFFh
004010F0 BB FF FF 00 00 mov ebx,0FFFFh
kmovw k1, ebx
004010F5 C5 F8 92 CB kmovw k1,ebx
vmovdqa32 zmm0 {k1} , ZMMWORD PTR Index
004010F9 62 F1 7D 49 6F 85 00 68 40 00 vmovdqa32 zmm0{k1},zmmword ptr Index (0406800h)[ebp]
; ************ TRANSPOSING CODE START ***********************
@anotherCol:
mov ebx, 0FFFFh
00401103 BB FF FF 00 00 mov ebx,0FFFFh
kmovw k1, ebx
00401108 C5 F8 92 CB kmovw k1,ebx
mov ebx, 0
0040110C BB 00 00 00 00 mov ebx,0
mov eax, 0
00401111 B8 00 00 00 00 mov eax,0
.while ebx<vertLoads
00401116 EB 19 jmp main+0B0h (0401131h)
-------------------------------------------------------------------------------
VPGATHERDD zmm1{k1}, [edx+zmm0]
00401118 62 F2 7D 49 90 8C 00 00 62 F1 7E vpgatherdd zmm1{k1},dword ptr [eax+zmm0+7EF16200h]
vmovdqu32 ZMMWORD PTR [edi+eax], zmm1
00401123 48 dec eax
00401124 7F 0C jg main+0B1h (0401132h)
00401126 38 43 81 cmp byte ptr [ebx-7Fh],al
add edx, REGWIDTH*COLS*4
00401129 C2 00 04 ret 400h
0040112C 00 00 add byte ptr [eax],al
--------------------------------------------------------------------------------
add eax, REGWIDTH*4
0040112E 83 C0 40 add eax,40h
.endw
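Note this part, where the disassembly no longer matches the source instructions (the gather is decoded as [eax+zmm0+7EF16200h] instead of [edx+zmm0], and the following vmovdqu32 and add are decoded as unrelated instructions):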
-------------------------------------------------------------------------------
VPGATHERDD zmm1{k1}, [edx+zmm0]
00401118 62 F2 7D 49 90 8C 00 00 62 F1 7E vpgatherdd zmm1{k1},dword ptr [eax+zmm0+7EF16200h]
vmovdqu32 ZMMWORD PTR [edi+eax], zmm1
00401123 48 dec eax
00401124 7F 0C jg main+0B1h (0401132h)
00401126 38 43 81 cmp byte ptr [ebx-7Fh],al
add edx, REGWIDTH*COLS*4
00401129 C2 00 04 ret 400h
0040112C 00 00 add byte ptr [eax],al
--------------------------------------------------------------------------------
Maybe it is easier to see from here:
