Hi there

Just to let you know that I am not slacking off, and to show how far the AVX-512 implementation has come:
there is still a lot of tweaking to be done, but the base is in place.
I use objconv for testing the generated code, and here is what has been done so far:
; Disassembly of file: simple.exe
; Mon Aug 31 06:27:26 2015
; Mode: 64 bits
; Syntax: MASM/ML64
; Instruction set: AVX-512, x64
; Assembler set-up and the padding that precedes Entry_point.
; "option dotname" allows identifiers that begin with a dot.
option dotname
; Make Entry_point visible outside this module.
public Entry_point
; Open the code segment with byte alignment and class 'CODE'.
_text SEGMENT BYTE 'CODE' ; section number 1
; Five one-byte INT 3 (CCH) fillers occupy 40001000-40001004 (see the address
; column in the trailing comments), so the first instruction of Entry_point
; sits at 40001005.
int 3 ; breakpoint or filler ; 40001000 _ CC
int 3 ; breakpoint or filler ; 40001001 _ CC
int 3 ; breakpoint or filler ; 40001002 _ CC
int 3 ; breakpoint or filler ; 40001003 _ CC
int 3 ; breakpoint or filler ; 40001004 _ CC
; ---------------------------------------------------------------------------
; Entry_point
;
; Straight-line listing of AVX / AVX-512 encodings as disassembled by objconv:
; vaddpd / vaddps / vmulps in register, full-vector-memory, broadcast and
; embedded-rounding forms, followed by kmovb/kmovw/kmovd/kmovq in
; mask<->memory, mask<->mask and mask<->GPR forms, then ret.
; The trailing comment on every instruction is objconv's "address _ bytes"
; column and is the reference for what was actually encoded.
;
; No instruction here loads rax, rbx, rdx, rbp or rdi before they are used
; as address registers, and the kmov store forms write through rbx and rbp.
; ---------------------------------------------------------------------------
Entry_point PROC
; Note: Immediate operand could be made smaller by sign extension
; rel32 = 6 counted from the end of the jmp (4000100A) gives 40001010, the
; first vaddpd below, which is where ?_001 is defined.
jmp ?_001 ; 40001005 _ E9, 00000006
; Filling space: 6H
; Filler type: INT 3 Debug breakpoint
; db 0CCH, 0CCH, 0CCH, 0CCH, 0CCH, 0CCH
; NOTE(review): the enclosing _text segment is declared BYTE-aligned; MASM may
; reject ALIGN 8 there (A2189) - confirm, or use the db filler shown above.
ALIGN 8
?_001:
; --- vaddpd zmm with embedded rounding (one line per rounding mode) --------
vaddpd zmm30, zmm29, zmm28, {rn-sae} ; 40001010 _ 62 01 95 10: 58. F4
vaddpd zmm30, zmm29, zmm28, {rd-sae} ; 40001016 _ 62 01 95 30: 58. F4
vaddpd zmm30, zmm29, zmm28, {ru-sae} ; 4000101C _ 62 01 95 50: 58. F4
vaddpd zmm30, zmm29, zmm28, {rz-sae} ; 40001022 _ 62 01 95 70: 58. F4
; --- vaddpd, register and full-vector memory forms -------------------------
; [rdx+A*B] is objconv's compressed-displacement notation: A is the encoded
; 8-bit displacement, B the memory operand size in bytes (10H*40H = 400H).
vaddpd xmm1, xmm2, xmm3 ; 40001028 _ C5 E9: 58. CB
vaddpd zmm1, zmm29, zmmword ptr [rdx+10H*40H] ; 4000102C _ 62 F1 95 40: 58. 4A, 10
vaddpd xmm1, xmm2, xmm3 ; 40001033 _ C5 E9: 58. CB
vaddpd xmm1, xmm29, xmmword ptr [rdx+40H*10H] ; 40001037 _ 62 F1 95 00: 58. 4A, 40
vaddpd xmm1, xmm29, xmmword ptr [rdx+40H*10H] ; 4000103E _ 62 F1 95 00: 58. 4A, 40
vaddpd xmm1, xmm29, xmmword ptr [rdx+20H*10H] ; 40001045 _ 62 F1 95 00: 58. 4A, 20
vaddpd xmm1, xmm29, xmmword ptr [rdx+20H*10H] ; 4000104C _ 62 F1 95 00: 58. 4A, 20
vaddpd zmm13, zmm18, zmmword ptr [rdx+10H*40H] ; 40001053 _ 62 71 ED 40: 58. 6A, 10
vaddpd xmm15, xmm24, xmmword ptr [rdx+40H*10H] ; 4000105A _ 62 71 BD 00: 58. 7A, 40
vaddpd xmm1, xmm2, xmm3 ; 40001061 _ C5 E9: 58. CB
; --- broadcast forms --------------------------------------------------------
; A broadcast operand takes exactly one {1toN} decorator, with
; N = vector width / element width: dword -> {1to4}/{1to8}/{1to16} for
; xmm/ymm/zmm, qword -> {1to8} for zmm.
vaddps ymm17, ymm17, dword ptr [rdx+400H] {1to8} ; 40001065 _ 62 E1 74 30: 58. 8A, 00000400
vaddps ymm7, ymm7, dword ptr [rdx+400H] {1to8} ; 4000106F _ 62 F1 44 38: 58. BA, 00000400
vaddps ymm27, ymm27, dword ptr [rdx+400H] {1to8} ; 40001079 _ 62 61 24 30: 58. 9A, 00000400
vaddps xmm13, xmm12, xmmword ptr [rdx+400H] ; 40001083 _ C5 18: 58. AA, 00000400
vaddpd xmm1, xmm2, xmm3 ; 4000108B _ C5 E9: 58. CB
vaddpd ymm1, ymm2, ymm29 ; 4000108F _ 62 91 ED 28: 58. CD
vaddpd xmm1, xmm2, xmm29 ; 40001095 _ 62 91 ED 08: 58. CD
vaddpd xmm1, xmm2, xmmword ptr [rdx+400H] ; 4000109B _ C5 E9: 58. 8A, 00000400
vaddps xmm1, xmm8, dword ptr [rdx+400H] {1to4} ; 400010A3 _ 62 F1 3C 18: 58. 8A, 00000400
vaddps xmm7, xmm2, dword ptr [rdx+400H] {1to4} ; 400010AD _ 62 F1 6C 18: 58. BA, 00000400
vaddps ymm8, ymm2, dword ptr [rdx+400H] {1to8} ; 400010B7 _ 62 71 6C 38: 58. 82, 00000400
vaddps ymm8, ymm2, dword ptr [rdx+400H] {1to8} ; 400010C1 _ 62 71 6C 38: 58. 82, 00000400
; NOTE(review): on the vaddps lines whose bytes read "62 61 94 .." and on the
; vmulps lines ("62 F1 EC ..") the second EVEX payload byte has its top (W)
; bit set, while the other EVEX vaddps lines (74/44/24/3C/6C/14) have it
; clear. Intel documents these ps forms as W0 - confirm the assembler output.
vaddps zmm30, zmm29, dword ptr [rdx+400H] {1to16} ; 400010CB _ 62 61 94 50: 58. B2, 00000400
vaddps zmm30, zmm29, dword ptr [rdx+400H] {1to16} ; 400010D5 _ 62 61 94 50: 58. B2, 00000400
vaddps zmm30, zmm29, dword ptr [rdx+400H] {1to16} ; 400010DF _ 62 61 94 50: 58. B2, 00000400
vaddps xmm30, xmm29, xmmword ptr [rdx+20H*10H] ; 400010E9 _ 62 61 94 00: 58. 72, 20
vaddps zmm30, zmm29, zmmword ptr [rdx+10H*40H] ; 400010F0 _ 62 61 94 40: 58. 72, 10
vaddps xmm30, xmm29, xmmword ptr [rdx+20H*10H] ; 400010F7 _ 62 61 94 00: 58. 72, 20
vaddps zmm30, zmm29, zmmword ptr [rdx+10H*40H] ; 400010FE _ 62 61 94 40: 58. 72, 10
vaddps zmm30, zmm29, dword ptr [rdx+400H] {1to16} ; 40001105 _ 62 61 94 50: 58. B2, 00000400
vaddps ymm18, ymm17, dword ptr [rdx+400H] {1to8} ; 4000110F _ 62 E1 74 30: 58. 92, 00000400
vaddps xmm13, xmm12, xmmword ptr [rdx+400H] ; 40001119 _ C5 18: 58. AA, 00000400
vaddps xmm30, xmm29, xmmword ptr [rdx+20H*10H] ; 40001121 _ 62 61 94 00: 58. 72, 20
vaddps zmm30, zmm29, zmmword ptr [rdx+10H*40H] ; 40001128 _ 62 61 94 40: 58. 72, 10
vaddpd zmm30, zmm29, qword ptr [rdx+400H] {1to8} ; 4000112F _ 62 61 95 50: 58. B2, 00000400
vaddpd zmm30, zmm29, qword ptr [rdx+400H] {1to8} ; 40001139 _ 62 61 95 50: 58. B2, 00000400
vmulps zmm1, zmm2, dword ptr [rax] {1to16} ; 40001143 _ 62 F1 EC 58: 59. 08
vaddps xmm30, xmm29, dword ptr [rdx+400H] {1to4} ; 40001149 _ 62 61 14 10: 58. B2, 00000400
vaddps xmm30, xmm29, dword ptr [rdx+400H] {1to4} ; 40001153 _ 62 61 14 10: 58. B2, 00000400
vaddps ymm30, ymm29, dword ptr [rdx+400H] {1to8} ; 4000115D _ 62 61 14 30: 58. B2, 00000400
vaddps ymm30, ymm29, dword ptr [rdx+400H] {1to8} ; 40001167 _ 62 61 14 30: 58. B2, 00000400
vaddps zmm30, zmm29, dword ptr [rdx+400H] {1to16} ; 40001171 _ 62 61 94 50: 58. B2, 00000400
vaddps zmm30, zmm29, dword ptr [rdx+400H] {1to16} ; 4000117B _ 62 61 94 50: 58. B2, 00000400
vaddpd zmm30, zmm29, qword ptr [rdx+400H] {1to8} ; 40001185 _ 62 61 95 50: 58. B2, 00000400
vaddps zmm30, zmm29, dword ptr [rdx+400H] {1to16} ; 4000118F _ 62 61 94 50: 58. B2, 00000400
vmulps zmm1, zmm2, dword ptr [rax] {1to16} ; 40001199 _ 62 F1 EC 58: 59. 08
vaddpd zmm30, zmm29, qword ptr [rdx] {1to8} ; 4000119F _ 62 61 95 50: 58. 32
vaddpd zmm30, zmm29, zmmword ptr [rdx+10H*40H] ; 400011A5 _ 62 61 95 40: 58. 72, 10
vaddpd zmm1, zmm29, zmmword ptr [rdx+10H*40H] ; 400011AC _ 62 F1 95 40: 58. 4A, 10
vaddpd zmm30, zmm29, qword ptr [rdx+400H] {1to8} ; 400011B3 _ 62 61 95 50: 58. B2, 00000400
; --- kmov: mask <-> memory, base+index*2 addressing -------------------------
kmovw k7, word ptr [rbp] ; 400011BD _ C5 F8: 90. 7D, 00
kmovb k2, byte ptr [rbx+rdx*2] ; 400011C2 _ C5 F9: 90. 14 53
kmovb byte ptr [rbx+rdx*2], k2 ; 400011C7 _ C5 F9: 91. 14 53
kmovw k2, word ptr [rbx+rdx*2] ; 400011CC _ C5 F8: 90. 14 53
kmovw word ptr [rbx+rdx*2], k2 ; 400011D1 _ C5 F8: 91. 14 53
kmovd k2, dword ptr [rbx+rdx*2] ; 400011D6 _ C4 E1 F9: 90. 14 53
kmovd dword ptr [rbx+rdx*2], k2 ; 400011DC _ C4 E1 F9: 91. 14 53
kmovq k2, qword ptr [rbx+rdx*2] ; 400011E2 _ C4 E1 F8: 90. 14 53
kmovq qword ptr [rbx+rdx*2], k2 ; 400011E8 _ C4 E1 F8: 91. 14 53
; --- kmov: mask <-> mask -----------------------------------------------------
kmovb k1, k2 ; 400011EE _ C5 F9: 90. CA
kmovw k1, k2 ; 400011F2 _ C5 F8: 90. CA
kmovd k1, k2 ; 400011F6 _ C4 E1 F9: 90. CA
kmovq k1, k2 ; 400011FB _ C4 E1 F8: 90. CA
; --- kmov: general-purpose register -> mask ----------------------------------
kmovb k2, ecx ; 40001200 _ C5 F9: 92. D1
kmovw k2, ecx ; 40001204 _ C5 F8: 92. D1
kmovd k2, ecx ; 40001208 _ C5 FB: 92. D1
kmovq k2, rcx ; 4000120C _ C4 E1 FB: 92. D1
; --- kmov: mask -> general-purpose register ----------------------------------
kmovb ecx, k2 ; 40001211 _ C5 F9: 93. CA
kmovw ecx, k2 ; 40001215 _ C5 F8: 93. CA
kmovd ecx, k2 ; 40001219 _ C5 FB: 93. CA
kmovq rcx, k2 ; 4000121D _ C4 E1 FB: 93. CA
; --- kmov: mask <-> memory, plain base register ------------------------------
kmovb k2, byte ptr [rbx] ; 40001222 _ C5 F9: 90. 13
kmovb byte ptr [rbx], k2 ; 40001226 _ C5 F9: 91. 13
kmovw k2, word ptr [rbx] ; 4000122A _ C5 F8: 90. 13
kmovw word ptr [rbx], k2 ; 4000122E _ C5 F8: 91. 13
kmovd k2, dword ptr [rbx] ; 40001232 _ C4 E1 F9: 90. 13
kmovd dword ptr [rbx], k2 ; 40001237 _ C4 E1 F9: 91. 13
kmovq k2, qword ptr [rbx] ; 4000123C _ C4 E1 F8: 90. 13
kmovq qword ptr [rbx], k2 ; 40001241 _ C4 E1 F8: 91. 13
; --- kmov: mask <-> memory, base+index*2 again -------------------------------
kmovb k2, byte ptr [rbx+rdx*2] ; 40001246 _ C5 F9: 90. 14 53
kmovb byte ptr [rbx+rdx*2], k2 ; 4000124B _ C5 F9: 91. 14 53
kmovw k2, word ptr [rbx+rdx*2] ; 40001250 _ C5 F8: 90. 14 53
kmovw word ptr [rbx+rdx*2], k2 ; 40001255 _ C5 F8: 91. 14 53
kmovd k2, dword ptr [rbx+rdx*2] ; 4000125A _ C4 E1 F9: 90. 14 53
kmovd dword ptr [rbx+rdx*2], k2 ; 40001260 _ C4 E1 F9: 91. 14 53
kmovq k2, qword ptr [rbx+rdx*2] ; 40001266 _ C4 E1 F8: 90. 14 53
kmovq qword ptr [rbx+rdx*2], k2 ; 4000126C _ C4 E1 F8: 91. 14 53
; --- kmov: base+index*8 with a negative 32-bit displacement ------------------
kmovb k3, byte ptr [rbp+rdi*8-1E240H] ; 40001272 _ C5 F9: 90. 9C FD, FFFE1DC0
kmovw k3, word ptr [rbp+rdi*8-1E240H] ; 4000127B _ C5 F8: 90. 9C FD, FFFE1DC0
kmovd k3, dword ptr [rbp+rdi*8-1E240H] ; 40001284 _ C4 E1 F9: 90. 9C FD, FFFE1DC0
kmovq k3, qword ptr [rbp+rdi*8-1E240H] ; 4000128E _ C4 E1 F8: 90. 9C FD, FFFE1DC0
kmovb byte ptr [rbp+rdi*8-1E240H], k3 ; 40001298 _ C5 F9: 91. 9C FD, FFFE1DC0
kmovw word ptr [rbp+rdi*8-1E240H], k3 ; 400012A1 _ C5 F8: 91. 9C FD, FFFE1DC0
kmovd dword ptr [rbp+rdi*8-1E240H], k3 ; 400012AA _ C4 E1 F9: 91. 9C FD, FFFE1DC0
kmovq qword ptr [rbp+rdi*8-1E240H], k3 ; 400012B4 _ C4 E1 F8: 91. 9C FD, FFFE1DC0
ret ; 400012BE _ C3
Entry_point ENDP