News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

IOCP - 64-bit IPv6 Echo Server Stress Test

Started by aw27, November 21, 2017, 02:02:28 AM

Previous topic - Next topic

aw27

Input Output Completion Port 64-bit IPv6 Client/Server.

Launch first the Server then the Client (you can launch more than 1 Client). Each Client will launch 64 connecting threads.
This is based on the  Windows 7 SDK IOCP sample.

Server:


;\masm32\bin\ml64  -c -Zp8 iocpServer.asm
;\masm32\bin\link /ENTRY:main /SUBSYSTEM:console /MACHINE:X64 iocpServer.obj
OPTION casemap:NONE

include iocpHelper.inc
.data
heapHandle HANDLE ?
SemaphoreObject HANDLE ?

.code

;-----------------------------------------------------------------------
; ServerWorkerThread - completion-port worker of the echo server.
; ABI:  Microsoft x64.
; In:   rcx = I/O completion port handle (main passes it as the thread
;       parameter).
; Out:  never returns to its caller; the only way out is ExitThread at
;       @exit1.
; For every dequeued completion packet:
;   * 0 bytes transferred    -> close the socket, free both heap blocks
;   * BytesRECV > BytesSEND  -> send the part of the buffer not echoed yet
;   * otherwise              -> post the next overlapped receive
; rdi is used as the rep stosb destination and is not preserved; that is
; tolerable only because this routine never returns.
; NOTE(review): errMacro is defined in iocpHelper.inc (not shown here);
; presumably it reports the error and leaves through @exit1 - confirm.
;-----------------------------------------------------------------------
ServerWorkerThread proc CompletionPortID : PTR
    LOCAL CompletionPort : HANDLE
    LOCAL BytesTransferred : DWORD
    LOCAL PerHandleData : LPPER_HANDLE_DATA
    LOCAL PerIoData : LPPER_IO_OPERATION_DATA
    LOCAL SendBytes : DWORD
    LOCAL RecvBytes : DWORD
    LOCAL Flags : DWORD

    sub     rsp, 38h                ; outgoing area: 4 shadow slots + 3 stack arguments
    and     rsp, -16                ; every call below needs a 16-byte aligned rsp

    mov     CompletionPort, rcx

@whileLoop:
    ; GetQueuedCompletionStatus(port, &BytesTransferred, &PerHandleData, &PerIoData, INFINITE)
    mov     rcx, CompletionPort
    lea     rdx, BytesTransferred
    lea     r8, PerHandleData
    lea     r9, PerIoData
    mov     DWORD PTR [rsp+20h], INFINITE
    call    GetQueuedCompletionStatus
    cmp     eax, 0
    jnz     @F
    errMacro err11
@@:
    cmp     BytesTransferred, 0
    jnz     @F1

    ; Zero bytes transferred: drop the connection and its two heap blocks.
    mov     r8, PerHandleData
    mov     rcx, (PER_HANDLE_DATA PTR [r8]).Socket
    call    closesocket
    cmp     eax, SOCKET_ERROR
    jnz     @F
    errMacro err12, 1
@@:
    mov     rcx, heapHandle
    mov     edx, 0
    mov     r8, PerHandleData
    call    HeapFree
    mov     rcx, heapHandle
    mov     edx, 0
    mov     r8, PerIoData
    call    HeapFree
    jmp     @whileLoop

@F1:
    mov     r10, PerIoData
    cmp     (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV, 0
    jnz     @F
    ; BytesRECV == 0: this packet completes a receive.
    mov     eax, BytesTransferred
    mov     (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV, eax
    mov     (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND, 0
    jmp     @F2
@@:
    ; This packet completes a send. BytesSEND is the running total echoed
    ; so far (it is used below as the offset into Buffer), so the new
    ; count must be added to it rather than replace it.
    mov     eax, BytesTransferred
    add     (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND, eax
@F2:
    mov     eax, (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV
    cmp     eax, (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND
    jbe     @F3

    ; Part of the received data is still unsent: send the remainder.
    mov     r10, PerIoData
    lea     rdi, (PER_IO_OPERATION_DATA PTR [r10]).Overlapped
    mov     al, 0
    mov     rcx, sizeof OVERLAPPED
    cld
    rep stosb                       ; clear the OVERLAPPED before reusing it

    lea     rax, (PER_IO_OPERATION_DATA PTR [r10]).Buffer
    mov     ecx, (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND
    add     rax, rcx
    mov     (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.buf, rax      ; buf = Buffer + BytesSEND

    mov     ecx, (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV
    sub     ecx, (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND
    mov     (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.len, ecx      ; len = BytesRECV - BytesSEND

    ; WSASend(Socket, &DataBuf, 1, &SendBytes, 0, &Overlapped, NULL)
    mov     r11, PerHandleData
    mov     rcx, (PER_HANDLE_DATA PTR [r11]).Socket
    lea     rdx, (PER_IO_OPERATION_DATA PTR [r10]).DataBuf
    mov     r8, 1
    lea     r9, SendBytes
    mov     DWORD PTR [rsp+20h], 0
    lea     rax, (PER_IO_OPERATION_DATA PTR [r10]).Overlapped
    mov     [rsp+28h], rax
    mov     QWORD PTR [rsp+30h], 0
    call    WSASend
    cmp     eax, SOCKET_ERROR
    jnz     @F
    call    WSAGetLastError
    cmp     eax, ERROR_IO_PENDING   ; a pending overlapped send is not an error
    jz      @F
    errMacro err13, 1
@@:
    jmp     @whileLoop

@F3:
    ; Everything received has been echoed: post the next receive.
    mov     r10, PerIoData
    mov     (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV, 0
    mov     Flags, 0
    lea     rdi, (PER_IO_OPERATION_DATA PTR [r10]).Overlapped
    mov     al, 0
    mov     rcx, sizeof OVERLAPPED
    cld
    rep stosb
    mov     (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.len, DATA_BUFSIZE
    lea     rax, (PER_IO_OPERATION_DATA PTR [r10]).Buffer
    mov     (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.buf, rax

    ; WSARecv(Socket, &DataBuf, 1, &RecvBytes, &Flags, &Overlapped, NULL)
    mov     r11, PerHandleData
    mov     rcx, (PER_HANDLE_DATA PTR [r11]).Socket
    lea     rdx, (PER_IO_OPERATION_DATA PTR [r10]).DataBuf
    mov     r8, 1
    lea     r9, RecvBytes
    lea     rax, Flags
    mov     [rsp+20h], rax
    lea     rax, (PER_IO_OPERATION_DATA PTR [r10]).Overlapped
    mov     [rsp+28h], rax
    mov     QWORD PTR [rsp+30h], 0
    call    WSARecv
    cmp     eax, SOCKET_ERROR
    jnz     @F
    call    WSAGetLastError
    cmp     eax, ERROR_IO_PENDING   ; a pending overlapped receive is not an error
    jz      @F
    errMacro err9, 1
@@:
    jmp     @whileLoop

@exit1:
    mov     rcx, 0
    call    ExitThread
ServerWorkerThread endp

;-----------------------------------------------------------------------
; main - entry point of the IOCP IPv6 echo server.
; ABI:  Microsoft x64. Leaves through ExitProcess, never through ret.
; Steps: WSAStartup -> output semaphore -> completion port ->
;        2 * dwNumberOfProcessors worker threads -> listening IPv6
;        socket on PORT -> accept loop that binds each accepted socket
;        to the port and posts its first overlapped receive.
; Register roles: ebx = worker-thread loop counter, r12d = number of
;        worker threads to create (both are callee-saved, so they
;        survive the API calls inside the loop and are listed in USES).
; NOTE(review): errMacro is defined in iocpHelper.inc (not shown here);
; presumably it reports the error and leaves through @exit1 - confirm.
;-----------------------------------------------------------------------
main proc uses rbx rdi r12
    LOCAL wsadata:WSADATA
    LOCAL CompletionPort : HANDLE
    LOCAL SystemInfo : SYSTEM_INFO
    LOCAL ThreadId : DWORD
    LOCAL Listen : SOCKET
    LOCAL Accept : SOCKET
    LOCAL InternetAddr : SOCKADDR_IN6
    LOCAL PerHandleData : LPPER_HANDLE_DATA
    LOCAL PerIoData : LPPER_IO_OPERATION_DATA
    LOCAL RecvBytes : DWORD
    LOCAL Flags : DWORD

    sub     rsp, 38h                ; outgoing area: 4 shadow slots + 3 stack arguments
    and     rsp, -16                ; every call below needs a 16-byte aligned rsp

    mov     ecx, 202h               ; Winsock version 2.2
    lea     rdx, wsadata
    call    WSAStartup
    cmp     eax, 0
    jz      @F
    lea     rcx, err1
    mov     edx, eax                ; WSAStartup returns its error code directly
    call    printf
    jmp     @exit2
@@:
    ; CreateSemaphoreA(NULL, 1, 1, NULL)
    mov     rcx, NULL
    mov     edx, 1
    mov     r8d, 1
    mov     r9, NULL
    call    CreateSemaphoreA
    cmp     rax, 0
    jnz     @F
    errMacro err21
@@:
    mov     SemaphoreObject, rax

    ; CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0): new, unbound port
    mov     rcx, INVALID_HANDLE_VALUE
    mov     rdx, NULL
    mov     r8, 0
    mov     r9, 0
    call    CreateIoCompletionPort
    cmp     rax, 0
    jnz     @F
    errMacro err2
@@:
    mov     CompletionPort, rax

    lea     rcx, SystemInfo
    call    GetSystemInfo

    lea     rax, SystemInfo
    mov     r12d, (SYSTEM_INFO PTR [rax]).dwNumberOfProcessors
    shl     r12d, 1                 ; two worker threads per processor

    mov     ebx, 0
@loop1start:
    cmp     ebx, r12d
    jae     @loop1exit
    ; CreateThread(NULL, 0, ServerWorkerThread, CompletionPort, 0, &ThreadId)
    mov     rcx, NULL
    mov     rdx, 0
    lea     r8, ServerWorkerThread
    mov     r9, CompletionPort
    mov     dword ptr [rsp+20h], 0
    lea     rax, ThreadId
    mov     [rsp+28h], rax
    call    CreateThread
    cmp     rax, 0
    jnz     @F
    errMacro err3
@@:
    mov     rcx, rax                ; the handle is not needed afterwards
    call    CloseHandle
    inc     ebx
    jmp     @loop1start
@loop1exit:

    ; WSASocketA(AF_INET6, SOCK_STREAM, 0, NULL, 0, WSA_FLAG_OVERLAPPED)
    mov     rcx, AF_INET6
    mov     rdx, SOCK_STREAM
    mov     r8, 0
    mov     r9, NULL
    mov     DWORD PTR [rsp+20h], 0
    mov     DWORD PTR [rsp+28h], WSA_FLAG_OVERLAPPED
    call    WSASocketA
    cmp     rax, INVALID_HANDLE_VALUE ;INVALID_SOCKET
    jnz     @F
    errMacro err4, 1
@@:
    mov     Listen, rax
    lea     rdi, InternetAddr
    mov     al, 0
    mov     rcx, sizeof InternetAddr
    cld
    rep stosb                       ; zero the whole address structure

    mov     InternetAddr.sin6_family, AF_INET6
    mov     ecx, PORT
    call    htons
    mov     InternetAddr.sin6_port, ax
    ; no need to fill in6addr_any because InternetAddr.sin6_addr already filled with zeros
    mov     rcx, Listen
    lea     rdx, InternetAddr
    mov     r8d, sizeof InternetAddr
    call    bind
    cmp     eax, SOCKET_ERROR
    jnz     @F
    errMacro err5, 1
@@:
    mov     rcx, Listen
    mov     edx, SOMAXCONN
    call    listen
    cmp     eax, SOCKET_ERROR
    jnz     @F
    errMacro err6, 1
@@:
    call    GetProcessHeap
    cmp     rax, 0
    jnz     @F
    errMacro err10
@@:
    mov     heapHandle, rax

@loop2start:
    ; WSAAccept(Listen, NULL, NULL, NULL, 0)
    mov     rcx, Listen
    mov     rdx, NULL
    mov     r8, NULL
    mov     r9, NULL
    mov     QWORD PTR [rsp+20h], 0  ; dwCallbackData is pointer-sized: clear all 64 bits
    call    WSAAccept
    cmp     rax, INVALID_HANDLE_VALUE ; WSAAccept returns a 64-bit SOCKET (all ones on failure)
    jnz     @F
    errMacro err7, 1
@@:
    mov     Accept, rax
    mov     rcx, heapHandle
    mov     edx, HEAP_ZERO_MEMORY
    mov     r8, sizeof PER_HANDLE_DATA
    call    HeapAlloc
    cmp     rax, 0
    jnz     @F
    errMacro err8
@@:
    mov     PerHandleData, rax
    lea     rcx, msg2
    mov     rdx, Accept
    call    printf

    mov     r8, PerHandleData
    mov     rax, Accept
    mov     (PER_HANDLE_DATA PTR [r8]).Socket, rax

    ; Bind the accepted socket to the port; PerHandleData is its completion key.
    mov     rcx, Accept
    mov     rdx, CompletionPort
    mov     r8, PerHandleData
    mov     r9, 0
    call    CreateIoCompletionPort
    cmp     rax, 0
    jnz     @F
    errMacro err2
@@:
    mov     rcx, heapHandle
    mov     edx, HEAP_ZERO_MEMORY
    mov     r8, sizeof PER_IO_OPERATION_DATA
    call    HeapAlloc
    cmp     rax, 0
    jnz     @F
    errMacro err8
@@:
    mov     PerIoData, rax

    ; r10 stays valid down to the WSARecv call: nothing in between is a call.
    mov     r10, PerIoData
    lea     rdi, (PER_IO_OPERATION_DATA PTR [r10]).Overlapped
    mov     al, 0
    mov     rcx, sizeof OVERLAPPED
    cld
    rep stosb
    mov     (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND, 0
    mov     (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV, 0
    mov     (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.len, DATA_BUFSIZE
    lea     r9, (PER_IO_OPERATION_DATA PTR [r10]).Buffer
    mov     (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.buf, r9

    mov     Flags, 0

    ; WSARecv(Accept, &DataBuf, 1, &RecvBytes, &Flags, &Overlapped, NULL)
    mov     rcx, Accept
    lea     rdx, (PER_IO_OPERATION_DATA PTR [r10]).DataBuf
    mov     r8, 1
    lea     r9, RecvBytes
    lea     rax, Flags
    mov     [rsp+20h], rax
    lea     rax, (PER_IO_OPERATION_DATA PTR [r10]).Overlapped
    mov     [rsp+28h], rax
    mov     QWORD PTR [rsp+30h], 0
    call    WSARecv
    cmp     eax, SOCKET_ERROR
    jnz     @F
    call    WSAGetLastError
    cmp     eax, ERROR_IO_PENDING   ; a pending overlapped receive is not an error
    jz      @F
    errMacro err9, 1
@@:
    jmp     @loop2start

@exit1:
    mov     rcx, SemaphoreObject
    call    CloseHandle
    call    WSACleanup
@exit2:
    mov     ecx, 0
    call    ExitProcess
main endp

end


Client:


;\masm32\bin\ml64  -c -Zp8 iocpClient.asm
;\masm32\bin\link /ENTRY:main /SUBSYSTEM:console /MACHINE:X64 iocpClient.obj

OPTION casemap:NONE

include iocpHelper.inc

.data
g_ThreadInfo THREADINFO <>
g_hCleanupEvent WSAEVENT ?
g_Options OPTIONS <>
heapHandle HANDLE ?
SemaphoreObject HANDLE ?

.code

;-----------------------------------------------------------------------
; SendBuffer - send g_Options.nBufSize bytes on one client socket.
; ABI:  Microsoft x64.
; In:   ecx = nThreadNum (index into g_ThreadInfo.sd)
;       rdx = outbuf (start of the data to send)
; Out:  eax = 1 when the whole buffer was sent,
;       eax = 0 when send returned 0 before the buffer was complete.
; RSP is lowered and re-aligned by hand for the calls below, so it is
; put back to its post-prologue value before every ret: the epilogue
; the assembler generates for USES pops rdi/r12 relative to RSP and
; would otherwise reload them from the wrong stack slots.
; NOTE(review): errMacro / msgPrint come from iocpHelper.inc (not shown).
;-----------------------------------------------------------------------
SendBuffer proc uses rdi r12 nThreadNum:sdword, outbuf:PTR
    LOCAL nTotalSend : SDWORD
    LOCAL bufp : PTR
    LOCAL rspAfterProlog : QWORD

    mov     rspAfterProlog, rsp     ; remember RSP as the generated prologue left it
    sub     rsp, 20h                ; shadow space for the calls below
    and     rsp, -16

    mov     nThreadNum, ecx         ; home the register arguments
    mov     outbuf, rdx
    mov     bufp, rdx               ; bufp = next byte to send
    mov     nTotalSend, 0

    mov     r12d, g_Options.nBufSize
@loopStart:
    cmp     r12d, nTotalSend
    jbe     @loopEnd                ; everything has been sent
    mov     ecx, nThreadNum
    lea     rdi, g_ThreadInfo.sd
    lea     rdi, [rdi+rcx*sizeof SOCKET]
    mov     rcx, [rdi]              ; rcx = g_ThreadInfo.sd[nThreadNum]
    mov     rdx, bufp
    mov     r8d, g_Options.nBufSize
    sub     r8d, nTotalSend         ; bytes still to send
    mov     r9d, 0
    call    send
    cmp     eax, SOCKET_ERROR
    jnz     @F
    errMacro err19,1
@@:
    cmp     eax, 0
    jnz     @F
    msgPrint msg3, nThreadNum
    jmp     @exit1
@@:
    add     nTotalSend, eax
    mov     eax, eax                ; send returns a 32-bit int: zero-extend before pointer math
    add     bufp, rax
    jmp     @loopStart
@loopEnd:
    mov     eax, 1
    mov     rsp, rspAfterProlog
    ret
@exit1:
    mov     eax, 0
    mov     rsp, rspAfterProlog
    ret
SendBuffer endp

;-----------------------------------------------------------------------
; RecvBuffer - receive g_Options.nBufSize bytes from one client socket.
; ABI:  Microsoft x64.
; In:   ecx = nThreadNum (index into g_ThreadInfo.sd)
;       rdx = inbuf (where the received data is stored)
; Out:  eax = 1 when the whole buffer was filled,
;       eax = 0 when recv returned 0 before the buffer was complete.
; RSP is lowered and re-aligned by hand for the calls below, so it is
; put back to its post-prologue value before every ret: the epilogue
; the assembler generates for USES pops rdi/r12 relative to RSP and
; would otherwise reload them from the wrong stack slots.
; NOTE(review): errMacro / msgPrint come from iocpHelper.inc (not shown).
;-----------------------------------------------------------------------
RecvBuffer proc uses rdi r12 nThreadNum:sdword, inbuf:PTR
    LOCAL nTotalRecv : SDWORD
    LOCAL bufp : PTR
    LOCAL rspAfterProlog : QWORD

    mov     rspAfterProlog, rsp     ; remember RSP as the generated prologue left it
    sub     rsp, 20h                ; shadow space for the calls below
    and     rsp, -16

    mov     nThreadNum, ecx         ; home the register arguments
    mov     inbuf, rdx
    mov     bufp, rdx               ; bufp = next free byte in the buffer
    mov     nTotalRecv, 0

    mov     r12d, g_Options.nBufSize
@loopStart:
    cmp     r12d, nTotalRecv
    jbe     @loopEnd                ; the buffer is full
    mov     ecx, nThreadNum
    lea     rdi, g_ThreadInfo.sd
    lea     rdi, [rdi+rcx*sizeof SOCKET]
    mov     rcx, [rdi]              ; rcx = g_ThreadInfo.sd[nThreadNum]
    mov     rdx, bufp
    mov     r8d, g_Options.nBufSize
    sub     r8d, nTotalRecv         ; bytes still expected
    mov     r9d, 0
    call    recv
    cmp     eax, SOCKET_ERROR
    jnz     @F
    errMacro err20,1
@@:
    cmp     eax, 0
    jnz     @F
    msgPrint msg3, nThreadNum
    jmp     @exit1
@@:
    add     nTotalRecv, eax
    mov     eax, eax                ; recv returns a 32-bit int: zero-extend before pointer math
    add     bufp, rax
    jmp     @loopStart
@loopEnd:
    mov     eax, 1
    mov     rsp, rspAfterProlog
    ret
@exit1:
    mov     eax, 0
    mov     rsp, rspAfterProlog
    ret
RecvBuffer endp

; EchoThread - client worker: repeatedly sends a buffer and checks the echo.
; In:  rcx = pointer to an SDWORD holding this thread's number.
; Does not return to its caller: every path ends in ExitThread.
; NOTE(review): msgPrint and errMacro come from iocpHelper.inc (not shown here);
; what they clobber and whether errMacro transfers control is not visible - confirm.
EchoThread proc uses rbx rsi rdi r12 lpParameter : PTR
LOCAL inbuf : PTR
LOCAL outbuf : PTR
LOCAL nThreadNum : SDWORD

sub rsp, 20h ; shadow space for the calls below
and rsp, -16 ; keep rsp 16-byte aligned at every call

mov lpParameter, rcx
mov inbuf,0 ; 0 = "not allocated", tested again in the cleanup at @exit1
mov outbuf,0
mov eax, SDWORD PTR [rcx] ; read the thread number through the parameter pointer
mov nThreadNum, eax

msgPrint msg1, nThreadNum

; inbuf = HeapAlloc(heapHandle, HEAP_ZERO_MEMORY, g_Options.nBufSize)
mov rcx, heapHandle
mov edx, HEAP_ZERO_MEMORY
mov r8d, g_Options.nBufSize
call HeapAlloc
cmp rax,0
jnz @F
errMacro err8
@@:
mov inbuf, rax
; outbuf = HeapAlloc(heapHandle, HEAP_ZERO_MEMORY, g_Options.nBufSize)
mov rcx, heapHandle
mov edx, HEAP_ZERO_MEMORY
mov r8d, g_Options.nBufSize
call HeapAlloc
cmp rax,0
jnz @F
errMacro err8
@@:
mov outbuf, rax

; Fill the whole output buffer with the low byte of the thread number.
mov rdi, outbuf
mov al, BYTE PTR nThreadNum
mov ecx,  g_Options.nBufSize
cld
rep stosb
@loopStart:
; One round trip: send outbuf, receive into inbuf; stop when either returns 0.
mov ecx, nThreadNum
mov rdx, outbuf
call SendBuffer
cmp eax, 0
je @loopEnd
mov ecx, nThreadNum
mov rdx, inbuf
call RecvBuffer
cmp eax,0
je @loopEnd
; Spot check only: compare the first 8 bytes of both buffers ...
mov rsi, inbuf
mov rax, qword ptr [rsi]
mov rdi, outbuf
mov rbx, qword ptr [rdi]
cmp rax, rbx
jnz @err
; ... and the last 8 bytes (offset nBufSize - 8); the middle is not compared.
mov eax, g_Options.nBufSize
sub eax, 8
add rsi, rax
add rdi, rax
mov rax, qword ptr [rsi]
mov rbx, qword ptr [rdi]
cmp rax, rbx
jnz @err
msgPrint msg4, nThreadNum ; both checks matched
jmp @loopStart
@err:
msgPrint msg5, nThreadNum ; one of the two checks differed; falls through to cleanup
@loopEnd:

@exit1:
; Free whichever buffers were allocated, then end the thread.
cmp inbuf,0
jz @F
mov rcx, heapHandle
mov edx,0
mov r8, inbuf
call HeapFree
@@:
cmp outbuf,0
jz @F
mov rcx, heapHandle
mov edx,0
mov r8, outbuf
call HeapFree
@@:
mov rcx, 0
call ExitThread
EchoThread endp

;-----------------------------------------------------------------------
; CreateConnectedSocket - resolve the configured host/port and connect.
; ABI:  Microsoft x64.
; In:   ecx = nThreadNum; the socket is stored in g_ThreadInfo.sd[nThreadNum]
; Out:  eax = 1 on the path that reaches freeaddrinfo,
;       eax = 0 when getaddrinfo fails.
; Register roles: r12 = addrinfo result (callee-saved, so it survives
;       socket/connect), rdi = address of this thread's socket slot.
; RSP is lowered and re-aligned by hand for the calls below, so it is
; put back to its post-prologue value before every ret: the epilogue
; the assembler generates for USES pops rdi/r12 relative to RSP and
; would otherwise reload them from the wrong stack slots.
; NOTE(review): errMacro / msgPrint come from iocpHelper.inc (not shown).
;-----------------------------------------------------------------------
CreateConnectedSocket proc uses rdi r12 nThreadNum : dword
    LOCAL hints : addrinfo
    LOCAL addr_srv : PTR addrinfo
    LOCAL rspAfterProlog : QWORD

    mov     rspAfterProlog, rsp     ; remember RSP as the generated prologue left it
    sub     rsp, 20h                ; shadow space for the calls below
    and     rsp, -16

    mov     nThreadNum, ecx         ; home the register argument

    ; hints = all zero, then IPv6 / stream / TCP
    lea     rdi, hints
    mov     al, 0
    mov     rcx, sizeof addrinfo
    cld
    rep stosb
    mov     hints.ai_flags, 0
    mov     hints.ai_family, AF_INET6
    mov     hints.ai_socktype, SOCK_STREAM
    mov     hints.ai_protocol, IPPROTO_TCP

    ; getaddrinfo(g_Options.szHostname, g_Options.port, &hints, &addr_srv)
    lea     rcx, g_Options.szHostname
    lea     rdx, g_Options.port
    lea     r8, hints
    lea     r9, addr_srv
    call    getaddrinfo
    cmp     eax, 0
    jz      @F
    mov     r12d, eax
    msgPrint err15, r12d
    jmp     @exit1
@@:
    mov     r12, addr_srv
    mov     ecx, (addrinfo PTR [r12]).ai_family
    mov     edx, (addrinfo PTR [r12]).ai_socktype
    mov     r8d, (addrinfo PTR [r12]).ai_protocol
    call    socket
    mov     ecx, nThreadNum
    lea     rdi, g_ThreadInfo.sd
    lea     rdi, [rdi+rcx*sizeof SOCKET]
    mov     [rdi], rax              ; g_ThreadInfo.sd[nThreadNum] = new socket
    cmp     rax, INVALID_SOCKET
    jnz     @F
    errMacro err16, 1
@@:
    ; connect(sd, ai_addr, ai_addrlen)
    mov     rcx, [rdi]
    mov     rdx, (addrinfo PTR [r12]).ai_addr
    mov     r8, (addrinfo PTR [r12]).ai_addrlen
    call    connect
    cmp     eax, SOCKET_ERROR
    jnz     @F
    errMacro err17, 1
@@:
    mov     rcx, r12
    call    freeaddrinfo
    mov     eax, 1
    mov     rsp, rspAfterProlog
    ret
@exit1:
    mov     eax, 0
    mov     rsp, rspAfterProlog
    ret
CreateConnectedSocket endp

;-----------------------------------------------------------------------
; main - entry point of the IOCP echo stress client.
; ABI:  Microsoft x64. Leaves through ExitProcess, never through ret.
; Steps: mark every g_ThreadInfo slot unused -> WSAStartup -> cleanup
;        event, output semaphore, process heap -> one connected socket
;        plus one EchoThread per requested thread -> wait for the
;        threads that were actually started -> cleanup.
; Register roles: rbx = index of the connection/thread being set up and,
;        after the spawn loop, the number of threads started. rbx is
;        callee-saved and is the only value relied on across calls.
; NOTE(review): errMacro / msgPrint come from iocpHelper.inc (not shown).
;-----------------------------------------------------------------------
main proc uses rbx rsi rdi r12
    LOCAL nThreadNum[MAXTHREADS] : SDWORD
    LOCAL wsadata : WSADATA
    LOCAL ThreadId : DWORD

    sub     rsp, 30h                ; outgoing area: 4 shadow slots + 2 stack arguments
    and     rsp, -16                ; every call below needs a 16-byte aligned rsp

    ; Mark all MAXTHREADS socket and thread-handle slots as unused.
    mov     ebx, 0
    lea     rsi, g_ThreadInfo.sd
    lea     rdi, g_ThreadInfo.hThread
    lea     r11, nThreadNum
@forLoop1Start:
    cmp     ebx, MAXTHREADS
    jae     @forLoop1End
    mov     rax, INVALID_SOCKET
    mov     [rsi], rax
    mov     [rdi], rax
    mov     DWORD PTR [r11], 0
    add     r11, sizeof SDWORD
    add     rsi, 8
    add     rdi, 8
    inc     ebx
    jmp     @forLoop1Start          ; back-edge: without it only slot 0 is initialised
@forLoop1End:

    mov     g_hCleanupEvent, WSA_INVALID_EVENT

    mov     ecx, 202h               ; Winsock version 2.2
    lea     rdx, wsadata
    call    WSAStartup
    cmp     eax, 0
    jz      @F
    mov     r12d, eax
    msgPrint err1, r12d
    jmp     @exit2
@@:
    call    WSACreateEvent
    cmp     rax, WSA_INVALID_EVENT
    jnz     @F
    errMacro err14, 1
@@:
    mov     g_hCleanupEvent, rax

    ; CreateSemaphoreA(NULL, 1, 1, NULL)
    mov     rcx, NULL
    mov     edx, 1
    mov     r8d, 1
    mov     r9, NULL
    call    CreateSemaphoreA
    cmp     rax, 0
    jnz     @F
    errMacro err21
@@:
    mov     SemaphoreObject, rax

    call    GetProcessHeap
    cmp     rax, 0
    jnz     @F
    errMacro err10
@@:
    mov     heapHandle, rax

    ; Connect one socket and start one EchoThread per requested thread.
    mov     ebx, 0
@forLoop2Start:
    cmp     ebx, g_Options.nTotalThreads
    jae     @forLoop2End
    mov     ecx, ebx
    call    CreateConnectedSocket
    cmp     eax, 1
    jne     @forLoop2End            ; no connection: stop spawning, keep what already runs

    lea     r11, nThreadNum
    lea     r11, [r11+rbx*sizeof SDWORD]
    mov     DWORD PTR [r11], ebx    ; nThreadNum[i] = i; the thread reads it through r9

    ; CreateThread(NULL, 0, EchoThread, &nThreadNum[i], 0, &ThreadId)
    mov     rcx, NULL
    mov     rdx, 0
    lea     r8, EchoThread
    mov     r9, r11
    mov     dword ptr [rsp+20h], 0
    lea     rax, ThreadId
    mov     [rsp+28h], rax
    call    CreateThread
    cmp     rax, NULL
    jnz     @F
    call    GetLastError
    mov     r12d, eax
    msgPrint err18, r12d
    jmp     @forLoop2End
@@:
    lea     rdi, g_ThreadInfo.hThread
    mov     [rdi+rbx*sizeof HANDLE], rax    ; handles are 8 bytes wide: index by rbx, not by a 4-byte stride
    inc     ebx
    mov     rcx, 1000
    call    Sleep ; let's give some time here
    jmp     @forLoop2Start
@forLoop2End:
    ; rbx = number of threads started; wait only for those.
    cmp     ebx, 0
    je      @noThreads
    mov     ecx, ebx
    lea     rdx, g_ThreadInfo.hThread
    mov     r8d, TRUE
    mov     r9d, INFINITE
    call    WaitForMultipleObjects
@noThreads:
    ; WSAWaitForMultipleEvents(1, &g_hCleanupEvent, TRUE, WSA_INFINITE, FALSE)
    ; NOTE(review): the original listing marked this call "crash", and nothing
    ; visible in this file signals g_hCleanupEvent, so this wait may never
    ; return - confirm against iocpHelper.inc before relying on it.
    mov     ecx, 1
    lea     rdx, g_hCleanupEvent
    mov     r8d, TRUE
    mov     r9d, WSA_INFINITE
    mov     DWORD PTR [rsp+20h], FALSE
    call    WSAWaitForMultipleEvents
    mov     rcx, g_hCleanupEvent
    call    WSACloseEvent
@exit1:
    mov     rcx, SemaphoreObject
    call    CloseHandle
    call    WSACleanup
@exit2:
    mov     ecx, 0
    call    ExitProcess
main endp

end



six_L

Say you, Say me, Say the codes together for ever.


six_L

hello, aw27
I translated your code into UASM64 style, but an invalid-address error occurs at the "1-WSARecv" call.
I can't find where the error is.
QuoteUASM v2.46, Dec  4 2017, Masm-compatible assembler.
Portions Copyright (c) 1992-2002 Sybase, Inc. All Rights Reserved.
Source code is available under the Sybase Open Watcom Public License.

iocpS_2.asm: 361 lines, 3 passes, 164 ms, 0 warnings, 0 errors
Microsoft (R) Incremental Linker Version 10.00.40219.01
Copyright (C) Microsoft Corporation.  All rights reserved.
option casemap:none
option frame:auto
option win64:3

include \UASM64\include\windows.inc

includelib \UASM64\Lib\msvcrt.lib
includelib \UASM64\Lib\kernel32.lib
includelib \UASM64\Lib\user32.lib
includelib \UASM64\Lib\ws2_32.lib

; Constants and structure layouts used by the listing below.
PORT EQU 5150 ; TCP port the server binds to (passed to htons in main)
DATA_BUFSIZE EQU 8192 ; size in bytes of the per-operation Buffer
MAXTHREADS EQU 64 ; element count of the THREADINFO arrays
WAIT_OBJECT_0 EQU 0 ; value ErrorMessage compares the WaitForSingleObject result against

; IPv6 socket address as filled in by main before bind.
SOCKADDR_IN6 STRUCT
sin6_family WORD ?
sin6_port WORD ?
sin6_flowinfo   DWORD ?
sin6_addr BYTE 16 DUP (?)
sin6_scope_id   DWORD ?
SOCKADDR_IN6 ENDS

; Per-connection block; its address is the completion key given to
; CreateIoCompletionPort for each accepted socket.
LPPER_HANDLE_DATA TYPEDEF PTR PER_HANDLE_DATA
PER_HANDLE_DATA STRUCT
Socket SOCKET ?
PER_HANDLE_DATA ENDS

; Per-operation block. Overlapped has to stay the first member: the address of
; Overlapped is what WSARecv/WSASend receive, and the worker thread treats the
; OVERLAPPED pointer it dequeues as a pointer to the whole structure.
LPPER_IO_OPERATION_DATA TYPEDEF PTR PER_IO_OPERATION_DATA
PER_IO_OPERATION_DATA STRUCT
Overlapped OVERLAPPED <>
DataBuf WSABUF <> ; buf/len pair handed to WSARecv / WSASend
Buffer SBYTE DATA_BUFSIZE dup (?) ; storage that DataBuf.buf points into
BytesSEND DWORD ? ; byte count recorded when a send completes
BytesRECV DWORD ? ; byte count recorded when a receive completes; 0 while none is pending echo
PER_IO_OPERATION_DATA ENDS

; The four structures below are not referenced by the server code in this listing.
THREADINFO STRUCT
hThread HANDLE MAXTHREADS dup (?)
sd SOCKET MAXTHREADS dup (?)
THREADINFO ENDS

; Default field values: host "localhost", port "5150", 64 threads, 4096-byte buffers.
OPTIONS STRUCT
szHostname db "localhost",0
port db "5150",0
nTotalThreads SDWORD 64
nBufSize SDWORD 4096
OPTIONS ENDS

sockaddr STRUCT
sa_family WORD ?
sa_data SBYTE 14 DUP (?)
sockaddr ENDS

; NOTE(review): presumably mirrors the Winsock addrinfo layout for 64-bit
; builds (ai_addrlen declared as QWORD) - confirm against the SDK header.
addrinfo STRUCT
ai_flags SDWORD ?
ai_family SDWORD ?
ai_socktype SDWORD ?
ai_protocol SDWORD ?
ai_addrlen QWORD ?
ai_canonname LPVOID ?
ai_addr LPVOID ?
ai_next LPVOID ?
addrinfo ENDS

; C runtime printf: format string followed by a variable argument list.
printf PROTO :PTR, :VARARG

.data
;extern in6addr_any:oword
msg2 db "Socket %d got connected",10,0

.data?
heapHandle HANDLE ?
SemaphoreObject HANDLE ?

.code

;-----------------------------------------------------------------------
; ErrorMessage - print "<caption>:<system error text>" for the last error.
; In:   lpCaption = zero-terminated caption string
;       nFlag     = TRUE  -> use WSAGetLastError
;                   other -> use GetLastError
; Output is serialised through SemaphoreObject; if the semaphore cannot
; be acquired within 1000 ms nothing is printed.
; The error code is fetched before any other API call so that the wait
; cannot disturb it. The semaphore is released and the message buffer
; freed only when they were actually obtained (the previous version did
; both unconditionally, on an unowned semaphore and an uninitialised
; pointer, whenever the wait failed).
;-----------------------------------------------------------------------
ErrorMessage Proc USES rsi rdi lpCaption:QWORD,nFlag:BOOL
    Local lpErrorMessage:QWORD

    mov lpErrorMessage, 0

    .if nFlag==TRUE
        call WSAGetLastError
    .else
        call GetLastError
    .endif
    mov edi, eax                        ; edi = error code (rdi survives the calls below)

    invoke WaitForSingleObject,SemaphoreObject,1000
    cmp eax, WAIT_OBJECT_0
    jnz @1                              ; semaphore not acquired: nothing to print or release

    ; FORMAT_MESSAGE_ALLOCATE_BUFFER stores a LocalAlloc'ed pointer in lpErrorMessage.
    lea rsi, lpErrorMessage
    invoke FormatMessage, FORMAT_MESSAGE_ALLOCATE_BUFFER or FORMAT_MESSAGE_FROM_SYSTEM, NULL, edi, LANG_NEUTRAL,rsi,0,NULL
    invoke printf,CStr("%s:%s",13,10),lpCaption,lpErrorMessage
    invoke ReleaseSemaphore,SemaphoreObject,1,NULL

    .if lpErrorMessage != 0
        invoke LocalFree, lpErrorMessage
    .endif
@1:
    ret

ErrorMessage EndP

;-----------------------------------------------------------------------
; ServerWorkerThread - completion-port worker of the echo server (UASM).
; In:   CompletionPortID = I/O completion port handle (thread parameter).
; Out:  never returns to its caller; the only way out is ExitThread at
;       @exit1.
; For every dequeued completion packet:
;   * failed or 0-byte operation -> close the socket, free both blocks
;   * BytesRECV > BytesSEND      -> send the part not echoed yet
;   * otherwise                  -> post the next overlapped receive
; r10 and r11 are volatile in the Microsoft x64 ABI, so r10 is reloaded
; from PerIoData after every call that is followed by another use of it.
;-----------------------------------------------------------------------
ServerWorkerThread proc CompletionPortID : PTR
    LOCAL CompletionPort : HANDLE
    LOCAL BytesTransferred : DWORD
    LOCAL PerHandleData : LPPER_HANDLE_DATA
    LOCAL PerIoData : LPPER_IO_OPERATION_DATA
    LOCAL SendBytes : DWORD
    LOCAL RecvBytes : DWORD
    LOCAL Flags : DWORD

    mov rcx, CompletionPortID
    mov CompletionPort, rcx

@whileLoop:
    ; The fourth argument receives the OVERLAPPED pointer, so it has to be
    ; the ADDRESS of PerIoData, not its current value.
    invoke GetQueuedCompletionStatus,CompletionPort,addr BytesTransferred,addr PerHandleData,addr PerIoData,INFINITE
    cmp eax, 0
    jnz @F
    invoke ErrorMessage,CStr("GetQueuedCompletionStatus"),FALSE
    cmp PerIoData, 0
    je @exit1                           ; nothing was dequeued: the port itself failed
    mov BytesTransferred, 0             ; failed I/O on a connection: take the close path
@@:
    cmp BytesTransferred, 0
    jnz @F1

    ; Drop the connection and its two heap blocks.
    mov r8, PerHandleData
    invoke closesocket,(PER_HANDLE_DATA PTR [r8]).Socket
    cmp eax, SOCKET_ERROR
    jnz @F
    invoke ErrorMessage,CStr("closesocket"),TRUE
@@:
    invoke HeapFree,heapHandle,0,PerHandleData
    invoke HeapFree,heapHandle,0,PerIoData
    jmp @whileLoop

@F1:
    mov r10, PerIoData
    cmp (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV,0
    jnz @F
    ; BytesRECV == 0: this packet completes a receive.
    mov eax, BytesTransferred
    mov (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV, eax
    mov (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND,0
    jmp @F2
@@:
    ; This packet completes a send. BytesSEND is the running total echoed so
    ; far (it is the offset into Buffer below), so add instead of overwrite.
    mov eax, BytesTransferred
    add (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND, eax
@F2:
    mov eax, (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV
    cmp eax, (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND
    jbe @F3

    ; Part of the received data is still unsent: send the remainder.
    mov r10, PerIoData
    invoke RtlZeroMemory,ADDR (PER_IO_OPERATION_DATA PTR [r10]).Overlapped, sizeof OVERLAPPED
    mov r10, PerIoData                  ; r10 is volatile: reload after the call

    lea rax, (PER_IO_OPERATION_DATA PTR [r10]).Buffer
    mov ecx, (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND
    add rax, rcx
    mov (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.buf, rax      ; buf = Buffer + BytesSEND

    mov ecx, (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV
    sub ecx, (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND
    mov (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.len, ecx      ; len = BytesRECV - BytesSEND
    mov r11, PerHandleData
    invoke WSASend,(PER_HANDLE_DATA PTR [r11]).Socket,addr (PER_IO_OPERATION_DATA PTR [r10]).DataBuf,\
        1,addr SendBytes,0,addr (PER_IO_OPERATION_DATA PTR [r10]).Overlapped,0
    cmp eax, SOCKET_ERROR
    jnz @F
    call WSAGetLastError
    cmp eax, ERROR_IO_PENDING           ; a pending overlapped send is not an error
    jz @F
    invoke ErrorMessage,CStr("WSASend"),TRUE
@@:
    jmp @whileLoop

@F3:
    ; Everything received has been echoed: post the next receive.
    mov r10, PerIoData
    mov (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV, 0
    mov Flags,0
    invoke RtlZeroMemory,ADDR (PER_IO_OPERATION_DATA PTR [r10]).Overlapped, sizeof OVERLAPPED
    mov r10, PerIoData                  ; r10 is volatile: reload after the call

    mov (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.len, DATA_BUFSIZE
    lea rax, (PER_IO_OPERATION_DATA PTR [r10]).Buffer
    mov (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.buf, rax

    mov r11, PerHandleData
    invoke WSARecv,(PER_HANDLE_DATA PTR [r11]).Socket,addr (PER_IO_OPERATION_DATA PTR [r10]).DataBuf,\
        1,addr RecvBytes,addr Flags,addr (PER_IO_OPERATION_DATA PTR [r10]).Overlapped,0
    cmp eax, SOCKET_ERROR
    jnz @F
    call WSAGetLastError
    cmp eax, ERROR_IO_PENDING           ; a pending overlapped receive is not an error
    jz @F
    invoke ErrorMessage,CStr("1-WSARecv"),TRUE
@@:
    jmp @whileLoop

@exit1:
    invoke ExitThread,NULL
ServerWorkerThread endp

;-----------------------------------------------------------------------
; main - entry point of the IOCP IPv6 echo server (UASM version).
; Leaves through ExitProcess, never through ret.
; Steps: WSAStartup -> output semaphore -> completion port ->
;        2 * dwNumberOfProcessors worker threads -> listening IPv6
;        socket on PORT -> accept loop that binds each accepted socket
;        to the port and posts its first overlapped receive.
; Register roles: ebx = worker-thread loop counter, r12d = number of
;        worker threads, edi = scratch for the WSAStartup error code.
; Every failure now leaves through the cleanup labels instead of
; falling through with whatever ErrorMessage left in rax.
;-----------------------------------------------------------------------
main proc uses rbx rdi r12
    LOCAL wsadata:WSADATA
    LOCAL CompletionPort : HANDLE
    LOCAL SystemInfo : SYSTEM_INFO
    LOCAL ThreadId : DWORD
    LOCAL Listen : SOCKET
    LOCAL Accept : SOCKET
    LOCAL InternetAddr : SOCKADDR_IN6
    LOCAL PerHandleData : LPPER_HANDLE_DATA
    LOCAL PerIoData : LPPER_IO_OPERATION_DATA
    LOCAL RecvBytes : DWORD
    LOCAL Flags : DWORD

    invoke WSAStartup,0202h,ADDR wsadata
    cmp eax,0
    jz @F
    ; WSAStartup returns its error code directly, and the output semaphore
    ; does not exist yet, so ErrorMessage cannot be used here.
    mov edi, eax
    invoke printf,CStr("WSAStartup failed with error %d",13,10),edi
    jmp @exit2
@@:
    invoke CreateSemaphoreA, NULL,1,1,NULL
    cmp rax, 0
    jnz @F
    invoke ErrorMessage,CStr("CreateSemaphoreA"),FALSE
    jmp @exitNoSemaphore                ; there is no semaphore handle to close
@@:
    mov SemaphoreObject, rax

    invoke CreateIoCompletionPort,INVALID_HANDLE_VALUE,NULL,NULL,NULL
    cmp rax, 0
    jnz @F
    invoke ErrorMessage,CStr("CreateIoCompletionPort"),FALSE
    jmp @exit1
@@:
    mov CompletionPort, rax

    invoke GetSystemInfo,addr SystemInfo

    lea rax, SystemInfo
    mov r12d, (SYSTEM_INFO PTR [rax]).dwNumberOfProcessors
    shl r12d, 1                         ; two worker threads per processor

    mov ebx, 0
@loop1start:
    cmp ebx, r12d
    jae @loop1exit
    invoke CreateThread,NULL,NULL,addr ServerWorkerThread,CompletionPort,0,addr ThreadId
    cmp rax, 0
    jnz @F
    invoke ErrorMessage,CStr("CreateThread"),FALSE
    jmp @exit1
@@:
    invoke CloseHandle,rax              ; the handle is not needed afterwards
    inc ebx
    jmp @loop1start
@loop1exit:

    invoke WSASocket,AF_INET6,SOCK_STREAM,0,NULL,0,WSA_FLAG_OVERLAPPED
    cmp rax, INVALID_HANDLE_VALUE ;INVALID_SOCKET
    jnz @F
    invoke ErrorMessage,CStr("WSASocket"),TRUE
    jmp @exit1
@@:
    mov Listen, rax
    invoke RtlZeroMemory,ADDR InternetAddr, sizeof InternetAddr

    mov InternetAddr.sin6_family, AF_INET6
    invoke htons,PORT
    mov InternetAddr.sin6_port, ax
    ; no need to fill in6addr_any because InternetAddr.sin6_addr already filled with zeros

    invoke bind,Listen,ADDR InternetAddr,sizeof InternetAddr
    cmp eax, SOCKET_ERROR
    jnz @F
    invoke ErrorMessage,CStr("bind"),TRUE
    jmp @exit1
@@:
    invoke listen,Listen,SOMAXCONN
    cmp eax, SOCKET_ERROR
    jnz @F
    invoke ErrorMessage,CStr("listen"),TRUE
    jmp @exit1
@@:
    invoke GetProcessHeap
    cmp rax, 0
    jnz @F
    invoke ErrorMessage,CStr("GetProcessHeap"),FALSE
    jmp @exit1
@@:
    mov heapHandle, rax

@loop2start:
    invoke WSAAccept,Listen,NULL,NULL,NULL,0
    cmp rax, INVALID_HANDLE_VALUE       ; WSAAccept returns a 64-bit SOCKET (all ones on failure)
    jnz @F
    invoke ErrorMessage,CStr("WSAAccept"),TRUE
    jmp @exit1
@@:
    mov Accept, rax
    invoke HeapAlloc,heapHandle,HEAP_ZERO_MEMORY,sizeof PER_HANDLE_DATA
    cmp rax,0
    jnz @F
    invoke ErrorMessage,CStr("HeapAlloc"),FALSE
    jmp @exit1
@@:
    mov PerHandleData, rax
    invoke printf,ADDR msg2,Accept

    mov r8, PerHandleData
    mov rax, Accept
    mov (PER_HANDLE_DATA PTR [r8]).Socket, rax

    ; Bind the accepted socket to the port; PerHandleData is its completion key.
    invoke CreateIoCompletionPort,Accept,CompletionPort,PerHandleData,0
    cmp rax, 0
    jnz @F
    invoke ErrorMessage,CStr("CreateIoCompletionPort"),FALSE
    jmp @exit1
@@:
    invoke HeapAlloc,heapHandle,HEAP_ZERO_MEMORY,sizeof PER_IO_OPERATION_DATA
    cmp rax,0
    jnz @F
    invoke ErrorMessage,CStr("HeapAlloc"),FALSE
    jmp @exit1
@@:
    mov PerIoData, rax

    mov r10, PerIoData
    invoke RtlZeroMemory,ADDR (PER_IO_OPERATION_DATA PTR [r10]).Overlapped, sizeof OVERLAPPED
    mov r10, PerIoData                  ; r10 is volatile: reload after the call

    mov (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND,0
    mov (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV,0
    mov (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.len, DATA_BUFSIZE
    lea rax, (PER_IO_OPERATION_DATA PTR [r10]).Buffer
    mov (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.buf, rax
    mov Flags,0
    invoke WSARecv,Accept,ADDR (PER_IO_OPERATION_DATA PTR [r10]).DataBuf,1,ADDR RecvBytes,ADDR Flags,\
        ADDR (PER_IO_OPERATION_DATA PTR [r10]).Overlapped,0
    cmp eax, SOCKET_ERROR
    jnz @F
    call WSAGetLastError
    cmp eax, ERROR_IO_PENDING           ; a pending overlapped receive is not an error
    jz @F
    invoke ErrorMessage,CStr("2-WSARecv"),TRUE
    jmp @exit1
@@:
    jmp @loop2start

@exit1:
    invoke CloseHandle,SemaphoreObject
@exitNoSemaphore:
    invoke WSACleanup
@exit2:
    invoke ExitProcess,NULL
main endp

end

Quote\UASM64\bin\uasm64 -c -win64 %name%.asm
    \UASM64\bin\Link /ENTRY:main /SUBSYSTEM:console /MACHINE:X64 %name%.obj
Say you, Say me, Say the codes together for ever.

aw27

A fast look shows that at least this is not according to the original:
invoke GetQueuedCompletionStatus,CompletionPort,addr BytesTransferred,addr PerHandleData,PerIoData,INFINITE

should be addr PerIoData.

Debugging is indeed hard.  :(

six_L

#5
HI,AW27
Thanks for your response.
The error is still there.
Quotemov PerIoData, rax   
   ;INT 3
   mov r10, PerIoData
   invoke RtlZeroMemory,ADDR (PER_IO_OPERATION_DATA PTR [r10]).Overlapped, sizeof OVERLAPPED

   mov (PER_IO_OPERATION_DATA PTR [r10]).BytesSEND,0
   mov (PER_IO_OPERATION_DATA PTR [r10]).BytesRECV,0
   mov (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.len, DATA_BUFSIZE
   lea rax, (PER_IO_OPERATION_DATA PTR [r10]).Buffer
   mov (PER_IO_OPERATION_DATA PTR [r10]).DataBuf.buf, rax
   mov Flags,0

   invoke WSARecv,Accept,ADDR (PER_IO_OPERATION_DATA PTR [r10]).DataBuf,1,ADDR RecvBytes,ADDR Flags,\
      ADDR (PER_IO_OPERATION_DATA PTR [r10]).Overlapped,0
   cmp eax, SOCKET_ERROR
   jnz @F
   call WSAGetLastError
   cmp eax, ERROR_IO_PENDING
   jz @F
   invoke ErrorMessage,CStr("2-WSARecv"),TRUE
now all is OK.
option fieldalign:16
;option frame:auto

Say you, Say me, Say the codes together for ever.

aw27

option fieldalign:8 should be OK as well.
I am not sure why option frame:auto causes problems; maybe you will have to check with the UASM team.

six_L

Quoteoption fieldalign:8 should be OK as well.
it's OK.
Say you, Say me, Say the codes together for ever.

Raistlin

#8
@Aw27 & other IOCP enthusiasts - It's a real shame there isn't much on IOCP / multi-threading anywhere.
The articles worthwhile are less than 10 on the entire Internet. Believe me I searched. What a struggle :(

I too, have been playing around with an old example as framework for my own and after 20 days got it working reasonably well in 32-bit ASM (IPv4).
For those interested: The original code has unsubstantiated claims and blatant logic errors all over the place. Ex: See code comments at the code start
and then look at the equates, code logic etc. - What a mess!, It was a real issue to clean it up (understatement)
(see:http://www.winsocketdotnetworkprogramming.com/winsock2programming/winsock2advancedscalableapp6b.html)

Anyhow, I was wondering if anyone has managed to get around the single WSARecv post per thread issue.
I am interested in the simplest algorithmic solution that doesn't cause thread/connection stalls, or at least some advice, please.

At the moment Aw27 and my examples will deal with multiple connections easily - but only make progress on 1 connection
should a client continuously send. It seems from what I've read, that there would be a need to "serialize" the
read/socket buffers in order to get correct send order or some such? The problem/pseudo solution explained here (but not very well):
(see: http://www.serverframework.com/handling-multiple-pending-socket-read-and-write-operations.html)

The only code example I could find, that actually attempts/succeeds to solve the problem, as per description, on the entire Internet in a reasonable way is:
(see: https://www.codeproject.com/Articles/10330/A-simple-IOCP-Server-Client-Class) Run the demo and look at send all feature.... 

Thanx
Raistlin         

Are you pondering what I'm pondering? It's time to take over the world ! - let's use ASSEMBLY...

aw27

hi Raistlin,

I think we are talking about an "out of order" potential problem, which will not occur with the simple echo server example.
This may happen, for example, when 2 server working threads are handling requests from the same client socket, and the second thread entering into action finishes before the first one.
The first link you provided is interesting, they also provide source code to download (you may not have noticed that). I did not investigate but I think you have enough to solve the problem. I have bookmarked it.   :t


Raistlin

Ok - well after some more (and more) reading  :( - I think I've got the main idea:

Statement:
IOCP is guaranteed to do operations in FIFO order <-- however, in multi-threaded scenarios (due to thread contention)
the order of the de-queued overlapped operations is almost guaranteed to be "out-of-order" (for socket operations, not necessarily file ops).
For a confusing explanation see:
(firstly)  https://msdn.microsoft.com/en-us/library/windows/desktop/aa365198(v=vs.85).aspx
(secondly) https://stackoverflow.com/questions/27955812/why-are-i-o-completion-port-packets-queued-in-fifo-order-if-they-may-be-dequeued
(thirdly) https://www.apriorit.com/dev-blog/412-win-api-programming-iocp

Comment: re: Statement (above) - sockets context
This is not a problem if we do a single read & write per socket AND per singular worker thread (the end). However this does not really take
advantage of the IOCP architecture and you end up with performance equal to event based asynchronous I/O & procedure call (APC).
For some metrics see: http://www.winsocketdotnetworkprogramming.com/winsock2programming/winsock2advancedscalableapp6a.html
Other opinions saying the same : https://stackoverflow.com/questions/32053666/how-to-ensure-thread-safe-in-iocp-receive

Alternative 1
Specifically a single read and multiple writes without directly sequencing read/write operations is possible. This would increase performance
moderately over event based (APC). A pending write list (linked list) is used to queue a shared "pending_write_list" across all threads (locking
a pending write list critical section) and processing ALL the writes in a per single worker thread manner. Unfortunately this creates the scenario
where only a single thread ends up handling the write queue in practice, especially when LARGE outbound writes are needed. The other threads
sit idle as the queue is being emptied.
For relevant explanation see : https://accu.org/index.php/journals/1956
For a code example - refer again to the convoluted and erroneous: http://www.winsocketdotnetworkprogramming.com/winsock2programming/winsock2advancedscalableapp6b.html

Alternative 2 Possibly close to optimal
Assign read and write sequence numbers atomically on a per socket operation (your buffer object containing the overlapped structure).
En-queue these multiple read or write buffers per socket in an independent pending_read and pending_write list (shared across threads).
Sort the reads and write lists per socket and per sequence number ascending within the list. Per single worker thread, process a threshold
(0 to maximum tolerance) of read and write ops. The process will de-queue on a list, only if the sequence number per socket - has a sequence number + 1
for that socket also in the list OR that sequence number + 1 is the next assignable sequence number for that operation - on that socket.
Else it should move on to the next socket in the list, as outstanding reads or writes have not as yet arrived or a socket error occurred.

Any comments - corrections?


Are you pondering what I'm pondering? It's time to take over the world ! - let's use ASSEMBLY...

aw27

I will not dig deeper into this right now, I have no projects involving high performance scalable servers, but will follow whatever is dropped here.

Raistlin

@aw27 - LOL  :biggrin: Perhaps a collaboration is in order ?

THE MAIN PROBLEM, MY SIDE: Sockets-Multi-Threaded with IOCP & Normal Debugging Techniques DON'T MIX.  :(

This all feels like I'am doing the devil's work. Again, using the devil narrative, I must be close to hell - it's just the multi-read/write thing to go.
But then again, I might be mistaken............... has happened before.
Are you pondering what I'm pondering? It's time to take over the world ! - let's use ASSEMBLY...

aw27

Quote from: Raistlin on February 08, 2018, 11:51:25 PM
But then again, I might be mistaken............... has happened before.
I believe you will handle the various problems once you finish the information gathering stage, start hands on and have them face to face.  :t

Quote
Sockets-Multi-Threaded with IOCP & Normal Debugging Techniques DON'T MIX
Yeap, another interesting theme worth a thread of its own (more generally, multithread debugging techniques) . 



Raistlin

#14
As mentioned the literature just isn't good enough
(my opinion) to actually produce a roadmap towards
solutions. Its driving me crazy. The debug thing adds
complexity which reduces effort-towards practical
implementation dramatically. Without feeling too sorry
for myself, its pseudo working at the moment.  I am
actually asking for help and advice towards the practical.
Truly, as soon as you think you're becoming an intermediate,
you actually understand you're a novice, whether you have been
programming for lotsa years or not... There's always something else...
Big UP to everyone for keeping this forum and interesting topics going.
I'am addicted. :icon_mrgreen:
Are you pondering what I'm pondering? It's time to take over the world ! - let's use ASSEMBLY...