Random number algorithm.

hutch-- · July 30, 2017, 05:28:07 PM

I did a quick knife and fork on the nrandom algo the Jaymeson Trudgen wrote years ago and generally it has been a good performer over a long period but its range is still unsigned DWORD.

I wonder if anyone has a decent 64 bit random number generator written in 64 bit assembler that I could add to the library.

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

nrandom PROC base:QWORD

; Park Miller random number generator
; -----------------------------------------
; original code written by Jaymeson Trudgen
; minor modifiation on the recommendation
; of Park and Miller 1993
; range is unsigned DWORD
; -----------------------------------------

mov rax, nrandom_seed

; ****************************************
test eax, 80000000h
jz @F
add eax, 7fffffffh
@@:
; ****************************************

xor edx, edx
mov ecx, 127773
div ecx
mov ecx, eax
mov eax, 48271 ; suggested mofification by Park and Miller : 16807 = old value
mul edx
mov edx, ecx
mov ecx, eax
mov eax, 2836
mul edx
sub ecx, eax
xor edx, edx
mov eax, ecx
mov nrandom_seed, rcx
div base

mov eax, edx
ret

nrandom ENDP

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

nseed proc TheSeed:QWORD

.data
nrandom_seed dq 12345678
.code

mov rax, TheSeed
mov nrandom_seed, rax

ret

nseed endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

jj2007 · July 30, 2017, 06:50:11 PM

I did a quick test with Rand64(), an adaption of Alex Bagayev's (aka Antariy) algo, and it seems to work in 64-bit land:

Code Select

1       8334395503510177650
2       1329069443230085730
3       2521605505399485735
4       -2513648910981773731
5       -6348034448820748366
6       -3307815091408750516
7       4066175164584430395
8       -3922227355712582401
9       428641309453789440
10      -8910786329316853516
11      -8070429261690835801
12      6730337785625512305
13      -7805041995582204841
14      -5974524378947664316
15      4637574160357525215
This code was assembled with ml64 in 64-bit format

It is ultrafast, and randomness is excellent, as tested with the Diehard and ENT suites.

TWell · July 30, 2017, 07:22:41 PM

https://en.wikipedia.org/wiki/Xorshift

PellesC output obj

Code Select

extern s: qword
xorshift128plus PROC
        mov     rax, qword ptr [s]                      ; 0000 _ 48: 8B. 05, 00000000(rel)
        mov     rdx, qword ptr [s+8H]                   ; 0007 _ 48: 8B. 15, 00000008(rel)
        mov     qword ptr [s], rdx                      ; 000E _ 48: 89. 15, 00000000(rel)
        mov     rcx, rax                                ; 0015 _ 48: 89. C1
        shl     rcx, 23                                 ; 0018 _ 48: C1. E1, 17
        xor     rax, rcx                                ; 001C _ 48: 31. C8
        mov     rcx, rax                                ; 001F _ 48: 89. C1
        xor     rcx, rdx                                ; 0022 _ 48: 31. D1
        shr     rax, 17                                 ; 0025 _ 48: C1. E8, 11
        xor     rcx, rax                                ; 0029 _ 48: 31. C1
        mov     rax, rdx                                ; 002C _ 48: 89. D0
        shr     rax, 26                                 ; 002F _ 48: C1. E8, 1A
        xor     rcx, rax                                ; 0033 _ 48: 31. C1
        mov     qword ptr [s+8H], rcx                   ; 0036 _ 48: 89. 0D, 00000008(rel)
        mov     rax, qword ptr [s+8H]                   ; 003D _ 48: 8B. 05, 00000008(rel)
        add     rax, rdx                                ; 0044 _ 48: 01. D0
        ret                                             ; 0047 _ C3
xorshift128plus ENDP

avcaballero · July 30, 2017, 07:30:26 PM

Good reference, TWell, thank you. Bit shift pseudorandom number generators must be ultra light fast. 8)

aw27 · July 31, 2017, 12:17:32 AM

From Wikipedia:
The Mersenne Twister is a pseudorandom number generator (PRNG). It is by far the most widely used general-purpose PRNG.[1] Its name derives from the fact that its period length is chosen to be a Mersenne prime.
The Mersenne Twister was developed in 1997 by Makoto Matsumoto (ja) (松本眞) and Takuji Nishimura (西村拓士).[2] It was designed specifically to rectify most of the flaws found in older PRNGs. It was the first PRNG to provide fast generation of high-quality pseudorandom integers.
The most commonly used version of the Mersenne Twister algorithm is based on the Mersenne prime 219937−1. The standard implementation of that, MT19937, uses a 32-bit word length. There is another implementation that uses a 64-bit word length, MT19937-64; it generates a different sequence.

Below is based on: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c

Tested and obtained the same results as the original in C:

Code Select


; uasm64 -c -win64 -Zp8 MT64asm.asm
; link /ENTRY:main /SUBSYSTEM:console mt64asm.obj

option casemap:none
option frame:auto
OPTION WIN64:2

NN equ 312
MM equ 156
MATRIX_A equ 0B5026F5AA96619E9h
UM equ 0FFFFFFFF80000000h
LM equ 07FFFFFFFh
MULT equ 6364136223846793005

includelib \masm32\lib64\msvcrt.lib
printf proto :ptr, :vararg

.data

mt dq NN dup (0)
mti qword NN+1

mag01 dq 0, MATRIX_A

;KEYS dq 123456789ABCDEFh, 23456789ABCDEF1h, 3456789ABCDEF12h, 456789ABCDEF123h
KEYS dq 12345h, 23456h, 34567h, 45678h ; For testing, the same as in the C code
format0 db "1000 outputs of genrand64_int64()",13,10,0
format1 db 13,10,"1000 outputs of genrand64_real2()",13,10,0
format2 db "%20llu ",0
format3 db "%10.8f ",0
formatcr db 13,10,0

.const
c1 real8 1.1102230246251566636831481088739e-16
c2 real8 1.1102230246251565404236316680908e-16
c3 real8 0.5
c4 real8 2.2204460492503130808472633361816e-16

.code

init_genrand64 proc uses rsi seed : qword
	mov rsi, offset mt
	mov seed, rcx
	mov qword ptr [rsi], rcx
	mov r8, 1

	.while r8 < NN
		mov r9, r8
		dec r9
		mov r10, qword ptr [rsi+r9*sizeof qword]
		mov r11, r10 ; save it
		shr r10, 62
		xor r11, r10
		mov rax, MULT
		mul r11
		add rax, r8
		mov qword ptr [rsi+r8*sizeof qword ], rax
		inc r8
	.endw
	mov mti, r8	
	ret
init_genrand64 endp

init_by_array64 proc uses rsi init_key : ptr, key_len : qword
	mov init_key, rcx
	mov key_len, rdx
	INVOKE init_genrand64, 19650218
	mov rcx, init_key
	mov r8, 1
	mov r9, 0
	mov r10, 0
	
	.if key_len<NN
		mov r10, NN
	.else
		mov r10, key_len 
	.endif	
	
	mov rsi, offset mt
	.while r10>0
		mov rax, qword ptr [rsi+r8*sizeof qword - sizeof qword]
		mov r11, rax ; save
		shr r11, 62
		xor rax, r11
		mov r11, 3935559000370003845
		mul r11
		mov r11, qword ptr [rsi+r8*sizeof qword]
		xor r11, rax
		add r11, qword ptr[rcx + r9*sizeof qword]
		add r11, r9
		
		mov qword ptr [rsi+r8*sizeof qword], r11
		
		inc r8
		inc r9
		
		.if r8>=NN
			mov rax, qword ptr [rsi+(NN-1)*sizeof qword]
			mov qword ptr [rsi], rax
			mov r8, 1
		.endif
		
		.if r9>=key_len
			mov r9,0
		.endif
	
		dec r10
	.endw
	
	mov r10, NN-1
	
	.while r10>0
		mov rax, qword ptr [rsi+r8*sizeof qword - sizeof qword]
		mov r11, rax
		shr r11, 62
		xor rax, r11
		mov r11, 2862933555777941757
		mul r11
		
		mov r11, qword ptr [rsi+r8*sizeof qword]
		xor r11, rax
		sub r11, r8
		mov qword ptr [rsi+r8*sizeof qword], r11
		
		inc r8
		
		.if r8>=NN
			mov rax, qword ptr [rsi+(NN-1)*sizeof qword]
			mov qword ptr [rsi], rax
			mov r8, 1
		.endif
		
		dec r10
	.endw
		
	mov rax, 1 shl 63
	mov qword ptr [rsi], rax
	
	ret
init_by_array64 endp

genrand64_int64 proc uses rsi rdi
	mov rsi, offset mt
	.if mti>=NN
		.if mti==NN+1
			invoke init_genrand64, 5489
		.endif	
		mov r8, 0
		mov rdi, offset mag01
		.while r8<(NN-MM)
			mov rax, qword ptr [rsi+r8*sizeof qword]
			and rax, UM
			mov rcx, qword ptr [rsi+r8*sizeof qword+sizeof qword]
			and rcx, LM
			or rax, rcx ; x
			
			mov rcx, rax
			and rcx, 1
			mov  rdx, qword ptr [rdi+rcx*sizeof qword] ; mag01[(int)(x & 1ULL)];
			
			mov rcx, rax
			shr rcx, 1
			xor rcx, rdx ;  (x >> 1) ^ mag01[(int)(x & 1ULL)];
			
			mov rax, qword ptr [rsi+r8*sizeof qword+MM*sizeof qword ]
			xor rax, rcx
			mov qword ptr [rsi+r8*sizeof qword], rax
			
			inc r8
		.endw
		
		.while r8<(NN-1)
			mov rax, qword ptr [rsi+r8*sizeof qword]
			and rax, UM
			mov rcx, qword ptr [rsi+r8*sizeof qword+sizeof qword]
			and rcx, LM
			or rax, rcx ; x
			
			mov rcx, rax
			and rcx, 1
			mov  rdx, qword ptr [rdi+rcx*sizeof qword] ; mag01[(int)(x & 1ULL)];
			
			mov rcx, rax
			shr rcx, 1
			xor rcx, rdx ;  (x >> 1) ^ mag01[(int)(x & 1ULL)];

			mov rax, qword ptr [rsi + r8*sizeof qword+(MM - NN)*sizeof qword]
			xor rax, rcx
			mov qword ptr [rsi+r8*sizeof qword], rax

			
			inc r8
		.endw
		
		mov rax, qword ptr [rsi+(NN-1)*sizeof qword]
		and rax, UM
		mov rcx, qword ptr [rsi]
		and rcx, LM
		or rax, rcx ; x
			
		mov rcx, rax
		and rcx, 1
		mov  rdx, qword ptr [rdi+rcx*sizeof qword] ; mag01[(int)(x & 1ULL)];
	
		mov rcx, rax
		shr rcx, 1
		xor rcx, rdx ;  (x >> 1) ^ mag01[(int)(x & 1ULL)];
	
		mov rax, qword ptr [rsi+(MM-1)*sizeof qword]
		xor rax, rcx
		mov qword ptr [rsi+(NN-1)*sizeof qword], rax
		mov mti,0

	.endif

	mov r8, mti
	
	mov rax, qword ptr [rsi+r8*sizeof qword]
	inc r8	
	mov mti, r8
	
	mov rcx, rax ; save
	shr rax, 29
	mov rdx, 5555555555555555h
	and rax, rdx
	xor rcx, rax
	
	mov rax, rcx
	shl rax, 17
	mov rdx, 71D67FFFEDA60000h
	and rax, rdx
	xor rcx, rax
	
	mov rax, rcx
	shl rax, 37
	mov rdx, 0FFF7EEE000000000h
	and rax, rdx
	xor rcx, rax
	
	mov rax, rcx
	shr rcx, 43
	xor rax, rcx
	
	ret
genrand64_int64 endp

genrand64_int63 proc 
	INVOKE genrand64_int64
	shr rax, 1
	ret
genrand64_int63 endp

genrand64_real1 proc
	INVOKE genrand64_int64
	shr rax, 11
	cvtsi2sd  xmm0,rax 
	mulsd xmm0, c1
	
	ret
genrand64_real1 endp

genrand64_real2 proc
	INVOKE genrand64_int64
	shr rax, 11
	cvtsi2sd  xmm0,rax 
	mulsd xmm0, c2
	
	ret
genrand64_real2 endp

genrand64_real3 proc
	INVOKE genrand64_int64
	shr rax, 12
	cvtsi2sd  xmm0,rax 
	addsd xmm0, c3
	mulsd xmm0, c4
	
	ret
genrand64_real3 endp


main proc
	invoke init_by_array64, offset KEYS, LENGTHOF KEYS
	mov rbx, 0
	mov r12,0
	invoke printf, offset format0
	.while rbx<1000
		invoke genrand64_int64
		invoke printf, offset format2, rax
		inc r12
		.if r12==5
			mov r12,0
			invoke printf, offset formatcr
		.endif
		inc rbx
	.endw
	
	mov rbx, 0
	mov r12,0	
	invoke printf, offset format1
	.while rbx<1000
		invoke genrand64_real2
		movq rdx, xmm0
		invoke printf, offset format3, rdx
		inc r12
		.if r12==5
			mov r12,0
			invoke printf, offset formatcr
		.endif
		inc rbx
	.endw

	ret
main endp

end

hutch-- · July 31, 2017, 12:59:02 AM

I have so far tested the version that Tim posted, runs OK, seems fast enough but the results under ENT are very ordinary. I have played with twiddling the output and improved the result but its not what you would call good. Test piece attached, run the executable "xorshift.exe", the batch file is to run ENT which is called from the test piece.

jj2007 · July 31, 2017, 04:45:11 AM

It's difficult to find a high quality PRNG. Here is the (mediocre) ENT output for xorshift:

Code Select

Entropy = 7.762594 bits per byte.

Optimum compression would reduce the size
of this 1000000 byte file by 2 percent.

Chi square distribution for 1000000 samples is 536885.84, and randomly
would exceed this value 0.01 percent of the times.

Arithmetic mean value of data bytes is 128.0349 (127.5 = random).
Monte Carlo value for Pi is 3.164364657 (error 0.72 percent).
Serial correlation coefficient is -0.010624 (totally uncorrelated = 0.0).

For comparison:

Code Select

617 ms  incl. writing the file, M$ RtlRandomEx()
588 ms  without writing

2395 ms incl. writing the file, MasmBasic Rand()
7162 µs without writing


############ ENT results RtlRandomEx32, with swap:
Entropy = 7.999985 bits per byte.

Optimum compression would reduce the size
of this 11468800 byte file by 0 percent.

Chi square distribution for 11468800 samples is 244.22, and randomly
would exceed this value 50.00 percent of the times.

Arithmetic mean value of data bytes is 127.4928 (127.5 = random).
Monte Carlo value for Pi is 3.142348334 (error 0.02 percent).
Serial correlation coefficient is -0.000229 (totally uncorrelated = 0.0).


############ ENT results Rand64:

Entropy = 7.999984 bits per byte.

Optimum compression would reduce the size
of this 11468800 byte file by 0 percent.

Chi square distribution for 11468800 samples is 252.68, and randomly
would exceed this value 50.00 percent of the times.

Arithmetic mean value of data bytes is 127.5000 (127.5 = random).
Monte Carlo value for Pi is 3.142249980 (error 0.02 percent).
Serial correlation coefficient is 0.001482 (totally uncorrelated = 0.0).

There is great collection here at CAcert Research Lab.

hutch-- · July 31, 2017, 12:20:46 PM

JJ,
I downloaded the "DualRandMb.zip" but there is no source for the DualRand64 executable, I have looked at the DualRand.inc file but it contains a very messy looking macro with no context.

jj2007 · July 31, 2017, 01:15:50 PM

This is the source (DualRand.asm):

Code Select

include \Masm32\MasmBasic\Res\JBasic.inc	; ## console demo, builds in 32- or 64-bit mode with ML, AsmC, JWasm, HJWasm ##
include DualRand.inc			; excerpt from MasmBasic.inc
Init				; OPT_64 1	; put 0 for 32 bit, 1 for 64 bit assembly
  xor ebx, ebx
  @@:
	inc ebx
	Print Str$("%i\t", rbx)	; counter
	void Rand64()	; returns eax and edx
	shl rax, 32
	or rax, rdx
	Print Str$("%lli\n", rax)	; random number
  cmp ebx, 20
  jb @B
  Inkey Chr$("This code was assembled with ", @AsmUsed$(1), " in ", jbit$, "-bit format")
EndOfCode

The file DualRand.inc contains the Rand() macro, which is meant for the 32-version of MasmBasic; therefore it is not included in JBasic.inc, so I had to extract it to make it available in the dual 64/32-bit version.

Since I have a suspicion that you will not install this exclusive software package, I attach a version with an int 3 before the void Rand64(), and a nops 8 after that. You know how to use it, and you will be surprised ;-)

aw27 · July 31, 2017, 07:47:02 PM

While the Xorshift algo is clearly an inferior specimen, it is indeed fast.

Being fast does not mean that it can't be even faster. This raises again the point of artificial intelligence versus human intelligence.

Pelles compiler produced the logical output, most ASM programmers would do something very similar, myself included.

The Visual Studio 2017 used AI techniques saving 2 instructions and clearly gaining in speed. Although with only 15 instructions (versus 17 of Pelles) it took me a while to understand why it still works.

hutch-- · July 31, 2017, 08:04:16 PM

There are dozens of int3 lines in the disassembly and NO, I will not be messing up a development system with MB. Looks like you get some interesting things done but with no access to any assembler source and no way to independently test it, any comparisons are meaningless. In an assembler forum I post assembler code, can routinely translate between almost any assembler but I draw the line at a closed encrypted system written in RTF and only supplied as a binary where the content is unfindable as it is simply a waste of time.

This is a test piece in 32 MASM using NaN's old "nrandom" algo post processed. With small counts which are useful if you want a number range from 0 to 10, NaN's original algo runs OK but the level of randomness is poor but is useful for small counts of predetermined range. Give it a post process massage on a large sample > 1 million and it produces results like this.

Entropy = 7.999947 bits per byte.

Optimum compression would reduce the size
of this 4000000 byte file by 0 percent.

Chi square distribution for 4000000 samples is 291.42, and randomly
would exceed this value 10.00 percent of the times.

Arithmetic mean value of data bytes is 127.4818 (127.5 = random).
Monte Carlo value for Pi is 3.137529138 (error 0.13 percent).
Serial correlation coefficient is 0.000348 (totally uncorrelated = 0.0).
Press any key to continue . . .

Now with the version that Tim posted, it does not have a range specifier so its use will be limited to making very large pads but it is fast enough to do some post processing on it, when I get a bit further in front I would like to try out the mersenne twister that AW posted as it is a well known random algo.

Here is the 32 bit test piece.

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
include \masm32\include\masm32rt.inc
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

.data?
nrandom_seed dd ? ; global random seed variable

.code

start:

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

call main
exit

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

icnt equ <1000000> ; iteration count
rang equ <10000000> ; range = 0 to rang

main proc

LOCAL cntr :DWORD
LOCAL buff[64]:BYTE
LOCAL pbuf :DWORD
LOCAL pMem :DWORD
LOCAL rval :DWORD
LOCAL ivar :DWORD

LOCAL buf2[64]:BYTE

push ebx
push esi
push edi

mov pbuf, ptr$(buff)

fn GetTickCount
rol eax, 29
mov nrandom_seed, eax

mov cntr, icnt ; iteration count
mov eax, cntr
lea eax, [eax*4]
mov pMem, alloc(eax) ; allocate iteration count * 4
mov esi, pMem

@@:
invoke nrandom,rang
mov [esi], eax
add esi, 4
sub cntr, 1
cmp cntr, 0
ja @B

; --------------------------------------

mov ivar, 0

REPEAT 7

add ivar, 4

invoke SleepEx,100,0

fn GetTickCount
mov ecx, ivar
ror eax, cl
mov nrandom_seed, eax ; multiple re-seeding

mov cntr, icnt ; iteration count
mov esi, pMem
@@:
invoke nrandom,rang
bswap eax
rol eax,3
xor [esi], eax
add esi, 4
sub cntr, 1
jnz @B
; cmp cntr, 0
; ja @B

ENDM

; --------------------------------------

mov rval, OutputFileA("random.dat",pMem,icnt*4)

fn WinExec,"test.bat",1

free pMem

pop edi
pop esi
pop ebx

ret

main endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

end start

jj2007 · July 31, 2017, 08:40:32 PM

Quote from: hutch-- on July 31, 2017, 08:04:16 PMThere are dozens of int3 lines in the disassembly and NO, I will not be messing up a development system with MB.

Sigh...

Code Select

0000000140001002 | C8 50 00 00                       | enter 50, 0                           |
0000000140001006 | E8 84 01 00 00                    | call 14000118F                        |
000000014000100B | 33 DB                             | xor ebx, ebx                          |
000000014000100D | FF C3                             | inc ebx                               |
000000014000100F | 48 8D 15 FA 02 00 00              | lea rdx, qword ptr ds:[140001310]     | 140001310:"%i\t"
0000000140001016 | 4C 8B C3                          | mov r8, rbx                           |
0000000140001019 | 48 8D 0D 98 04 00 00              | lea rcx, qword ptr ds:[1400014B8]     |
0000000140001020 | FF 15 D2 05 00 00                 | call qword ptr ds:[1400015F8]         |
0000000140001026 | 48 89 05 83 04 00 00              | mov qword ptr ds:[1400014B0], rax     |
000000014000102D | 48 89 75 F8                       | mov qword ptr ss:[rbp-8], rsi         |
0000000140001031 | 48 89 5D F0                       | mov qword ptr ss:[rbp-10], rbx        |
0000000140001035 | 48 BE B8 14 00 40 01 00 00 00     | movabs rsi, 1400014B8                 |
000000014000103F | 48 C7 C1 F5 FF FF FF              | mov rcx, FFFFFFFFFFFFFFF5             |
0000000140001046 | FF 15 B4 05 00 00                 | call qword ptr ds:[140001600]         | scroll down, there is much more...
000000014000104C | 48 93                             | xchg rax, rbx                         |
000000014000104E | 48 8B CE                          | mov rcx, rsi                          |
0000000140001051 | FF 15 B1 05 00 00                 | call qword ptr ds:[140001608]         |
0000000140001057 | 45 33 D2                          | xor r10d, r10d                        |
000000014000105A | 4C 89 54 24 20                    | mov qword ptr ss:[rsp+20], r10        |
000000014000105F | 4C 8D 0D 32 04 00 00              | lea r9, qword ptr ds:[140001498]      |
0000000140001066 | 4C 8B C0                          | mov r8, rax                           |
0000000140001069 | 48 8B D6                          | mov rdx, rsi                          |
000000014000106C | 48 8B CB                          | mov rcx, rbx                          |
000000014000106F | FF 15 9B 05 00 00                 | call qword ptr ds:[140001610]         |
0000000140001075 | 48 8D 55 F8                       | lea rdx, qword ptr ss:[rbp-8]         | rdx:EntryPoint
0000000140001079 | 48 3B D4                          | cmp rdx, rsp                          | rdx:EntryPoint
000000014000107C | 75 00                             | jne 14000107E                         | jmp $0
000000014000107E | 48 8B 75 F8                       | mov rsi, qword ptr ss:[rbp-8]         |
0000000140001082 | 48 8B 5D F0                       | mov rbx, qword ptr ss:[rbp-10]        |
0000000140001086 | CC                                | int3                                  |<<<<<<<<< THERE IT IS!! <<<<<<<<<<<

Yes, there are dozens of int 3, right after the ret. That's what assemblers usually do for padding, but OK, if you don't trust me after a bit more than ten years on this forum, then you better use the xorshift algo.

hutch-- · July 31, 2017, 08:56:49 PM

> That's what assemblers usually do for padding, but OK, if you don't trust me after a bit more than ten years on this forum, then you better use the xorshift algo.

I will, I will !!!! :P

"xorshift2".

For a 1 instruction addition, it went from a dud to a kick ass random generator.

Entropy = 7.999979 bits per byte.

Optimum compression would reduce the size
of this 8000000 byte file by 0 percent.

Chi square distribution for 8000000 samples is 237.28, and randomly
would exceed this value 75.00 percent of the times.

Arithmetic mean value of data bytes is 127.5431 (127.5 = random).
Monte Carlo value for Pi is 3.139971785 (error 0.05 percent).
Serial correlation coefficient is -0.000246 (totally uncorrelated = 0.0).
Press any key to continue . . .

This is the only mod necessary.

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

NOSTACKFRAME

irand PROC

mov rax, random_seed
mov rdx, random_seed+8h
mov random_seed, rdx
mov rcx, rax
shl rcx, 23
xor rax, rcx
mov rcx, rax
xor rcx, rdx
shr rax, 17
xor rcx, rax
mov rax, rdx
shr rax, 26
xor rcx, rax
mov random_seed+8h, rcx
mov rax, random_seed+8h
add rax, rdx
bswap rax ; <<<< minor mod here. Set the most variable end first

ret

irand ENDP

STACKFRAME

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

It can be improved but not by much. Source and test piece attached. Run the executable, the batch file is to run ENT.

TWell · July 31, 2017, 09:25:22 PM

So with PellesC:

Code Select

#include <stdint.h>
#include <intrin.h>
/* The state must be seeded so that it is not zero */
uint64_t s[2];

uint64_t xorshift128plus(void) {
	uint64_t x = s[0];
	uint64_t const y = s[1];
	s[0] = y;
	x ^= x << 23; // a
	s[1] = x ^ y ^ (x >> 17) ^ (y >> 26); // b, c
	return _bswap64(s[1] + y);
}

hutch-- · July 31, 2017, 09:31:01 PM

JJ,

This is unintelligible.

enter 50, 0
call 14000118F ; calling address beyond end of sample
xor ebx, ebx
inc ebx
lea rdx, QWORD PTR [140001310] ; 140001310:"%i\t"
mov r8, rbx
lea rcx, QWORD PTR [1400014B8]
call QWORD PTR [1400015F8] ; calling address beyond end of sample
mov QWORD PTR [1400014B0], rax
mov QWORD PTR [rbp-8], rsi
mov QWORD PTR [rbp-10], rbx
movabs rsi, 1400014B8
mov rcx, FFFFFFFFFFFFFFF5
call QWORD PTR [140001600] ; scroll down, there is much more...
xchg rax, rbx
mov rcx, rsi
call QWORD PTR [140001608]
xor r10d, r10d
mov QWORD PTR [rsp+20], r10
lea r9, QWORD PTR [140001498]
mov r8, rax
mov rdx, rsi
mov rcx, rbx
call QWORD PTR [140001610] ; calling address beyond end of sample
lea rdx, QWORD PTR [rbp-8] ; rdx:EntryPoint
cmp rdx, rsp ; rdx:EntryPoint
jne lbl0 ; jmp $0
lbl0:
mov rsi, QWORD PTR [rbp-8]
mov rbx, QWORD PTR [rbp-10]

; Urrrrgh, how does it exit. Normally with the "enter" leading mnemonic it has a corresponding leave | ret

; 0000000140001002 | C8 50 00 00 | enter 50, 0 |
; 0000000140001006 | E8 84 01 00 00 | call 14000118F |
; 000000014000100B | 33 DB | xor ebx, ebx |
; 000000014000100D | FF C3 | inc ebx |
; 000000014000100F | 48 8D 15 FA 02 00 00 | lea rdx, qword ptr ds:[140001310] | 140001310:"%i\t"
; 0000000140001016 | 4C 8B C3 | mov r8, rbx |
; 0000000140001019 | 48 8D 0D 98 04 00 00 | lea rcx, qword ptr ds:[1400014B8] |
; 0000000140001020 | FF 15 D2 05 00 00 | call qword ptr ds:[1400015F8] |
; 0000000140001026 | 48 89 05 83 04 00 00 | mov qword ptr ds:[1400014B0], rax |
; 000000014000102D | 48 89 75 F8 | mov qword ptr ss:[rbp-8], rsi |
; 0000000140001031 | 48 89 5D F0 | mov qword ptr ss:[rbp-10], rbx |
; 0000000140001035 | 48 BE B8 14 00 40 01 00 00 00 | movabs rsi, 1400014B8 |
; 000000014000103F | 48 C7 C1 F5 FF FF FF | mov rcx, FFFFFFFFFFFFFFF5 |
; 0000000140001046 | FF 15 B4 05 00 00 | call qword ptr ds:[140001600] | scroll down, there is much more...
; 000000014000104C | 48 93 | xchg rax, rbx |
; 000000014000104E | 48 8B CE | mov rcx, rsi |
; 0000000140001051 | FF 15 B1 05 00 00 | call qword ptr ds:[140001608] |
; 0000000140001057 | 45 33 D2 | xor r10d, r10d |
; 000000014000105A | 4C 89 54 24 20 | mov qword ptr ss:[rsp+20], r10 |
; 000000014000105F | 4C 8D 0D 32 04 00 00 | lea r9, qword ptr ds:[140001498] |
; 0000000140001066 | 4C 8B C0 | mov r8, rax |
; 0000000140001069 | 48 8B D6 | mov rdx, rsi |
; 000000014000106C | 48 8B CB | mov rcx, rbx |
; 000000014000106F | FF 15 9B 05 00 00 | call qword ptr ds:[140001610] |
; 0000000140001075 | 48 8D 55 F8 | lea rdx, qword ptr ss:[rbp-8] | rdx:EntryPoint
; 0000000140001079 | 48 3B D4 | cmp rdx, rsp | rdx:EntryPoint
; 000000014000107C | 75 00 | jne 14000107E | jmp $0
; 000000014000107E | 48 8B 75 F8 | mov rsi, qword ptr ss:[rbp-8] |
; 0000000140001082 | 48 8B 5D F0 | mov rbx, qword ptr ss:[rbp-10] |
; 0000000140001086 | CC | int3 |<<<<<<<<< THERE IT IS!! <<<<<<<<<<<

The MASM Forum

News:

Random number algorithm.

hutch--

jj2007

TWell

avcaballero

aw27

hutch--

jj2007

hutch--

jj2007

aw27

hutch--

jj2007

hutch--

TWell

hutch--