News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests

Main Menu

Reverse all bits of a DWORD

Started by aw27, November 01, 2019, 01:53:40 AM

Previous topic - Next topic

aw27

For example: ‭‭0010 0110 0111 1000 1001 1001 1001 0101‬‬ should be reversed to: 1010 1001 1001 1001 0001 1110 0110 0100

This is a challenge, the solution with the minimum number of 32-bit ASM instructions wins. I have a solution with 8 instructions. Anyone can do in less?

hutch--

I have not got time to do it but what about BSWAP then use a 0 to 255 lookup table ? Should be 5 instructions.

aw27

Quote from: hutch-- on November 01, 2019, 02:40:17 AM
I have not got time to do it but what about BSWAP then use a 0 to 255 lookup table ? Should be 5 instructions.

I would rather prefer no lookup tables. However, using BSWAP then reverse the bits in each byte is an idea but should take more than 8 instructions all together.

nidud

#3
deleted

aw27

Quote from: nidud on November 01, 2019, 03:25:32 AM

    mov ecx,32
B:  shr edx,1
    rcl eax,1
    loop B


Very good  :thumbsup: , eax don't need to be zeroed but you need 1 more instruction to load edx. This makes 5. Probably it is not possible to do better. Congratulations.

This was mine:


       xor eax, eax
mov ecx, 32
revert: ; revert bits
mov ebx, esi
add eax, eax
and ebx, 1
shr esi, 1
or eax, ebx
loop revert

daydreamer

Quote from: AW on November 01, 2019, 01:53:40 AM
For example: ‭‭0010 0110 0111 1000 1001 1001 1001 0101‬‬ should be reversed to: 1010 1001 1001 1001 0001 1110 0110 0100

This is a challenge, the solution with the minimum number of 32-bit ASM instructions wins. I have a solution with 8 instructions. Anyone can do in less?
maybe reciprocal calculation smallest
my none asm creations
https://masm32.com/board/index.php?topic=6937.msg74303#msg74303
I am an Invoker
"An Invoker is a mage who specializes in the manipulation of raw and elemental energies."
Like SIMD coding

hutch--

Loop code has more than 8 executed instructions.

TimoVJL

#7
Why not in C too//#include <intrin.h>
// 0011 1101 0010 0110
// 0110 0100 1011 1100
// 3D26h -> 64BC
// 0010 0110 0111 1000 1001 1001 1001 0101
// 1010 1001 1001 1001 0001 1110 0110 0100
// 26789995h -> A9991E64h
unsigned char _bittest(const long *valueptr, long index);
unsigned char _bittestandset(long *valueptr, long index);
#if defined(__clang__)
unsigned char _interlockedbittestandreset(volatile long *valueptr, long index);
#else
unsigned char _interlockedbittestandreset(long *valueptr, long index);
#endif
int printf(const char * restrict format, ...);
#pragma comment(lib, "msvcrt.lib")
void __cdecl mainCRTStartup(void)
{
//long ul = 0x3D26;
long ul = 0x26789995;
long ul2 = 0;
//int i1 = _bit_scan_forward(ul); // first bit to set
//int i2 = _bit_scan_reverse(ul); // last bit to set
//for (int i=i1,j=31;i<=i2;i++,j--) {
for (int i=0,j=31;i<=31;i++,j--) {
if (_bittest((const long int *)&ul, i)) {
_bittestandset(&ul2, j);
printf("1");
} else {
_interlockedbittestandreset(&ul2, j);
printf("0");
}
}
printf("\n%lXh %lXh\n", ul, ul2);
}
without header intrin.h, C compilers accept local defines for testing.
EDIT:
unsigned char _bittest(const long *valueptr, long index);
unsigned char _bittestandset(long *valueptr, long index);
void __cdecl mainCRTStartup(void)
{
long ul = 0x26789995;
long ul2 = 0;
for (int i=0,j=31; j; i++,j--) {
if (_bittest((const long int *)&ul, i)) _bittestandset(&ul2, j);
}
printf("\n%lXh %lXh\n", ul, ul2);
}
.386
includelib msvcrt.lib
public mainCRTStartup
.data
fmt db "%lXh %lXh",10,0
.code
mainCRTStartup:
  push ebp
  mov ebp, esp
  sub esp, 8h
  mov dword ptr [ebp-4h], 26789995h
  mov dword ptr [ebp-8h], 0h
  xor eax, eax
  mov edx, 1Fh
L1:
  bt dword ptr [ebp-4h], eax
  setb cl
  test cl, cl
  jz @F
  bts dword ptr [ebp-8h], edx
@@:
  setb cl
  add eax, 1h
  sub edx, 1h
  jnz L1
  push dword ptr [ebp-8h]
  push dword ptr [ebp-4h]
  push offset fmt
  call printf
  add esp, 0Ch
  mov esp, ebp
  pop ebp
  ret
end

void print_bits(long val)
{
char *p, s[40];
p = s;
for (int i=31; i>=0; i--) {
if (_bittest((const long int *)&val, i)) *p = '1'; else *p = '0';
//if (val & (1<<i)) *p = '1'; else *p = '0';
p++;
if (!(i%4)) {
*p = ' ';
p++;
}
}
*(--p) = 0;
puts(s);
}
May the source be with you

jj2007

or edx, -1
m2m ecx, 32
@@: dec ecx
jle @F
ror eax, 1
jc @B
btr edx, ecx
jmp @B
@@:

aw27

Some AMDs have the XOP instruction set which contains an instruction called VPPERM that allows to produce bit reversal. I presume the XOP is deprecated and recent AMDs don't have it.

HSE

 :biggrin:

edx = 00100110011110001001100110010101y, challenge [awch1.asm, 77]
eax = 10101001100110010001111001100100y, Nidud [awch1.asm, 85]
eax = 10101001100110010001111001100100y, AW [awch1.asm, 108]
edx = 10101001100110010001111001100101y, JJ [awch1.asm, 124]   Oops!
Equations in Assembly: SmplMath

nidud

#11
deleted

HSE

 :thumbsup:

edx = 00100110011110001001100110010101y, challenge [awch1.asm, 45]
eax = 10101001100110010001111001100100y, Nidud (4 active lines, value in edx) [awch1.asm, 52]
eax = 10101001100110010001111001100100y, AW (8 active lines, value in esi) [awch1.asm, 66]
edx = 10101001100110010001111001100100y, JJ-Nidud (11 active lines, value in eax) [awch1.asm, 83]
Equations in Assembly: SmplMath

jj2007

@HSE: Why 11 "active lines"? It's 8 instructions. But if you insist, it can be done in 8 lines, too:
or edx, -1
m2m ecx, 32
@@: dec ecx
jle $+11
ror eax, 1
jc @B
btr edx, ecx
jmp @B

TimoVJL

Cleaned from C code:
.386
includelib msvcrt.lib
public mainCRTStartup
.data
fmt db "%lXh %lXh",10,0
.code
mainCRTStartup:
  push ebp
  mov ebp, esp
  sub esp, 8h
  mov dword ptr [ebp-4h], 26789995h
  mov dword ptr [ebp-8h], 0h
  xor eax, eax
  mov edx, 1Fh
L1:
  bt dword ptr [ebp-4h], eax
  ;setb cl ; useless
  ;test cl, cl ; useless
  jnb @F ; jz @F
  bts dword ptr [ebp-8h], edx
@@:
  ;setb cl ; useless
  add eax, 1h
  sub edx, 1h
  jnz L1
  push dword ptr [ebp-8h]
  push dword ptr [ebp-4h]
  push offset fmt
  call printf
  add esp, 0Ch
  mov esp, ebp
  pop ebp
  ret
end
May the source be with you