Author Topic: Reading a file into memory  (Read 10659 times)

frktons

  • Member
  • ***
  • Posts: 491
Reading a file into memory
« on: January 18, 2013, 07:30:40 PM »
I've some doubts about memory buffers and files read into them.
This is going to be the first time I read a big file, process it and
write it back to disk with a different name. Questions arise...

1] I want to choose a file, allocate a buffer that is some 16 bytes
bigger than the file length, and read the file all at once into the buffer,
starting at a 16-byte aligned address within the buffer. How do I proceed?

2] After I've read the file into the buffer, is it zero delimited or not?
If the allocated buffer is initialized to all zero it should be zero delimited,
I think.

3] What API gives me the exact number of bytes of the file?

4] Can I read the file with a single instruction/API call, or do I have to loop,
reading a fixed amount at a time, that amount being the maximum the
system can manage with a single read from the disk?

5] After reading the file, should it be closed if I don't have any other
operation to perform on it? Or will it be closed by default at the end of
the program?

Thanks for your help

Frank

hfheatherfox07

  • Member
  • ***
  • Posts: 464
Re: Reading a file into memory
« Reply #1 on: January 18, 2013, 07:40:50 PM »
Answer to 3.
invoke GetFileSize, hFile, 0

Here is a Bare Bones example that I made to choose file and get file size

Code: [Select]
.586
.model flat,stdcall
option casemap:none
include \masm32\include\windows.inc
include \masm32\include\kernel32.inc
include \masm32\include\comdlg32.inc
include \masm32\include\user32.inc
include \masm32\macros\macros.asm
include \masm32\include\masm32.inc

includelib \masm32\lib\user32.lib
includelib \masm32\lib\kernel32.lib
includelib \masm32\lib\comdlg32.lib
includelib \masm32\lib\masm32.lib


; prototypes
; WinMain takes four arguments; it is called from the entry point below.
WinMain   Proto :HINSTANCE, :DWORD, :LPSTR,:DWORD

.data

; Dialog descriptor passed to GetOpenFileName; <> leaves every field at its
; default, WinMain fills in the fields it needs before the call.
ofn   OPENFILENAME <>
; Filter list for the dialog: a display name / pattern pair, each part
; zero-terminated, with one extra zero closing the list.
FilterString   db "All Files",0,"*.*",0,0

; wsprintf format string: %lu prints the value as an unsigned decimal number
; (%d would print a signed decimal number, %x a hexadecimal one).
Format   db "The file size is: %lu bytes",0
; Caption of the message box that shows the result.
cpt     db "File Size:",0
.data?
; 256-byte scratch buffer: it receives the path chosen in the dialog and is
; then reused as the output buffer for wsprintf. Any length reported to the
; dialog through ofn.nMaxFile must not exceed this size.
Buffer   db 256 dup(?) ; buffer

.code

; Program entry point: fetch the module handle and the command line, hand
; them to WinMain, then terminate the process with exit code 0.
start:
    invoke  GetModuleHandle, NULL
    push    eax                         ; park the module handle: the next call returns in eax
    invoke  GetCommandLine
    pop     ecx                         ; ecx = module handle, eax = command line
    invoke  WinMain, ecx, NULL, eax, SW_SHOW
    invoke  ExitProcess, 0

;-----------------------------------------------------------------------
; WinMain - let the user pick a file and show its size in a message box.
;
; In:    hInstance, hPrevInstance, lpCmdLine, nCmdShow (not used by the body)
; Out:   eax = 0
; Uses:  the globals ofn, FilterString, Format, cpt and Buffer.
;        Buffer first receives the chosen path and is then reused for the
;        formatted message text.
; Note:  GetFileSize is called without a high-DWORD pointer, so only the
;        low 32 bits of the size are reported.
;-----------------------------------------------------------------------
WinMain Proc hInstance: HINSTANCE, hPrevInstance:DWORD, lpCmdLine:  LPSTR,nCmdShow:DWORD
.data?
hFile dd ?
hMapping dd ?
pMapping dd ?
.code

    mov ofn.lStructSize, SIZEOF ofn
    mov ofn.lpstrFilter, OFFSET FilterString
    mov ofn.lpstrFile, OFFSET Buffer
    ; Report the real size of Buffer; a larger value would let the dialog
    ; write past the end of it.
    mov ofn.nMaxFile, SIZEOF Buffer
    mov ofn.Flags, OFN_FILEMUSTEXIST or \
                   OFN_PATHMUSTEXIST or OFN_LONGNAMES or \
                   OFN_EXPLORER or OFN_HIDEREADONLY
    invoke GetOpenFileName, ADDR ofn

    ; Success is reported as "nonzero", not specifically TRUE.
    .if eax != 0
        invoke CreateFile, ADDR Buffer, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL
        .if eax != INVALID_HANDLE_VALUE
            mov hFile, eax

            ; Report the size straight from the file handle so that files
            ; which cannot be mapped (e.g. zero-length files) are still
            ; reported.
            invoke GetFileSize, hFile, NULL
            invoke wsprintf, ADDR Buffer, ADDR Format, eax
            invoke MessageBox, NULL, ADDR Buffer, ADDR cpt, MB_OK

            ; Map the whole file read-only; while the view exists the file
            ; contents can be read starting at pMapping.
            invoke CreateFileMapping, hFile, NULL, PAGE_READONLY, 0, 0, 0
            .if eax != NULL
                mov hMapping, eax
                invoke MapViewOfFile, eax, FILE_MAP_READ, 0, 0, 0
                .if eax != NULL         ; only unmap a view that really exists
                    mov pMapping, eax
                    invoke UnmapViewOfFile, pMapping
                .endif
                invoke CloseHandle, hMapping
            .endif

            invoke CloseHandle, hFile
        .endif
    .endif

    xor eax, eax                        ; defined return value
    ret
WinMain endp

end start

The Answer to 5.
Yes, I believe you should always close the file handle; it is good practice.
Invoke CloseHandle, hFile
Your code and your skills will be assimilated. Your programming language is irrelevant.
We are the ASM Borg and you will become part of us. Compile and be assembled.

frktons

  • Member
  • ***
  • Posts: 491
Re: Reading a file into memory
« Reply #2 on: January 18, 2013, 07:53:11 PM »
Answer to 3.
invoke GetFileSize, hFile, 0

Here is a Bare Bones example that I made to choose file and get file size

Code: [Select]
.586
.model flat,stdcall
option casemap:none
include \masm32\include\windows.inc
include \masm32\include\kernel32.inc
include \masm32\include\comdlg32.inc
include \masm32\include\user32.inc
include \masm32\macros\macros.asm
include \masm32\include\masm32.inc

includelib \masm32\lib\user32.lib
includelib \masm32\lib\kernel32.lib
includelib \masm32\lib\comdlg32.lib
includelib \masm32\lib\masm32.lib


; prototypes
; WinMain takes four arguments; it is called from the entry point below.
WinMain   Proto :HINSTANCE, :DWORD, :LPSTR,:DWORD

.data

; Dialog descriptor passed to GetOpenFileName; <> leaves every field at its
; default, WinMain fills in the fields it needs before the call.
ofn   OPENFILENAME <>
; Filter list for the dialog: a display name / pattern pair, each part
; zero-terminated, with one extra zero closing the list.
FilterString   db "All Files",0,"*.*",0,0

; wsprintf format string: %lu prints the value as an unsigned decimal number
; (%d would print a signed decimal number, %x a hexadecimal one).
Format   db "The file size is: %lu bytes",0
; Caption of the message box that shows the result.
cpt     db "File Size:",0
.data?
; 256-byte scratch buffer: it receives the path chosen in the dialog and is
; then reused as the output buffer for wsprintf. Any length reported to the
; dialog through ofn.nMaxFile must not exceed this size.
Buffer   db 256 dup(?) ; buffer

.code

; Program entry point: fetch the module handle and the command line, hand
; them to WinMain, then terminate the process with exit code 0.
start:
    invoke  GetModuleHandle, NULL
    push    eax                         ; park the module handle: the next call returns in eax
    invoke  GetCommandLine
    pop     ecx                         ; ecx = module handle, eax = command line
    invoke  WinMain, ecx, NULL, eax, SW_SHOW
    invoke  ExitProcess, 0

;-----------------------------------------------------------------------
; WinMain - let the user pick a file and show its size in a message box.
;
; In:    hInstance, hPrevInstance, lpCmdLine, nCmdShow (not used by the body)
; Out:   eax = 0
; Uses:  the globals ofn, FilterString, Format, cpt and Buffer.
;        Buffer first receives the chosen path and is then reused for the
;        formatted message text.
; Note:  GetFileSize is called without a high-DWORD pointer, so only the
;        low 32 bits of the size are reported.
;-----------------------------------------------------------------------
WinMain Proc hInstance: HINSTANCE, hPrevInstance:DWORD, lpCmdLine:  LPSTR,nCmdShow:DWORD
.data?
hFile dd ?
hMapping dd ?
pMapping dd ?
.code

    mov ofn.lStructSize, SIZEOF ofn
    mov ofn.lpstrFilter, OFFSET FilterString
    mov ofn.lpstrFile, OFFSET Buffer
    ; Report the real size of Buffer; a larger value would let the dialog
    ; write past the end of it.
    mov ofn.nMaxFile, SIZEOF Buffer
    mov ofn.Flags, OFN_FILEMUSTEXIST or \
                   OFN_PATHMUSTEXIST or OFN_LONGNAMES or \
                   OFN_EXPLORER or OFN_HIDEREADONLY
    invoke GetOpenFileName, ADDR ofn

    ; Success is reported as "nonzero", not specifically TRUE.
    .if eax != 0
        invoke CreateFile, ADDR Buffer, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL
        .if eax != INVALID_HANDLE_VALUE
            mov hFile, eax

            ; Report the size straight from the file handle so that files
            ; which cannot be mapped (e.g. zero-length files) are still
            ; reported.
            invoke GetFileSize, hFile, NULL
            invoke wsprintf, ADDR Buffer, ADDR Format, eax
            invoke MessageBox, NULL, ADDR Buffer, ADDR cpt, MB_OK

            ; Map the whole file read-only; while the view exists the file
            ; contents can be read starting at pMapping.
            invoke CreateFileMapping, hFile, NULL, PAGE_READONLY, 0, 0, 0
            .if eax != NULL
                mov hMapping, eax
                invoke MapViewOfFile, eax, FILE_MAP_READ, 0, 0, 0
                .if eax != NULL         ; only unmap a view that really exists
                    mov pMapping, eax
                    invoke UnmapViewOfFile, pMapping
                .endif
                invoke CloseHandle, hMapping
            .endif

            invoke CloseHandle, hFile
        .endif
    .endif

    xor eax, eax                        ; defined return value
    ret
WinMain endp

end start

The Answer to 5.
Yes, I believe you should always close the file handle; it is good practice.
Invoke CloseHandle, hFile


Thanks, 2 points less to understand.  :t

dedndave

  • Member
  • *****
  • Posts: 8823
  • Still using Abacus 2.0
    • DednDave
Re: Reading a file into memory
« Reply #3 on: January 19, 2013, 12:15:36 AM »
a) get the size of the file - i would use GetFileAttributesEx for this
b) add 16 to the file size and attempt to allocate a buffer of that size
we need 15 extra bytes to 16-align the buffer and 1 extra byte to append a 0
c) do not zero-initialize the allocated memory - you need 1 zero at the end - put it there yourself
d) handle the case of the file being too large to fit into memory
exit with an error code, as appropriate
e) store the allocated address from EAX to free memory when done
add eax,15
and al,-16
store that value as the file buffer address
f) open the file, exit if error
read the file into the file buffer address
close the file and exit if read error, otherwise, close the file
g) use the number of bytes read, plus the file buffer address - place a 0 at that address
h) when done, free the allocated block

dedndave

  • Member
  • *****
  • Posts: 8823
  • Still using Abacus 2.0
    • DednDave
Re: Reading a file into memory
« Reply #4 on: January 19, 2013, 01:28:45 AM »
here is a quick example
i just show a message for errors - you could use a return code or whatever is appropriate

frktons

  • Member
  • ***
  • Posts: 491
Re: Reading a file into memory
« Reply #5 on: January 19, 2013, 01:52:55 AM »
From my recent experiments with allocating buffers, I recall
that the allocated buffer is at least 8-byte aligned, and often
the alignment is even larger: 2^4, 2^10 bytes or more, up to a page.

Managing too big files is good anyway, even if I'm going to
put a max size limit to the input file.

Just for learning purposes I could manage to read it in chunks
of a certain size, but it'll come later.

Thanks Dave, you gave me a lot of practical examples to study  :t

Vortex

  • Member
  • *****
  • Posts: 1987
Re: Reading a file into memory
« Reply #6 on: January 19, 2013, 06:12:17 AM »
Hi frktons,

You can check the In Memory Text Read and Write functions from masm32.lib  They are nice examples.

frktons

  • Member
  • ***
  • Posts: 491
Re: Reading a file into memory
« Reply #7 on: January 19, 2013, 10:16:14 AM »
Hi frktons,

You can check the In Memory Text Read and Write functions from masm32.lib  They are nice examples.


Thanks Erol. I'll check the examples as time permits.

frktons

  • Member
  • ***
  • Posts: 491
Re: Reading a file into memory
« Reply #8 on: January 19, 2013, 12:08:59 PM »
In \masm32\m32lib I found the function:
Code: [Select]

;-----------------------------------------------------------------------
; filesize - look a file up by name and return its size.
;
; In:    lpszFileName = address of the file name string
; Out:   eax = low 32 bits of the file size (wfd.nFileSizeLow),
;              or -1 if FindFirstFile could not find the file
;-----------------------------------------------------------------------
filesize proc lpszFileName:DWORD

    LOCAL findData :WIN32_FIND_DATA

    invoke FindFirstFile, lpszFileName, ADDR findData

    .if eax != INVALID_HANDLE_VALUE
      ; The search handle was only needed to fill findData; release it
      ; before reading the result.
      invoke FindClose, eax
      mov eax, findData.nFileSizeLow
    .else
      mov eax, -1                       ; lookup failed
    .endif

    ret

filesize endp

and from what I can understand, it can return a file size of up to 4 GB.
What if the file is bigger? Is there another function in the masm32 package
that returns values bigger than 4 GB?

qWord

  • Member
  • *****
  • Posts: 1473
  • The base type of a type is the type itself
    • SmplMath macros
Re: Reading a file into memory
« Reply #9 on: January 19, 2013, 12:20:04 PM »
and from what I can understand, it can return a file size of up to 4 GB.
What if the file is bigger?
Code: [Select]
; 64-bit local that will hold the complete file size.
LOCAL u64Size:QWORD
...
; GetFileSize is given the address of the upper DWORD of u64Size (offset 4)
; as its second argument; the value the call returns is stored in the lower
; DWORD, so the two halves together form the 64-bit size.
; '&' is the rv/fn macro shorthand for the ADDR operator.
mov DWORD ptr u64Size,rv(GetFileSize,hFile,&DWORD ptr u64Size[4])
MREAL macros - when you need floating point arithmetic while assembling!

frktons

  • Member
  • ***
  • Posts: 491
Re: Reading a file into memory
« Reply #10 on: January 19, 2013, 12:24:13 PM »
Code: [Select]
; 64-bit local that will hold the complete file size.
LOCAL u64Size:QWORD
...
; GetFileSize is given the address of the upper DWORD of u64Size (offset 4)
; as its second argument; the value the call returns is stored in the lower
; DWORD, so the two halves together form the 64-bit size.
; '&' is the rv/fn macro shorthand for the ADDR operator.
mov DWORD ptr u64Size,rv(GetFileSize,hFile,&DWORD ptr u64Size[4])

Thanks qWord, this is your own private field, the QWORD size.  :biggrin:
By the way that "&" before DWORD looks like "C style", is it also Assembly style?
What does it mean?

qWord

  • Member
  • *****
  • Posts: 1473
  • The base type of a type is the type itself
    • SmplMath macros
Re: Reading a file into memory
« Reply #11 on: January 19, 2013, 12:28:24 PM »
By the way that "&" before DWORD looks like "C style", is it also Assembly style?
What does it mean?
yes, it is inspired by C syntax and is a shortcut for the ADDR operator (introduced with MASM32 v11, fn/rv macros)
MREAL macros - when you need floating point arithmetic while assembling!

frktons

  • Member
  • ***
  • Posts: 491
Re: Reading a file into memory
« Reply #12 on: January 19, 2013, 12:30:41 PM »
yes, it is inspired by C syntax and is a shortcut for the ADDR operator (introduced with MASM32 v11, fn/rv macros)
So it only works with the latest version of masm32?

dedndave

  • Member
  • *****
  • Posts: 8823
  • Still using Abacus 2.0
    • DednDave
Re: Reading a file into memory
« Reply #13 on: January 19, 2013, 01:54:16 PM »
the example i posted has a function named SizFile that will get the full size

KeepingRealBusy

  • Member
  • ***
  • Posts: 426
Re: Reading a file into memory
« Reply #14 on: January 19, 2013, 01:57:52 PM »
To speed things up for huge files, allocate buffers with VirtualAlloc to get them on page boundaries (4096 bytes), which is also a sector boundary for disks with either a 512-byte or a 4096-byte sector size, then use unbuffered reads.

Dave.