I suspect that your speed problems are due to your OS/execution-environment.
The “blinking” is commonly called “flickering” or “tearing”. To eliminate it, the screen update — whether done by drawing directly to the display buffer or by copying the contents of a back buffer to the display buffer — must be synchronized with the screen refresh and must be completed within 1-2 refresh periods, depending on various details. At 60 Hz, for example, the refresh period is ~16.7 ms, a fairly long time even for a slow system.
For a DOS app, and assuming VGA color, bit 3 of the Input Status #1 Register at I/O port 3DAh indicates when the display is in vertical retrace. For the common case where the screen update is faster than the screen refresh, you should synchronize with the start of vertical retrace. To do this reliably, read the port in two consecutive loops: first wait for bit 3 to be clear (so that a retrace already in progress is not mistaken for the start of one), then wait for it to be set.
A demo:
;-----------------------------------------------------------------
; Spacebar to toggle sync, Escape to exit, any other key to loop.
;-----------------------------------------------------------------
;-----------------------------------------------------------------
; Assembler setup and data.
;   f_sync  - bit 0 is the only bit the program tests or flips.
;   buff0   - one full image, every byte 0, in far segment "black".
;   buff1   - one full image, every byte 7, in far segment "white".
; Each image gets a far data segment of its own; the code loads
; "seg buffN" into ds and copies from offset 0.
;-----------------------------------------------------------------
.model small
.386                                    ; after .model, so segments stay 16-bit
.stack

IMAGE_BYTES equ 64000                   ; bytes in one image buffer

.data
f_sync  dw      0

.fardata black
buff0   db      IMAGE_BYTES dup(0)

.fardata white
buff1   db      IMAGE_BYTES dup(7)

.code
;-----------------------------------------------------------------
; sync - return just as bit 3 of port 3DAh goes from 0 to 1.
; Polls the port until bit 3 reads clear, then polls again until
; it reads set, so the caller always resumes on a rising edge and
; never part-way through a period in which the bit was already set.
; In:    nothing
; Out:   al = last value read from the port (bit 3 set)
; Clobb: al, dx, flags
;-----------------------------------------------------------------
STATUS_PORT     equ 3dah                ; port polled by sync
RETRACE_BIT     equ 1000b               ; bit 3 of the value read

sync proc
        mov     dx, STATUS_PORT

sync_until_clear:                       ; phase 1: wait while bit 3 is set
        in      al, dx
        test    al, RETRACE_BIT
        jnz     sync_until_clear

sync_until_set:                         ; phase 2: wait while bit 3 is clear
        in      al, dx
        test    al, RETRACE_BIT
        jz      sync_until_set

        ret
sync endp
;-----------------------------------------------------------------
; Program entry.
; Switches to video mode 13h and then alternates the display
; between buff0 and buff1, showing one buffer per key press.
; Space flips bit 0 of f_sync, Escape ends the program, any other
; key just advances to the next buffer.
;
; Changes from the straight-line version:
;   * the video mode found at start-up is saved and restored on
;     exit, so the user is not left in graphics mode;
;   * the direction flag is cleared before every rep movsd;
;   * the duplicated show/wait/toggle sequence lives in show_frame;
;   * 16-bit registers are used for the copy (this is a 16-bit
;     segment, so rep movsd counts in cx and indexes with si/di).
;-----------------------------------------------------------------
KEY_ESC         equ 01h                 ; int 16h scan code (ah) for Escape
KEY_SPACE       equ 39h                 ; int 16h scan code (ah) for Space
FRAME_DWORDS    equ 64000/4             ; one buffer, counted in dwords

.startup
        mov     ah, 0fh                 ; BIOS video: get current mode -> al
        int     10h
        push    ax                      ; kept on the stack until main_quit

        mov     ax, 0013h               ; BIOS video: ah=00h set mode, al=13h
        int     10h

        push    0a000h
        pop     es                      ; es:0 = copy destination for the whole run

main_loop:
        mov     bx, seg buff0
        call    show_frame
        jc      main_quit               ; Escape pressed

        mov     bx, seg buff1
        call    show_frame
        jnc     main_loop               ; loop unless Escape pressed

main_quit:
        pop     ax                      ; al = mode saved at start-up
        xor     ah, ah                  ; ah=00h set mode
        int     10h
.exit

;-----------------------------------------------------------------
; show_frame - copy one buffer to es:0, then wait for a key.
; If bit 0 of f_sync is set, sync is called before the copy.
; In:    bx = segment of the source buffer (copied from offset 0)
;        es = destination segment
;        ds = DGROUP (needed to reach f_sync)
; Out:   CF = 1 if the key was Escape, CF = 0 otherwise
;        Space flips bit 0 of f_sync before returning
; Clobb: ax, cx, dx, si, di, flags (ds is preserved)
;-----------------------------------------------------------------
show_frame proc
        test    f_sync, 1
        jz      sf_copy
        call    sync                    ; clobbers al/dx only; bx survives

sf_copy:
        push    ds
        mov     ds, bx                  ; ds:si = source buffer
        xor     si, si
        xor     di, di
        mov     cx, FRAME_DWORDS
        cld                             ; copy must run upward from offset 0
        rep     movsd
        pop     ds                      ; back to DGROUP before touching f_sync

        xor     ah, ah                  ; BIOS keyboard: wait for key, scan code -> ah
        int     16h
        cmp     ah, KEY_ESC
        je      sf_escape
        cmp     ah, KEY_SPACE
        jne     sf_continue
        xor     f_sync, 1               ; Space: toggle sync on/off

sf_continue:
        clc
        ret

sf_escape:
        stc
        ret
show_frame endp
end