News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests

Main Menu

bzip3

Started by Vortex, January 30, 2023, 04:23:49 AM

Previous topic - Next topic

TimoVJL

from main.c
/* Excerpt from main.c: sequential block loop for the two modes.
 * Each block is framed in the stream as two 4-byte values written with
 * write_neutral_s32 (the size handed back by the encoder, then the size of
 * the data that was read) followed by the block payload. */
if (mode == MODE_ENCODE) {
            s32 read_count;
            while (!feof(input_des)) {
                /* Fetch the next chunk of input; block_size is the requested amount. */
                read_count = xread(buffer, 1, block_size, input_des);
                bytes_read += read_count;

                /* Nothing was read: stop without emitting an empty block. */
                if(read_count == 0)
                    break;

                /* The encoder works on `buffer` itself; the same buffer is written out below. */
                s32 new_size = bz3_encode_block(state, buffer, read_count);
                if (new_size == -1) {
                    fprintf(stderr, "Failed to encode a block: %s\n", bz3_strerror(state));
                    return 1;
                }

                /* Block header: encoded size first, then the original size. */
                write_neutral_s32(byteswap_buf, new_size);
                xwrite(byteswap_buf, 4, 1, output_des);
                write_neutral_s32(byteswap_buf, read_count);
                xwrite(byteswap_buf, 4, 1, output_des);
                xwrite(buffer, new_size, 1, output_des);
                /* 8 = the two 4-byte header fields. */
                bytes_written += 8 + new_size;
            }
            fflush(output_des);
        } else if (mode == MODE_DECODE) {
            s32 new_size, old_size;
            while (!feof(input_des)) {
                /* On a false return the loop condition is re-tested; presumably this is
                 * how end of input is detected - confirm against xread_eofcheck. */
                if (!xread_eofcheck(&byteswap_buf, 1, 4, input_des)) continue;

                /* Header fields are read in the order the encoder wrote them. */
                new_size = read_neutral_s32(byteswap_buf);
                xread_noeof(&byteswap_buf, 1, 4, input_des);
                old_size = read_neutral_s32(byteswap_buf);
                /* NOTE(review): new_size and old_size come straight from the input and no
                 * range check against the capacity of `buffer` is visible in this excerpt -
                 * confirm that one exists elsewhere. */
                xread_noeof(buffer, 1, new_size, input_des);
                bytes_read += 8 + new_size;
                if (bz3_decode_block(state, buffer, new_size, old_size) == -1) {
                    fprintf(stderr, "Failed to decode a block: %s\n", bz3_strerror(state));
                    return 1;
                }
                /* After decoding, old_size bytes of `buffer` are written out. */
                xwrite(buffer, old_size, 1, output_des);
                bytes_written += old_size;
            }
            fflush(output_des);
        }
decompress-file.c
/* Decompress a file SEQUENTIALLY (i.e. *not* in parallel) using bzip3 high level API. */
/* This is just a demonstration of bzip3 library usage, it does not contain all the necessary error checks and will not
* support cross-endian encoding/decoding. */

#include <libbz3.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Decompress the file named by argv[1] into the file named by argv[2].
 *
 * The whole input is read into memory. Its first sizeof(size_t) bytes are
 * interpreted as the original (decompressed) size in host byte order and the
 * remainder is handed to bz3_decompress().
 *
 * Returns 0 on success and 1 on a usage error or any failure.
 */
int main(int argc, char ** argv) {
    FILE * fp = NULL;
    FILE * outfp = NULL;
    uint8_t * buffer = NULL;
    uint8_t * outbuf = NULL;
    long file_len;
    size_t size, orig_size;
    int bzerr;
    int status = 1;

    if (argc != 3) {
        /* The format string consumes one %s, so the program name must be supplied. */
        printf("Usage: %s <input file> <output file>", argc > 0 ? argv[0] : "decompress-file");
        return 1;
    }

    // Read the entire input file to memory:
    fp = fopen(argv[1], "rb");
    if (fp == NULL) {
        perror(argv[1]);
        goto done;
    }
    if (fseek(fp, 0, SEEK_END) != 0) {
        perror(argv[1]);
        goto done;
    }
    file_len = ftell(fp); /* ftell reports failure as -1, so keep it signed until checked */
    if (file_len < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        perror(argv[1]);
        goto done;
    }
    size = (size_t)file_len;
    if (size < sizeof(size_t)) {
        /* Without this check the header read below would run past the buffer. */
        fprintf(stderr, "%s: too short to contain the size header\n", argv[1]);
        goto done;
    }
    buffer = malloc(size);
    if (buffer == NULL) {
        fprintf(stderr, "Out of memory reading %s\n", argv[1]);
        goto done;
    }
    if (fread(buffer, 1, size, fp) != size) {
        fprintf(stderr, "Failed to read %s\n", argv[1]);
        goto done;
    }
    fclose(fp);
    fp = NULL;

    // Decompress the file:
    orig_size = *(size_t *)buffer;
    /* Request at least one byte: malloc(0) may legitimately return NULL. */
    outbuf = malloc(orig_size > 0 ? orig_size : 1);
    if (outbuf == NULL) {
        fprintf(stderr, "Cannot allocate %zu bytes for the output\n", orig_size);
        goto done;
    }
    bzerr = bz3_decompress(buffer + sizeof(size_t), outbuf, size - sizeof(size_t), &orig_size);
    if (bzerr != BZ3_OK) {
        printf("bz3_decompress() failed with error code %d", bzerr);
        goto done;
    }

    outfp = fopen(argv[2], "wb");
    if (outfp == NULL) {
        perror(argv[2]);
        goto done;
    }
    if (fwrite(outbuf, 1, orig_size, outfp) != orig_size) {
        fprintf(stderr, "Failed to write %s\n", argv[2]);
        goto done;
    }
    /* fclose flushes buffered data, so a failure here means the output is incomplete. */
    if (fclose(outfp) != 0) {
        outfp = NULL;
        perror(argv[2]);
        goto done;
    }
    outfp = NULL;

    /* %zu is the conversion for size_t; %d expects an int. */
    printf("OK, %zu => %zu", size, orig_size);
    status = 0;

done:
    if (fp != NULL) fclose(fp);
    if (outfp != NULL) fclose(outfp);
    free(buffer);
    free(outbuf);
    return status;
}
hl-api.c
#include <libbz3.h>
#include <stdio.h>
#include <stdlib.h>

#define MB (1024 * 1024)

/*
 * Round-trip "shakespeare.txt" through bz3_compress() and bz3_decompress()
 * entirely in memory and print the sizes before and after each step.
 *
 * Returns 0 on success and 1 if the file cannot be read, memory cannot be
 * allocated, or either library call reports an error.
 */
int main(void) {
    FILE * fp = NULL;
    uint8_t * buffer = NULL;
    uint8_t * outbuf = NULL;
    long file_len;
    size_t size, out_size;
    int bzerr;
    int status = 1;

    printf("Compressing shakespeare.txt back and forth in memory.\n");

    // Read the entire "shakespeare.txt" file to memory:
    fp = fopen("shakespeare.txt", "rb");
    if (fp == NULL) {
        perror("shakespeare.txt");
        goto done;
    }
    if (fseek(fp, 0, SEEK_END) != 0) {
        perror("shakespeare.txt");
        goto done;
    }
    file_len = ftell(fp); /* ftell reports failure as -1, so keep it signed until checked */
    if (file_len < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        perror("shakespeare.txt");
        goto done;
    }
    size = (size_t)file_len;
    /* Request at least one byte: malloc(0) may legitimately return NULL. */
    buffer = malloc(size > 0 ? size : 1);
    if (buffer == NULL) {
        fprintf(stderr, "Cannot allocate %zu bytes for the input\n", size);
        goto done;
    }
    if (fread(buffer, 1, size, fp) != size) {
        fprintf(stderr, "Failed to read shakespeare.txt\n");
        goto done;
    }
    fclose(fp);
    fp = NULL;

    // Compress the file:
    out_size = bz3_bound(size);
    outbuf = malloc(out_size > 0 ? out_size : 1);
    if (outbuf == NULL) {
        fprintf(stderr, "Cannot allocate %zu bytes for the output\n", out_size);
        goto done;
    }
    bzerr = bz3_compress(1 * MB, buffer, outbuf, size, &out_size);
    if (bzerr != BZ3_OK) {
        printf("bz3_compress() failed with error code %d", bzerr);
        goto done;
    }

    /* %zu is the conversion for size_t; %d expects an int. */
    printf("%zu => %zu\n", size, out_size);

    // Decompress the file.
    /* `size` still holds the capacity of `buffer`, which is what out_size must be on entry. */
    bzerr = bz3_decompress(outbuf, buffer, out_size, &size);
    if (bzerr != BZ3_OK) {
        printf("bz3_decompress() failed with error code %d", bzerr);
        goto done;
    }

    printf("%zu => %zu\n", out_size, size);
    status = 0;

done:
    if (fp != NULL) fclose(fp);
    free(buffer);
    free(outbuf);
    return status;
}
May the source be with you

mineiro

#16
tests done in linux x86-64

compress and decompress in memory: test.asm

.X64
; This program tests bzip3 compression and decompression in memory.

; Project include files; their contents are not part of this listing.
include ./../inc/c.inc
include ./../inc/glib.inc
include ./../inc/macros.inc

; Prototypes for procedures of this module, so they can be used with invoke.
comprima proto :gpointer,:guint64
; NOTE(review): no procedure named "decompress" is defined in this listing -
; confirm whether this prototype is still needed.
decompress proto :gpointer,:guint64

; NOTE(review): the surrounding text calls this file "libbz3.inc"; on a
; case-sensitive file system the name here must match the file exactly.
include libbz3.INC

.data?
align 16
; Address of the third command-line argument; stored by main.
; No reader of this variable is visible in this listing.
p_output_name dq ?

.code
;-------------------------------------------------------------
; main - program entry: "test c <uncompressed> <compressed>"
;
; In:   _argc = number of command-line arguments
;       _argv = address of the argument pointer array
; Out:  does not return; the process ends through exit:
;         status 1 when the input file cannot be read,
;         status 0 otherwise (including after the usage text).
;
; With exactly three arguments and a first argument starting with
; "c", the file named by the second argument is loaded into memory
; and handed to comprima. Every other invocation prints the usage
; text.
;-------------------------------------------------------------
main proc uses rbx r12 r13 r14 r15 _argc:dword,_argv:ptr
local argc:guint32
local argv:gpointer
local error:ptr _GError
local p_input:qword
local input_sz:qword

;-------------------------------------------------------------
;FUNCTION PARAMETERS
;-------------------------------------------------------------
mov argc,_argc
mov argv,_argv

;-------------------------------------------------------------
;OPEN FILE
;-------------------------------------------------------------
.if argc == 4
    mov rax,argv
    add rax,8
    mov rax,[rax]                       ; rax = argv[1]
    movzx rax,byte ptr [rax]            ; first character of the mode argument
    .if rax == "c"
        mov rbx,argv
        add rbx,8*3
        mov rbx,[rbx]                   ; rbx = argv[3]
        mov p_output_name,rbx
        mov rbx,argv
        add rbx,16
        mov rbx,[rbx]                   ; rbx = argv[2], the input path
        ; The path is passed as given: a relative name is resolved against
        ; the current directory by the file system itself, and an absolute
        ; name must not be prefixed with the current directory.
        mov error,0                     ; must be NULL before the call
        invoke g_file_get_contents,rbx,addr p_input,addr input_sz,addr error
        .if eax == FALSE
            mov rax,error
            invoke printf,CStr("%s",10),[rax]._GError.message
            invoke exit,1
        .endif
        invoke comprima,p_input,input_sz
        invoke exit,0
    .endif
.endif

; Reached for a wrong argument count or an unknown mode letter.
invoke printf,CStr("Usage: test c uncompressed compressed",10)
invoke exit,0
main endp


align 16
;-------------------------------------------------------------
; comprima - compress a memory buffer with bzip3, decompress the
;            result again and print the sizes of each step.
;
; In:   _pointer = address of the uncompressed data
;       _ssize   = size of that data in bytes
; Out:  returns to the caller on success; ends the process with
;       exit status 1 when bz3_compress or bz3_decompress does not
;       return BZ3_OK.
;
; The two work buffers obtained from g_malloc0 are not released here.
;-------------------------------------------------------------
comprima proc uses rbx r12 r13 r14 r15 _pointer:qword,_ssize:guint64

local p_input:ptr           ;input data
local sz_input:guint64

local p_output:ptr          ;compressed data in memory
local sz_output:guint64

local p_descompresso:ptr    ;decompressed data in memory
local sz_descompresso:guint64

    ; Save the arguments in locals before the first call is made.
    mov p_input,_pointer
    mov sz_input,_ssize

    invoke bz3_version      ;report the bzip3 library version
    invoke printf,CStr("bzip3 version: %s",10),rax

    ; Called directly instead of through invoke: the argument goes in rdi.
    mov rdi,sz_input
    call bz3_bound          ;recommended size of the output buffer
    mov sz_output,rax

    invoke g_malloc0,sz_output
    mov p_output,rax

    ;p_input   == uncompressed data ptr
    ;p_output  == compressed data ptr
    ;sz_input  == uncompressed size
    ;sz_output == in: capacity of p_output, out: compressed size
    invoke bz3_compress,8*1024*1024,p_input,p_output,sz_input,addr sz_output
    ; The function returns a C int, so only eax is defined; the upper
    ; half of rax must not take part in the comparison.
    .if eax != BZ3_OK
        invoke printf,CStr("compression failed with error code %d",10),rax
        invoke exit,1
    .endif

    ; Both sizes are 64-bit values, hence %zu rather than %d.
    invoke printf,CStr("not compressed: %zu",09,"compressed: %zu",10),sz_input,sz_output

    ; The decompression buffer is as large as the original input.
    invoke g_malloc0,sz_input
    mov p_descompresso,rax
    mov rax,sz_input
    mov sz_descompresso,rax

    invoke bz3_decompress,p_output,p_descompresso,sz_output,addr sz_descompresso
    .if eax != BZ3_OK
        invoke printf,CStr("decompression failed with error code %d",10),rax
        invoke exit,1
    .endif

    invoke printf,CStr("compressed: %zu",09,"decompressed: %zu",10),sz_output,sz_descompresso

    ret
comprima endp

end main


This was meant to create a compatible bzip3 file to compress and decompress.
Edit: code removed — it did not work as expected. My fault.

this is the header file libbz3.h to be converted to libbz3.inc


/*
* BZip3 - A spiritual successor to BZip2.
* Copyright (C) 2022 Kamila Szewczyk
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of  MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU Lesser General Public License along with
* this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef _LIBBZ3_H
#define _LIBBZ3_H

#include <stddef.h>
#include <stdint.h>

/* Symbol visibility control. */
#ifndef BZIP3_VISIBLE
    #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
        #define BZIP3_VISIBLE __attribute__((visibility("default")))
    #else
        #define BZIP3_VISIBLE
    #endif
#endif

#if defined(BZIP3_DLL_EXPORT) && (BZIP3_DLL_EXPORT == 1)
    #define BZIP3_API __declspec(dllexport) BZIP3_VISIBLE
#elif defined(BZIP3_DLL_IMPORT) && (BZIP3_DLL_IMPORT == 1)
    #define BZIP3_API __declspec(dllimport) BZIP3_VISIBLE
#else
    #define BZIP3_API BZIP3_VISIBLE
#endif

#define BZ3_OK 0
#define BZ3_ERR_OUT_OF_BOUNDS -1
#define BZ3_ERR_BWT -2
#define BZ3_ERR_CRC -3
#define BZ3_ERR_MALFORMED_HEADER -4
#define BZ3_ERR_TRUNCATED_DATA -5
#define BZ3_ERR_DATA_TOO_BIG -6
#define BZ3_ERR_INIT -7

struct bz3_state;

/**
* @brief Get bzip3 version.
*/
BZIP3_API const char * bz3_version(void);

/**
* @brief Get the last error number associated with a given state.
*/
BZIP3_API int8_t bz3_last_error(struct bz3_state * state);

/**
* @brief Return a user-readable message explaining the cause of the last error.
*/
BZIP3_API const char * bz3_strerror(struct bz3_state * state);

/**
* @brief Construct a new block encoder state, which will encode blocks as big as the given block size.
* The decoder will be able to decode blocks at most as big as the given block size.
* Returns NULL in case allocation fails or the block size is not between 65K and 511M
*/
BZIP3_API struct bz3_state * bz3_new(int32_t block_size);

/**
* @brief Free the memory occupied by a block encoder state.
*/
BZIP3_API void bz3_free(struct bz3_state * state);

/**
* @brief Return the recommended size of the output buffer for the compression functions.
*/
BZIP3_API size_t bz3_bound(size_t input_size);

/* ** HIGH LEVEL APIs ** */

/**
* @brief Compress a block of data. This function does not support parallelism
* by itself, consider using the low level `bz3_encode_blocks()` function instead.
* Using the low level API might provide better performance.
* Returns a bzip3 error code; BZ3_OK when the operation is successful.
* Make sure to set out_size to the size of the output buffer before the operation;
* out_size must be at least equal to `bz3_bound(in_size)'.
*
* @param block_size Block size to compress with (presumably subject to the same
*                   limits as documented for `bz3_new()' - confirm).
* @param in         Data to compress.
* @param out        Buffer that receives the compressed data.
* @param in_size    Size of the data in `in'.
* @param out_size   On entry, the size of the `out' buffer. NOTE(review): callers
*                   appear to read it back as the compressed size afterwards - confirm.
* @return BZ3_OK on success, otherwise one of the BZ3_ERR_* codes.
*/
BZIP3_API int bz3_compress(uint32_t block_size, const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size);

/**
* @brief Decompress a block of data. This function does not support parallelism
* by itself, consider using the low level `bz3_decode_blocks()` function instead.
* Using the low level API might provide better performance.
* Returns a bzip3 error code; BZ3_OK when the operation is successful.
* Make sure to set out_size to the size of the output buffer before the operation.
*
* @param in         Compressed data.
* @param out        Buffer that receives the decompressed data.
* @param in_size    Size of the data in `in'.
* @param out_size   On entry, the size of the `out' buffer. NOTE(review): callers
*                   appear to read it back as the decompressed size afterwards - confirm.
* @return BZ3_OK on success, otherwise one of the BZ3_ERR_* codes.
*/
BZIP3_API int bz3_decompress(const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size);

/* ** LOW LEVEL APIs ** */

/**
* @brief Encode a single block. Returns the amount of bytes written to `buffer'.
* `buffer' must be able to hold at least `bz3_bound(size)' bytes. The size must not
* exceed the block size associated with the state.
*/
BZIP3_API int32_t bz3_encode_block(struct bz3_state * state, uint8_t * buffer, int32_t size);

/**
* @brief Decode a single block.
* `buffer' must be able to hold at least `orig_size' bytes. The size must not exceed the block size
* associated with the state.
* @param size The size of the compressed data in `buffer'
* @param orig_size The original size of the data before compression.
*/
BZIP3_API int32_t bz3_decode_block(struct bz3_state * state, uint8_t * buffer, int32_t size, int32_t orig_size);

/**
* @brief Encode `n' blocks, all in parallel.
* All specifics of the `bz3_encode_block' still hold. The function will launch a thread for each block.
* The compressed sizes are written to the `sizes' array. Every buffer is overwritten and none of them can overlap.
* Precisely `n' states, buffers and sizes must be supplied.
*
* Expects `n' between 2 and 16.
*
* Present in the shared library only if -lpthread was present during building.
*/
BZIP3_API void bz3_encode_blocks(struct bz3_state * states[], uint8_t * buffers[], int32_t sizes[], int32_t n);

/**
* @brief Decode `n' blocks, all in parallel.
* Same specifics as `bz3_encode_blocks', but doesn't overwrite `sizes'.
*/
BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], uint8_t * buffers[], int32_t sizes[],
                                 int32_t orig_sizes[], int32_t n);

#endif

I'd rather be this ambulant metamorphosis than to have that old opinion about everything

Vortex

#17
Hi Jochen,

Quote: Bzip3's performance is heavily dependent on the compiler. x64 Linux clang13 builds usually can go as high as 17MiB/s compression and 23MiB/s decompression per thread. Windows and 32-bit builds might be considerably slower.

https://github.com/kspalaiologos/bzip3

Attached zip file including the 32-bit and 64-bit binaries built with Msys2.

Jotti's report :

https://virusscan.jotti.org/en-US/filescanjob/jpi2ix4ovi

Please rename the extension of the attachment to .7z :

ren bzip3-Msys2.zip bzip3-Msys2.7z
The original zip archive exceeds the maximum file size limit of 500 KB.


jj2007

#18
Quote from: Vortex on February 01, 2023, 04:47:50 AM: Attached zip file including the 32-bit and 64-bit binaries built with Msys2.

Hi Erol,

bzip3-64bit.exe is indeed significantly faster than FreeArc :thumbsup:

The compression ratio for Win64.inc (752,296 bytes), however, is mediocre:
97066  Win64.arc
104168 Win64.bz3


Another test where FreeArc shines:
5247 ms for bzip3-64: 3656710   01.02.2023  00:58:34    Bible20.bz3
5773 ms for bzip3-32: 3656710   01.02.2023  00:58:40    Bible20.bz3

Compressing 1 file, 83,986,682 bytes. Processed
2696 ms for compressing with FreeArc
692568  16.05.2016  18:21:18    Bible20.arc


An Excel database:
8934 ms for bzip3-64: 3506850   01.02.2023  01:10:36    MDG_Database.bz3
10126 ms for bzip3-32: 3506850  01.02.2023  01:10:46    MDG_Database.bz3

Compressing 1 file, 42,340,864 bytes. Processed
9976 ms for compressing with FreeArc
1754309 04.05.2015  13:08:39    MDG_Database.arc