Fast median algorithm

Siekmanski · August 07, 2020, 05:40:48 PM

Quote from: guga on August 07, 2020, 11:34:30 AM
Marinus, what´s the format of the structure"GDIplusBitmapData" ? I can´t find it .

Here it is.

Code Select

BitmapData struct                          
    dwWidth      dd ?   
    dwHeight     dd ?   
    Stride       dd ?   
    PixelFormat  dd ?   
    Scan0        dd ?   
    Reserved     dd ?
BitmapData ends

GDIplusBitmapData BitmapData <?>

guga · August 07, 2020, 07:12:28 PM

Hi marinus. Tks :)

Btw, i succeed to load and save webp files (unfortunatelly, not with gdiplus yet. Don´t know how to access those interfaces for webp or hevc using gdiplus) I've done it using libwebp library from google.

The routine i did was (I´ll change the name later, this was just a test to load and save this webp stuff):

Code Select


; https://www.autohotkey.com/boards/viewtopic.php?style=17&t=66335
; https://git.cs.usask.ca/SumatraPDF/SumatraPDF/blob/master/src/utils/WebpReader.cpp
; https://chromium.googlesource.com/webm/libwebp/+/master/src/webp/decode.h
; https://developers.google.com/speed/webp/docs/api

Proc DecodeWebpFile:
    Arguments @pFileName
    Local @hFile, @FileSize, @pImgWidth, @pImgHeight, @pBits, @TmpOpenedFile

    lea ebx D@TmpOpenedFile ; Buffer to hold the loaded webp in memory
    lea ecx D@FileSize ; and store it´s size
    call ReadOpenedFile 0, D@pFileName, ebx, ecx ; A function to open the webp file and read it´s contents.

    ; Just to get some info of the web p image. It´s not needed since the width and height will already be retrieved with the function below
;    lea ebx D@pImgWidth
;    lea ecx D@pImgHeight
;    C_call 'libwebp.WebPGetInfo' eax, D@FileSize, ebx, ecx

    ; This function gets the Pixel data from the webp image and also it´s width and height
    lea ebx D@pImgWidth
    lea ecx D@pImgHeight
    C_call 'libwebp.WebPDecodeBGRA' D@TmpOpenedFile, D@FileSize, ebx, ecx ; libwebp library is in cdecl calling convention. The stack needs to be adjusted, that´s why i´m using RosAsm "C_call" macro that simply add XXX bytes to the stack after the call.
    mov D@pBits eax ; Copy the image pixels to export to other formats

    call CreateImageFromMemory D@pBits, D@pImgWidth, D@pImgHeight, pImage ; Marinus routine to create a pImage from the raw 32bit ARGB bitmap data in memory
    call SaveNonIndexedImage D$pImage, {B$ "GugaWebP.png", 0}, Image_PNG, PixelFormat32bppRGB
    call SaveJpgQualityImage D$pImage, {B$ "GugaWebP.jpg", 0}, 40

    C_call 'libwebp.WebPFree' D@pBits ; Must release the data loaded from WebPDecodeBGRA
    call 'RosMem.VMemFree' D@TmpOpenedFile ; and released our allocated memory

EndP

______________

;;
I_pBits = the memory pointer to the raw 32bit ARGB bitmap data in memory
I_Stride = I_Width * 4
;;

Proc CreateImageFromMemory:
    Arguments @ImgBits, @ImageWidth, @ImageHeight, @pImage
    Local @Stride

    mov eax D@ImageWidth | shl eax 2
    call 'gdiplus.GdipCreateBitmapFromScan0' D@ImageWidth, D@ImageHeight, eax, PIXELFORMAT_32BPPARGB, D@ImgBits, D@pImage

EndP
____________________

; Just a function to open the file onto a memory buffer
[NumberOfReadBytes: 0]
Proc ReadOpenedFile:
    Arguments @Adresseem, @Filename, @pFileData, @pFileLenght
    Local @hFile, @hFileSize, @MemFile
    Uses edi, ecx, edx

    call 'KERNEL32.CreateFileA' D@Filename, &GENERIC_READ,
                                &FILE_SHARE_READ+&FILE_SHARE_WRITE, 0, &OPEN_EXISTING,
                                &FILE_ATTRIBUTE_NORMAL, 0
    If eax = &INVALID_HANDLE_VALUE
        call 'USER32.MessageBoxA' 0, {B$ "File cannot be opened!", 0}, {'Error', 0}, &MB_OK__&MB_ICONWARNING__&MB_SYSTEMMODAL
        xor eax eax | ExitP
    End_If
    mov D@hFile eax

    call 'KERNEL32.GetFileSize' eax, 0
    mov D@hFileSize eax
    On eax = 0, ExitP
    mov edi D@pFileLenght

    add eax 10 | mov edx eax
    mov D$edi edx

     ; Allocate enough memory
    mov D@MemFile 0 | lea eax D@MemFile
    call 'RosMem.VMemAlloc' eax, edx
    mov edi D@pFileData
    mov D$edi eax


    call 'KERNEL32.ReadFile' D@hFile, eax,
                             D@hFileSize, NumberOfReadBytes, 0

    call 'KERNEL32.CloseHandle' D@hFile

EndP

I found how to do it reading these:

Code Select


webpUrl := "https://upload.wikimedia.org/wikipedia/commons/b/b2/Vulphere_WebP_OTAGROOVE_demonstration_2.webp"
libwebp32Url := "https://s3.amazonaws.com/resizer-dynamic-downloads/webp/0.5.2/x86/libwebp.dll"
libwebp64Url := "https://s3.amazonaws.com/resizer-dynamic-downloads/webp/0.5.2/x86_64/libwebp.dll"
filePath := A_ScriptDir . "\test.webp"
bitness := A_PtrSize*8
lib := A_ScriptDir . "\libwebp" . bitness . ".dll"

if !FileExist(filePath)
   URLDownloadToFile, % webpUrl, % filePath

if !FileExist(lib)
   URLDownloadToFile, % libwebp%bitness%Url, % lib

hBitmap := HBitmapFromWebP(lib, filePath, width, height)
Gui, Margin, 0, 0
Gui, Add, Pic, w600 h-1, HBITMAP:%hBitmap%
Gui, Show
Return

GuiClose() {
   ExitApp
}

HBitmapFromWebP(libwebp, WebpFilePath, ByRef width, ByRef height) {
   file := FileOpen(WebpFilePath, "r")
   len := file.RawRead(buff, file.Length)
   file.Close()
   if !len
      throw Exception("Failed to read the image file")
   
   if !hLib := DllCall("LoadLibrary", Str, libwebp, Ptr)
      throw Exception("Failed to load library. Error:" . A_LastError)
   
   if !pBits := DllCall(libwebp . "\WebPDecodeBGRA", Ptr, &buff, Ptr, len, IntP, width, IntP, height) {
      DllCall("FreeLibrary", Ptr, hLib)
      throw Exception("Failed to decode the image file")
   }
   
   oGDIp := new GDIp
   pBitmap := oGDIp.CreateBitmapFromScan0(width, height, pBits)
   hBitmap := oGDIp.CreateHBITMAPFromBitmap(pBitmap)
   DllCall(libwebp . "\WebPFree", Ptr, pBits)
   oGDIp.DisposeImage(pBitmap)
   DllCall("FreeLibrary", Ptr, hLib)
   Return hBitmap
}

class GDIp   {
   __New() {
      if !DllCall("GetModuleHandle", Str, "gdiplus", Ptr)
         DllCall("LoadLibrary", Str, "gdiplus")
      VarSetCapacity(si, A_PtrSize = 8 ? 24 : 16, 0), si := Chr(1)
      DllCall("gdiplus\GdiplusStartup", UPtrP, pToken, Ptr, &si, Ptr, 0)
      this.token := pToken
   }
   
   __Delete()  {
      DllCall("gdiplus\GdiplusShutdown", Ptr, this.token)
      if hModule := DllCall("GetModuleHandle", Str, "gdiplus", Ptr)
         DllCall("FreeLibrary", Ptr, hModule)
   }
   
   CreateBitmapFromScan0(Width, Height, pBits, PixelFormat := 0x26200A, Stride := "") {
      if !Stride {
         bpp := (PixelFormat & 0xFF00) >> 8
         Stride := ((Width * bpp + 31) & ~31) >> 3
      }
      DllCall("gdiplus\GdipCreateBitmapFromScan0", Int, Width, Int, Height
                                                 , Int, Stride, Int, PixelFormat
                                                 , Ptr, pBits, PtrP, pBitmap)
      Return pBitmap
   }
   
   CreateHBITMAPFromBitmap(pBitmap, Background=0xffffffff) {
      DllCall("gdiplus\GdipCreateHBITMAPFromBitmap", Ptr, pBitmap, PtrP, hbm, Int, Background)
      return hbm
   }
   
   DisposeImage(pBitmap) {
      return DllCall("gdiplus\GdipDisposeImage", Ptr, pBitmap)
   }
}

And this...

Code Select


/* Copyright 2015 the SumatraPDF project authors (see AUTHORS file).
   License: Simplified BSD (see COPYING.BSD) */

#include "BaseUtil.h"
#include "WebpReader.h"

#ifndef NO_LIBWEBP

#include <webp/decode.h>
using namespace Gdiplus;

namespace webp {

// checks whether this could be data for a WebP image
bool HasSignature(const char *data, size_t len)
{
    return len > 12 && str::StartsWith(data, "RIFF") && str::StartsWith(data + 8, "WEBP");
}

Size SizeFromData(const char *data, size_t len)
{
    Size size;
    WebPGetInfo((const uint8_t *)data, len, &size.Width, &size.Height);
    return size;
}

Bitmap *ImageFromData(const char *data, size_t len)
{
    int w, h;
    if (!WebPGetInfo((const uint8_t *)data, len, &w, &h))
        return nullptr;

    Bitmap bmp(w, h, PixelFormat32bppARGB);
    Rect bmpRect(0, 0, w, h);
    BitmapData bmpData;
    Status ok = bmp.LockBits(&bmpRect, ImageLockModeWrite, PixelFormat32bppARGB, &bmpData);
    if (ok != Ok)
        return nullptr;
    if (!WebPDecodeBGRAInto((const uint8_t *)data, len, (uint8_t *)bmpData.Scan0, bmpData.Stride * h, bmpData.Stride))
        return nullptr;
    bmp.UnlockBits(&bmpData);

    // hack to avoid the use of ::new (because there won't be a corresponding ::delete)
    return bmp.Clone(0, 0, w, h, PixelFormat32bppARGB);
}

}

#else

namespace webp {
    bool HasSignature(const char *data, size_t len) { UNUSED(data); UNUSED(len); return false; }
    Gdiplus::Size SizeFromData(const char *data, size_t len) { UNUSED(data); UNUSED(len); return Gdiplus::Size(); }
    Gdiplus::Bitmap *ImageFromData(const char *data, size_t len) { UNUSED(data); UNUSED(len); return nullptr; }
}

#endif

References:
https://www.autohotkey.com/boards/viewtopic.php?style=17&t=66335
https://git.cs.usask.ca/SumatraPDF/SumatraPDF/blob/master/src/utils/WebpReader.cpp
https://chromium.googlesource.com/webm/libwebp/+/master/src/webp/decode.h
https://developers.google.com/speed/webp/docs/api
https://cpp.hotexamples.com/examples/-/Bitmap/-/cpp-bitmap-class-examples.html

Note: Attached are the webp file and the one i converted to jpg. Also it contains the libwebp.dll library.

Siekmanski · August 07, 2020, 07:19:43 PM

Quote from: guga on August 07, 2020, 02:59:18 PM
Marinus, if you would like to add more equates to GdiPlus header. here are some more:

Hi guga,

The code was designed to keep it as small as possible.

It doesn't need all the Format and Type GUIDS in memory.
Just one GUID for all the Image Types, and one GUID for the JPG EncoderQuality.

Here comes the trick:
The first byte of the first dword of the Image Types GUID, represents the Image Type.
Image_BMP equ 0
Image_JPG equ 1
Image_GIF equ 2
Image_TIF equ 5
Image_PNG equ 6
So fill the byte with the desired Image Type and you're good to go.

These Image types are the only ones of my interest and always available.
But you can also add the other Types and Formats if desired. ( Be sure they are available.... )

In my code you don't need all the enumerate functions to get all the GUIDS or use the other wrapper functions to fill in the values for you.
And we are not wasting memory for all the Info structures that those wrapper functions need.

That's the reason I called it "Bare Minimum GDIplus code to save different Image Types and PixelFormat conversions"

guga · August 07, 2020, 07:34:09 PM

yeah, i saw it. Tinny functions that do the job correctly

I was curious about this gdiplus and gave a test (Never used before

). Indeed it seems better to work with then FreeImage, SDL, etc. I´ll later try to convert this routines for the watermark remover function and replace the Api i was using to load the images to gdiplus.

I tested also this webp format a couple of hours agos while i was trying to learsn and understand your code for gdiplus. It seems that webp is also very handy. The size of the images are really small and i couldn't´t see that much loss of quality.

Siekmanski · August 07, 2020, 07:46:19 PM

WebP images will not load with GdiPlus
Maybe you need to install a decoder/encoder for GdiPlus???
I say this because you provided the "IMAGE_WEBP 9" Image Type.

guga · August 07, 2020, 07:51:57 PM

Quote from: Siekmanski on August 07, 2020, 07:46:19 PM
WebP images will not load with GdiPlus
Maybe you need to install a decoder/encoder for GdiPlus???
I say this because you provided the "IMAGE_WEBP 9" Image Type.

That´s weird because gdiplus.dll contain the routines to load and save webp and heic. That´s where i got´those data for the guid and the other variables. Take a look on the images below.It´s from my gdiplus in windows10

The routines to load and save heic and webp are inside gdiplus. I don´t know how to load them, but they are built internally

Siekmanski · August 07, 2020, 07:57:04 PM

It doesn't on my computer, Win8.1
Then we have to update the Masm gdiplus.lib and gdiplus.inc again...

guga · August 09, 2020, 08:32:04 AM

Hi Marinus

One question. How do we calculate the laplacian operator for this image processing ?

I saw at http://masm32.com/board/index.php?topic=7420.msg81182#msg81182 that Rui did a Laplacian computation for a determinant of a matrix 3x3. Is it the same thing as what we are doing ? Can we use his function to compute the laplacian to detect edges too ?

If not... how can we estimate the correct laplacian using 2-D Log function with Gaussian standard deviation as described here ?
https://www.javatpoint.com/dip-concept-of-edge-detection

And most important..what a hell is this laplacian ?

I gave a test on this logarithm function using wolframalpha and came onto this:

If Log(x) = 1 . So our laplacian of x, y pixels, right ? So, at the end, the laplace (not normalized) can only results in values varying from 0 to 255 to make it works similar as what Sobel does)

Laplace = Log(x,y) = (1-(z)/(2*k)) * (exp(-(z)/(2*k))) * (-1/(pi*k))

If the final laplace/Log(x,y) = 1, then we have:

1 = (1-(z)/(2*k)) * (exp(-(z)/(2*k))) * (-1/(pi*k))

where z = x^2+y^2
k = standard deviation of the whole image

So...when the result of log(x,y) = 1 Wolfram Alpha gave as a solution this:

z = -2 (k W(-e k pi) - k)

1 = (1-(z)/(2*k)) * (exp(-(z)/(2*k))) * (-1/(pi*k))

https://www.wolframalpha.com/input/?i=1+%3D+%281-%28z%29%2F%282*k%29%29+*+%28exp%28-%28z%29%2F%282*k%29%29%29+*+%28-1%2F%28pi*k%29%29

Where W = product Log function (whatever that is

)

Also, if we consider log(x,y) = 2 we then have this formula:

z = -2 (k W(-2 exp(k pi)) - k)
2 = (1-(z)/(2*k)) * (exp(-(z)/(2*k))) * (-1/(pi*k))

If log(x,y) = 3, we have this:
z = -2 (k W(-3 e k pi) - k)
3 = (1-(z)/(2*k)) * (exp(-(z)/(2*k))) * (-1/(pi*k))

So, it seems to me that for log(x,y) = N we have

z = -2 (k W(-N e k pi) - k)

z = -2*k (W(-N e k pi) - 1)

where z = x^2+y^2

So, in theory, all we need to know is how to calculate this W stuff, right ?

Knowing that "k" is a constant for each image (it´s simply their standard deviation), then the values of this W function can all be pre calculated and put onto a table from 0 to 255, right ?

Since, "k" and pi are constants, and we also have the resultant N varying from 0 to 255, we can build a 256 dword table containing the results of W(-N e k pi), right ?

If is that so, to calculate the laplace (the exact values and not the approximations), we could 1st solve this W function and later, solve z for each values from 0 to 255, like this:

Val0 = -2*k (W(-0 e k pi) - 1) => related to Laplace Val = 0
Val1 = -2*k (W(-1 e k pi) - 1) => related to Laplace Val = 1
Val2 = -2*k (W(-2 e k pi) - 1) => related to Laplace Val = 2
Val3 = -2*k (W(-3 e k pi) - 1) => related to Laplace Val = 3
(...)

Since, z varies according to the value of each pixel at x, y pos, we could simply point the result of x^2+y^2 to the value that is more closer to the Val1, Val2..... right ?

On this way we would create only 256 values for this "-2*k (W(-N e k pi) - 1)" stuff and compare the values of x^2+y^2 to them to get the proper laplace value.

The question is how do we calculate this W stuff ????

Siekmanski · August 09, 2020, 09:54:32 AM

You are going full steam ahead.

I'm not that far yet, to dive into other edge dectection operators and techniques.
Sounds interesting, and a Laplacian filter as a preprocessing step maybe possible.
First I want the Sobel routine to be ready, than we will see what else is cool to implement.

I have almost finished the Sobel operator.
And I think it will be as fast as lightning....
It reads 16 pixels with 4 read instructions.
And it saves 4 Sobel G values as pixels with 2 write instruction. ( a block of 2 by 2 pixels )
It's all 4 lane parallel SSE2 code.
4 lane convolution at once without multiplications, and other tricks.
For example it uses a 4 by 4 Pixel Matrix, so we can abuse SIMD to the max.
The advantage is, a 4 by 4 Matrix can hold 4 single Sobel operators without the need of loading the pixels separately.
Yes, this one was a brain teaser, but I knew it should be possible.
Still have to test it on real data, hope it works out ( on paper it does.... )

Siekmanski · August 09, 2020, 09:29:49 PM

Hi guga,

We have to calculate with square roots for the magnitudes.
How much precision do we need?

We can calculate with high precision at the cost of +/- 39 clock cycles.
Or we can calculate the reciprocal square roots with 12 bit precision at +/- 8 clock cycles.
Or do it the rude way ( not very accurate ) and make the X Y gradients absolute values and add them together at +/- 3 clock cycles.

I myself would go for the reciprocal square roots, it's more than enough precision we need for the pixel bytes, which are actually rounded to natural numbers.

guga · August 10, 2020, 12:23:06 AM

Quote from: Siekmanski on August 09, 2020, 09:29:49 PM
Hi guga,

We have to calculate with square roots for the magnitudes.
How much precision do we need?

We can calculate with high precision at the cost of +/- 39 clock cycles.
Or we can calculate the reciprocal square roots with 12 bit precision at +/- 8 clock cycles.
Or do it the rude way ( not very accurate ) and make the X Y gradients absolute values and add them together at +/- 3 clock cycles.

I myself would go for the reciprocal square roots, it's more than enough precision we need for the pixel bytes, which are actually rounded to natural numbers.

Although 39 clock cycles is very very fast, a precision of 12 bits seems to me good enough to make the result be accurate. I saw a small issue when it concerns the strategy on what operator to choose. Since we normalized the operator value to stays withing the limits, i started to test the different operators and check the results.

With sobel we have the edges correctly detected, but it has some small gaps in some pixels, which is hard to trace. I then tested Scharr and the result was the opposite as sobel. I mean, it detected the edges a bit more accurated, but on the other hand, produced a bit more noise outside the edges.

I´m trying to figure it out what exact values we can use as a good edge detector. You are using fixed values for sobel, right ? I mean those 0-1 and 0-2. I extended your algo a little bit and allowed to insert whatever values we want. Doing this way we can try to track what values (those ValA and ValB i posted previously) generates a better result. Or...we can try to calculate the reverse operation that sobel/Scharr/Prewitt are using in order to we find the correct values of ValA and ValB.

We only need 2 values in fact, to write a edge detector routine.

I made a workaround on these problems writing a routine to Estimate the Threshold automatically. It works in 90% of the cases. The main problem relies on watermarks whose pixels that forms the edges are too dark. For example, i saw pixels whose values are in between 14 and 18 that still belongs to edges. The new routine calculates the median of the darkers values of the image (after the sobel routines do their job) and after then once it is found, it calculates the average of the remaining pixels (the ones too dark closer to black) and tries to exclude which pixels are heavily distributed (pixels from 0 to 3 represents more then 90% of the whole image, in general).

I´m just giving a rest, because last night i started the routine to test the laplacian operator. Currently, i´m trying to create a routine to compute that damn Lambert W-Function existant in wolfram alpha. I´m close to a result, but i got stuck trying to create a routine in SSE to calculate log and exp values. This laplacian stuff will be a pain to implement, but i think we can make it work :)

Siekmanski · August 10, 2020, 12:51:54 AM

Just take a moment rest, then I have a chance to keep up with you.

Log and Exp routines are not available in SSE, we have to create them ourselves.

Did a test with the machine image from wikipedia's Sobel example.
The principal of the 16 pixel sse2 parallel sobel routine works, but there is some fine tuning needed.
There are misalignments on the Y axis, as you can see in the attachment.

guga · August 10, 2020, 01:06:20 AM

Quote from: Siekmanski on August 10, 2020, 12:51:54 AM
Just take a moment rest, then I have a chance to keep up with you.

Log and Exp routines are not available in SSE, we have to create them ourselves.

Did a test with the machine image from wikipedia's Sobel example.
The principal of the 16 pixel sse2 parallel sobel routine works, but there is some fine tuning needed.
There are misalignments on the Y axis, as you can see in the attachment.

Hi Marinus. The app doesn´t open here.

Siekmanski · August 10, 2020, 01:09:37 AM

did you depack it to your HD?

guga · August 10, 2020, 01:13:47 AM

Yep. But i found it. It was looking for the image in a "Info' directory. I simply create this dir and set the machine.png to there

I see the cross lines in sobel now. The image in not aligned

The MASM Forum

News:

Fast median algorithm

Siekmanski

guga

Siekmanski

guga

Siekmanski

guga

Siekmanski

guga

Siekmanski

Siekmanski

guga

Siekmanski

guga

Siekmanski

guga