Btw...If you want to test on the new Sobel routine i made, here it is ported to masm. (Sorry, i didn´t used macros here, because i don´t remember how the macros in masm are....but it won´t shouldn´t hard to follow

). My variation of Sobel result on thinner edges and a correct equalization of the pixels distribution.
Masm versionFloat_Two dq 2.0
Float_One_255 dq 0.00392156862745098 ; (1/255)
Float_SobleVarG dq 180.3122292025696 ; 255/sqrt(2). This is to speed up the calculation rather then using divide.
SobelGetGPLusEx proc near
FractionY = dword ptr -14h
FractionX = dword ptr -10h
Divisor = dword ptr -0Ch
DataCheck = dword ptr -8
pReturn = dword ptr -4
pMatrix = dword ptr 8
pOutSobelX = dword ptr 0Ch
pOutSobelY = dword ptr 10h
pOutSobel = dword ptr 14h
push ebp
mov ebp, esp
sub esp, 14h
push edi
push edx
push ecx
finit
mov edi, [ebp+pMatrix]
; To calculate Gx^2 later. Therefore Gx = M3+M9 + 2*(M6-M4) - (M7+M1)
fld dword ptr [edi+20] ; M6
fsub dword ptr [edi+12] ; M4
fmul Float_Two
fadd dword ptr [edi+8] ; M3
fadd dword ptr [edi+32] ; M9
fsub dword ptr [edi+24] ; M7
fsub dword ptr [edi+0] ; M1
lea ecx, [ebp+DataCheck]
fistp dword ptr [ecx]
cmp [ebp+DataCheck], 0
jnz short loc_4C7649
fldz
fstp [ebp+FractionX]
jmp short loc_4C76AB
; ---------------------------------------------------------------------------
loc_4C7649:
cmp [ebp+DataCheck], -255 ; Blacks. Edge too dark. Limit was extrapolated
jge short loc_4C7675
xor edx, edx
mov ecx, 255
mov eax, [ebp+DataCheck]
neg eax
div ecx
inc eax
imul eax, 255
mov [ebp+Divisor], eax
fild [ebp+DataCheck]
fidiv [ebp+Divisor]
fstp [ebp+FractionX]
jmp short loc_4C76AB
; ---------------------------------------------------------------------------
loc_4C7675:
cmp [ebp+DataCheck], 255 ; Whites. Edge too brigth. Limit was extrapolated
jle short loc_4C769F
xor edx, edx
mov ecx, 255
mov eax, [ebp+DataCheck]
div ecx
inc eax
imul eax, 255
mov [ebp+Divisor], eax
fild [ebp+DataCheck]
fidiv [ebp+Divisor]
fstp [ebp+FractionX]
jmp short loc_4C76AB
; ---------------------------------------------------------------------------
loc_4C769F:
fild [ebp+DataCheck]
fmul Float_One_255
fstp [ebp+FractionX]
loc_4C76AB:
mov eax, [ebp+pOutSobelX]
mov ecx, [ebp+FractionX]
mov [eax], ecx
; To calculate Gy^2 later. Therefore Gy = M7+M9 + 2*(M8-M2) - (M3+M1)
fld dword ptr [edi+28] ; M8
fsub dword ptr [edi+4] ; M2
fmul Float_Two
fadd dword ptr [edi+24] ; M7
fadd dword ptr [edi+32] ; M9
fsub dword ptr [edi+8] ; M3
fsub dword ptr [edi+0] ; M1
lea ecx, [ebp+DataCheck]
fistp dword ptr [ecx]
cmp [ebp+DataCheck], 0
jnz short loc_4C76DD
fldz
fstp [ebp+FractionY]
jmp short loc_4C773F
; ---------------------------------------------------------------------------
loc_4C76DD:
cmp [ebp+DataCheck], -255 ; Blacks. Edge too dark. Limit was extrapolated
jge short loc_4C7709
xor edx, edx
mov ecx, 255
mov eax, [ebp+DataCheck]
neg eax
div ecx
inc eax
imul eax, 255
mov [ebp+Divisor], eax
fild [ebp+DataCheck]
fidiv [ebp+Divisor]
fstp [ebp+FractionY]
jmp short loc_4C773F
; ---------------------------------------------------------------------------
loc_4C7709:
cmp [ebp+DataCheck], 255 ; Whites. Edge too brigth. Limit was extrapolated
jle short loc_4C7733
xor edx, edx
mov ecx, 255
mov eax, [ebp+DataCheck]
div ecx
inc eax
imul eax, 255
mov [ebp+Divisor], eax
fild [ebp+DataCheck]
fidiv [ebp+Divisor]
fstp [ebp+FractionY]
jmp short loc_4C773F
; ---------------------------------------------------------------------------
loc_4C7733:
; Soble = sqrt((255*FractionX)^2+(255*FractionY)^2)) = G = sqrt(Gx^2+Gy^2)
; since FractionX and FractionY can have a maximum of 1 and -1, therefore sobleMax = (255/sqrt(2)) * sqrt(FractionX^2+FractionY^2)
fild [ebp+DataCheck]
fmul Float_One_255
fstp [ebp+FractionY]
loc_4C773F:
mov eax, [ebp+pOutSobelY]
mov ecx, [ebp+FractionY]
mov [eax], ecx
fld [ebp+FractionX]
fmul st, st
fld [ebp+FractionY]
fmul st, st
faddp st(1), st
fsqrt
fmul Float_SobleVarG
lea edx, [ebp+pReturn]
fistp dword ptr [edx]
mov eax, [ebp+pReturn]
cmp eax, 255
jbe short loc_4C776F
mov eax, 255
loc_4C776F:
mov ecx, [ebp+pOutSobel]
mov [ecx], eax
pop ecx
pop edx
pop edi
mov esp, ebp
pop ebp
retn 10h
SobelGetGPLusEx endp
The parameters are:
pMatrix(In):
Pointer to a 3*3 Matrix of Dwords containing the gray values
pOutSobelX(Out):
Pointer to a variable that will store the SobelX value. The variable must be is Dword/Float Sized (It exports a float, but, they are the same size ;) ). The saved value is a Real4 from -1 to 1. So they are, actually, the normalized grey. i.e: Color/255
If the value is negative, it means the pixel is dark. If the value is positive, it means, the pixel is brighter. So, at the end, basically, it goes from 0 to 127 (means dark) and 128 to 255 meaning bright.
The sign here (negative or positive) was kept because we can use the values of Gx and Gy to compute the direction of the pixel with atan(Gy/Gx)
pOutSobelY(Out):
Pointer to a variable that will store the SobelY value. The variable must be is Dword/Float Sized (It exports a float, but, they are the same size ;) ). The saved value is a Real4 from -1 to 1. So they are, actually, the normalized grey. i.e: Color/255
If the value is negative, it means the pixel is dark. If the value is positive, it means, the pixel is brighter. So, at the end, basically, it goes from 0 to 127 (means dark) and 128 to 255 meaning bright.
The sign here (negative or positive) was kept because we can use the values of Gx and Gy to compute the direction of the pixel with atan(Gy/Gx)
pOutSobel(Out):
Pointer to a variable that will store the Sobel value. The variable must be a Dword. The saved value is a integer from 0 to 255.
Example of usage:
TmpMatrix dd 212, 10, 15
dd 105, 5, 18
dd 215, 178, 0
lea eax D$ebp+SobelX
lea ebx D$ebp+SobelY
lea ecx D$ebp+Sobel
invoke SobelGetGPlusEx, offset TmpMatrix, ecx, ebx, eax
Note:The matrix containing the dwords of greys must be previously filled. They correspond basically to the 3 pixels from the width and 3 pixels from the height. Collected like this:
width = 100
height = 200
Pixel Pos in image (x/y) - On this example, to make it simple, i´ll just posting here the grey correspondent values and not the bytes from all channels, ok ?
X0/Y0
212 10 15 59 68 125 656 .....
X0/Y1
105 5 18 25 16 12 56 .....
X0/Y2
215 178 0 9 8 15 65 .....
The matrix is, therefore, always in this order:
M1 M2 M3
M4 M5 M6
M7 M8 M9
I don´t remember how equates are used in masm, but on the SobelGetGPLusEx function, when you see [edi+20], it actually refers to M6 position of the matrix.
Therefore:
M1 = 0
M2 = 4
M3 = 8
M4 = 12
M5 = 16
M6 = 20
M7 = 24
M8 = 28
M9 = 32
Size of the matrix = 36 bytes
The Rosasm version below should be a bit better to read. So here is the original version in RosAsm Syntax
Proc SobelGetGPLusEx:
Arguments @pMatrix, @pOutSobelX, @pOutSobelY, @pOutSobel
Local @pReturn, @DataCheck, @Divisor, @FractionX, @FractionY
Uses edi, edx, ecx
finit
mov edi D@pMatrix
; To calculate Gx^2 later. Therfore Gx = M3+M9 + 2*(M6-M4) - (M7+M1)
fld F$edi+FloatMatricesInt.M6Dis | fsub F$edi+FloatMatricesInt.M4Dis | fmul R$Float_Two
fadd F$edi+FloatMatricesInt.M3Dis | fadd F$edi+FloatMatricesInt.M9Dis
fsub F$edi+FloatMatricesInt.M7Dis | fsub F$edi+FloatMatricesInt.M1Dis
lea ecx D@DataCheck | fistp F$ecx
If D@DataCheck = 0
fldz | fstp F@FractionX
Else_If D@DataCheck <s 0-255 ; Blacks. Edge too dark
xor edx edx | mov ecx 255 | mov eax D@DataCheck | neg eax | div ecx | inc eax | imul eax 255 | mov D@Divisor eax
fild D@DataCheck | fidiv F@Divisor | fstp F@FractionX
Else_If D@DataCheck >s 255 ; Whites. Edge too brigth
xor edx edx | mov ecx 255 | mov eax D@DataCheck | div ecx | inc eax | imul eax 255 | mov D@Divisor eax
fild D@DataCheck | fidiv F@Divisor | fstp F@FractionX
Else
fild D@DataCheck | fmul R$Float_One_255 | fstp F@FractionX
End_If
mov eax D@pOutSobelX | mov ecx D@FractionX | mov D$eax ecx
; To calculate Gy^2 later. Therefore Gy = M7+M9 + 2*(M8-M2) - (M3+M1)
fld F$edi+FloatMatricesInt.M8Dis | fsub F$edi+FloatMatricesInt.M2Dis | fmul R$Float_Two
fadd F$edi+FloatMatricesInt.M7Dis | fadd F$edi+FloatMatricesInt.M9Dis
fsub F$edi+FloatMatricesInt.M3Dis | fsub F$edi+FloatMatricesInt.M1Dis
lea ecx D@DataCheck | fistp F$ecx
If D@DataCheck = 0
fldz | fstp F@FractionY
Else_If D@DataCheck <s 0-255 ; Blacks
xor edx edx | mov ecx 255 | mov eax D@DataCheck | neg eax | div ecx | inc eax | imul eax 255 | mov D@Divisor eax
fild D@DataCheck | fidiv F@Divisor | fstp F@FractionY; | fmul R$Float255
Else_If D@DataCheck >s 255 ; Whites
xor edx edx | mov ecx 255 | mov eax D@DataCheck | div ecx | inc eax | imul eax 255 | mov D@Divisor eax
fild D@DataCheck | fidiv F@Divisor | fstp F@FractionY; | fmul R$Float255
Else
fild D@DataCheck | fmul R$Float_One_255 | fstp F@FractionY
End_If
mov eax D@pOutSobelY | mov ecx D@FractionY | mov D$eax ecx
; Soble = sqrt((255*FractionX)^2+(255*FractionY)^2)) = G = sqrt(Gx^2+Gy^2)
; since FractionX and FractionY can have a maximum of 1 and -1, therefore sobleMax = (255/sqrt(2)) * sqrt(FractionX^2+FractionY^2)
fld F@FractionX | fmul ST0 ST0 | fld F@FractionY | fmul ST0 ST0 | faddp ST1 ST0 | fsqrt | fmul R$Float_SobleVarG
lea edx D@pReturn
fistp F$edx
mov eax D@pReturn
If eax > 255 ; Can be replaced with movzx eax ax. But i´m keping the comparition to check for errors while debugging.
mov eax 255
End_If
mov ecx D@pOutSobel | mov D$ecx eax
EndP
And RosAsm equates and variables used are:
; variables
[Float_Two: R$ 2]
[FloatOne_255: R$ (1/255)]
[Float_SobleVarG: R$ 180.3122292025696187222153123367365]; 255/sqrt(2))
; equates
[FloatMatricesInt.M1Dis 0
FloatMatricesInt.M2Dis 4
FloatMatricesInt.M3Dis 8
FloatMatricesInt.M4Dis 12
FloatMatricesInt.M5Dis 16
FloatMatricesInt.M6Dis 20
FloatMatricesInt.M7Dis 24
FloatMatricesInt.M8Dis 28
FloatMatricesInt.M9Dis 32]
[Size_Of_FloatMatricesInt 36]
Remarks:1 - Why not using the matrices -1, 0, 1 | -2, 0, 2 | 1, 0, 1 for Gx and 1, 2, 1 | 0, 0, 0 | -1, 2, -1 for Gy ?
To gain speed. This is why i already calculated the math envolving the matrix arithmetics which results in:
Gx = M3+M9 + 2*(M6-M4) - (M7+M1)
Gy = M7+M9 + 2*(M8-M2) - (M3+M1)
2 - Why did i used fractions and comparitions of DataCheck to 0, -255, 255 ?
Well, because those are the places where normal Sobel algorithm fails. While i was testing the algorithm i saw that some pixels on the edges were excessivelly bright or pure black and also found in some images, those extrapolations anywhere in the image.
Then, i realized that what sobel was producing was, in fact:
noise.
But, then i thought, well, where those noises are located after all ? Are they really noise or simply an imperfection of the sobel algorithm itself ? I then, forced the function to export those extrapollations in red so i could actually see where they were located. For my surprise, the vast majority of these extrapolations occours near the true edges resulting in thicker edge line and some random noise here and there along the Gx and Gy edges.
What i saw was that it was generating values in the order of:
-400, -269, -300, -800 too dark
290, 300, 352, 402, etc etc too bright
So, i fixed that, simply calculating how much these values were exceeding and making the following equation:
If the exceeding value is negative i did:
FixedValue = OldValue/(255*(floor(|OldValue|/255)+1))
If the exceeding value is positive:
FixedValue = OldValue/(255*(floor(OldValue/255)+1))
Why is that ? well, i simply deal the excess like an extension of the limit of 255. So, in order for me to keep the values in between 0 and 1 something need to be done.
Therefore, i basically compute how much it is excedding to see which limit it is extrapolating. For example. Say we have a value of 273.
The range is only from 0 to (+/-)255, right ?
So, if the value was found in the range of 273, it means, that it is extrapolating the limit of 255. Therefore i needed to estimate another limit rather then 255 to fix the value. On this case, the new limit is achieved simply dividing the value by 255, and whatever
is the resultant value of the quotient, we simply add 1 to it. Why is that ? Simple...
Let´s say: 273/255 = 1.07058....
It means that it is extrapolating our limit in 0.07058..... Therefore our limit is no longer 255 but. 255*2 = 510. Why is that ? Because it is the next limit (next chunk of 255) that may fits to that remainder.
So, our quotient = 1 , we do (1+1)*255 = 510 as our new limit.
Then i simply divide the value from with this new limit = 273/510 = 0.53294....
So, our new fraction to Gx or Gy is now 0.53294.....inside the limits of 0 to 1
And, sure, i kept the sign intact in both cases. That´s why i needed to take the floor of the modulus of X/255 only when the value was negative.

One thing that can also be used on this error cases of Sobel, is properly rebuild the edges. So, we could, simply making the app export those bad locations as bad pixels, and then mark them (as red, for example).
Then after the main sobel operation is over along the whole image, you could simply search for those bad pixels and recalculate the New Edges value (sobel) of them from the "good' sobel values from the neighbour pixels. or do whatever other thing to properly reconstruct those areas, specially because the pixels nearby will also be used in the matrix to compute their own sobels.
So, perhaps, a better techique could be simply flaging this pixel as bad and it´s neighbours ( on a 3x3 matrix) as yellows (warning pixels or something) and avoid computing the sobel from those bad or warned areas.
On this way, you could simply identify all good pixels 1st and create another algorithm to know what to do with the bad pixels and it´s neighbours. But that´s another story. For now, i choose only to calculate their floors to keep the algo as fast and reliable as possible. (Sure, i could also use Scharr operators, but this could be done later with his own algo).
3 - Why using floats to export the values in Gx and Gy and on the other hand, using integer to G ?
This is because how Sobel actually works. The true value in Sobel G will always be a integer from 0 to 255. Extrapolations are not allowed on mine version whatsoever to avoid having to cut off data.
And i´m using floats in Gx (SobelX) and Gy (SobelY), because, since those values are the ones needed to calculate the final integer G (Sobel), i choose to keep them as fractions, so the result of the squared root could be a bit more accurated then simply using integers. G is computed as:
G = sqrt(Gx^2+Gy^2)
So, keeping the values of Gy and Gx as floating point, results on a more accurated result wich always will only varies from a margin of error of 1 depending of the rounding mode of the system. And also, we can use the values of Gx and Gy to compute the direction (angle) of the pixel with atan(Gy/Gx).
At the very end of the function, you can see that it will only needed to check is G in eax is bigger then 255 (which, btw, may not never happens, or when it does, the extrapolation is at the maximum value of 256 due to the round mode).
Sure...i could, as well, gain a bit more speed simply making movzx eax ax at the end, instead of comparing it with 255. But since i´m testing all of this, i kept the If/EndIf macros, mainly for debugging purposes.