Hi
The RichEdit control comes with its own built-in word-breaking routine. It is intended for the most common cases, but if the control is used for development purposes, the routine becomes very annoying after a short while. :sad:
I looked around for information on the subject, but the documentation I found is scarce and poor.
My goal was to implement a procedure that works similarly to the one in RadAsm, but I quickly realized that the API infrastructure provided is very rudimentary, so I had to arrange things as best as possible. :cool:
The code looks like
; Procedure: DbgMdiChildTxt_WordBreak_Callback
; Purpose: Callback proc to break a line of text.
; Arguments: Arg1: -> Wide character Buffer.
; Arg2: Current character index.
; Arg3: Number of BYTEs in the buffer.
; Arg4: Action to be taken [WB_LEFT..WB_RIGHTBREAK].
; Return: eax = character index of the word break, or 0 if no break was found.
; Link: https://docs.microsoft.com/en-us/windows/win32/controls/use-word-and-line-break-information
; Notes: The char buffer passed to this callback has a very limited size (usually 20 BYTEs).
; The content of the buffer is always WIDE characters.
; It is possible that the callback will be called many times until the goal is achieved.
; This happens, for example, if an index = 0 is returned, which means that the word break
; is not found.
; Thanks to this behavior and the lack of an lParam to store additional information,
; the implementation has to work on its own, which is somewhat difficult to code.
; From my observations, only WB_MOVEWORDLEFT and WB_MOVEWORDRIGHT are used.
.const
; Word-break lookup table, one CHRA entry per 7-bit ASCII code (128 entries).
;   1 = break character (separates words)
;   0 = non-break character (part of a word)
; Code units >= 128 are not covered by this table; the callback treats them
; as non-break characters without consulting it.
; NOTE(review): the original per-row remarks named '#', '$' and '?' as word
; characters, yet the data marks them as break characters and marks '.' as a
; word character instead. The data is kept unchanged - confirm the intent.
RE_WordBreakLUT label CHRA
;      NUL SOH STX ETX EOT ENQ ACK BEL BS  TAB LF  VT  FF  CR  SO  SI
  CHRA   0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0   ;breaks: TAB, LF, VT, FF, CR
;      DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM  SUB ESC FS  GS  RS  US
  CHRA   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   ;no breaks
;      SPC  !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /
  CHRA   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  1   ;word chars: '.'
;        0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?
  CHRA   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1   ;word chars: 0..9
;        @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
  CHRA   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   ;word chars: @, A..O
;        P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _
  CHRA   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  0   ;word chars: P..Z, _
;        `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
  CHRA   1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   ;word chars: a..o
;        p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  DEL
  CHRA   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  0   ;word chars: p..z, DEL
.code
; ------------------------------------------------------------------------------
; DbgMdiChildTxt_WordBreak_Callback
; Word-break callback for a RichEdit control (installed with EM_SETWORDBREAKPROC).
;
; In:    pCharBuffer -> buffer of wide characters (CHRW) to examine.
;        dCharIndex  =  character index at which the search starts.
;        dByteCount  =  size of the buffer in BYTEs (halved here to get chars).
;        dCode       =  requested action, WB_LEFT..WB_RIGHTBREAK.
; Out:   eax = character index of the word boundary found.
;              WB_MOVEWORDLEFT : index just right of the boundary, or 0 when the
;                                start of the buffer was reached.
;              WB_MOVEWORDRIGHT: index of the boundary, or the character count
;                                when the end of the buffer was reached.
;              any other code  : 0.
; Regs:  xsi = buffer base, xbx = RE_WordBreakLUT base, eax = running index,
;        ecx = character count (WB_MOVEWORDRIGHT only), xdx = current code unit.
;        xbx and xsi are preserved through the "uses" clause.
; Notes: A code unit >= 128 is always treated as a non-break character; the
;        lookup table is only consulted for code units below 128.
; ------------------------------------------------------------------------------
DbgMdiChildTxt_WordBreak_Callback proc uses xbx xsi pCharBuffer:POINTER, \
                                  dCharIndex:DWORD, dByteCount:DWORD, dCode:DWORD
  .if dCode == WB_MOVEWORDLEFT                          ;4 = WB_MOVEWORDPREV
    mov eax, dCharIndex
    ;Nothing lies to the left of index 0. Without this guard the "dec eax"
    ;below would wrap to 0FFFFFFFFh and the scan would read outside the buffer.
    .if eax != 0
      mov xsi, pCharBuffer
      mov xbx, offset RE_WordBreakLUT
      dec eax                                           ;Char left of the start index
      movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
      .if (xdx >= 128) || (CHRA ptr [xbx + xdx] == 0)
        ;Started on a non-break char: walk left to the first break char
        .while eax != 0
          movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
          .if (xdx < 128) && (CHRA ptr [xbx + xdx] == 1)
            inc eax                                     ;Boundary is right of the break char
            .break
          .endif
          dec eax
        .endw
      .else
        ;Started on a break char: walk left to the first non-break char
        .while eax != 0
          movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
          .if (xdx >= 128) || (CHRA ptr [xbx + xdx] == 0)
            inc eax                                     ;Boundary is right of the non-break char
            .break
          .endif
          dec eax
        .endw
      .endif
    .endif                                              ;eax = 0 when dCharIndex was 0

  .elseif dCode == WB_MOVEWORDRIGHT                     ;5 = WB_MOVEWORDNEXT
    mov eax, dCharIndex
    mov ecx, dByteCount
    shr ecx, 1                                          ;ecx = # of chars
    .if eax >= ecx
      ;Start index is at or beyond the end: report the end of the buffer
      ;instead of reading past it.
      mov eax, ecx
    .else
      mov xsi, pCharBuffer
      mov xbx, offset RE_WordBreakLUT
      movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
      .if (xdx >= 128) || (CHRA ptr [xbx + xdx] == 0)
        ;Started on a non-break char: walk right to the first break char
        .while eax < ecx
          movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
          .break .if (xdx < 128) && (CHRA ptr [xbx + xdx] == 1)
          inc eax
        .endw
      .else
        ;Started on a break char: walk right to the first non-break char
        .while eax < ecx
          movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
          .break .if (xdx >= 128) || (CHRA ptr [xbx + xdx] == 0)
          inc eax
        .endw
      .endif
    .endif

  .else
    ;WB_LEFT (0), WB_RIGHT (1), WB_ISDELIMITER (2), WB_CLASSIFY (3),
    ;WB_LEFTBREAK (6), WB_RIGHTBREAK (7) and unknown codes are not handled
    ;and all return 0.
    xor eax, eax
  .endif
  ret
DbgMdiChildTxt_WordBreak_Callback endp
I used this code in the new DebugCenter (Version 2.1.1) release, in case someone wants to see it in action. It can be downloaded from the Dev-Repo.
Biterider
Hi
I forgot to say, that the registration of the callback routine is done using
invoke SendMessage, hEdit, EM_SETWORDBREAKPROC, 0, offset DbgMdiChildTxt_WordBreak_Callback
Biterider