Author Topic: Word-Break procedure for RichEdit  (Read 588 times)

Biterider

  • Moderator
  • Member
  • *****
  • Posts: 896
  • ObjAsm Developer
    • ObjAsm
Word-Break procedure for RichEdit
« on: November 27, 2021, 07:04:52 PM »
Hi
The RichEdit control comes with its own built-in word-breaking routine. It is intended for the most common cases, but if the control is used for development purposes, the routine becomes very annoying after a short while.  :sad:
I looked around for information on the subject, but the documentation I found is scarce and poor.
My goal was to implement a procedure that works similarly to the one in RadAsm, but I quickly realized that the API infrastructure provided is very rudimentary, so I had to arrange things as best as possible.  :cool:

The code looks like

Code: [Select]
; Procedure:  DbgMdiChildTxt_WordBreak_Callback
; Purpose:    Callback proc to break a line of text.
; Arguments:  Arg1: -> Wide character Buffer.
;             Arg2: Current character index.
;             Arg3: Number of BYTEs in the buffer.
;             Arg4: Action to be taken [WB_LEFT..WB_RIGHTBREAK].
; Return:     eax = character index of the word break; 0 if none was found or the action is not handled.
; Link:       https://docs.microsoft.com/en-us/windows/win32/controls/use-word-and-line-break-information
; Notes:      The char buffer passed to this callback has a very limited size (usually 20 BYTEs).
;             The content of the buffer is always WIDE characters.
;             It is possible that the callback will be called many times until the goal is achieved.
;             This happens, for example, if an index = 0 is returned, which means that the word break
;             is not found.
;             Thanks to this behavior and the lack of an lParam to store additional information,
;             the implementation has to work on its own, which is somewhat difficult to code.
;             From my observations, only WB_MOVEWORDLEFT and WB_MOVEWORDRIGHT are used.

.const
RE_WordBreakLUT label CHRA                              ;Word break character lookup table
  ;128 entries, indexed by the ASCII code of a character (8 rows of 16 columns).
  ;  001h = word-break character, 000h = non-break (word) character.
  ;NOTE(review): earlier row comments named '#', '$' and '?' as non-break chars, but the data
  ;  flags them as break chars (001h) while '.' is flagged as non-break (000h).
  ;  Confirm which one is intended; the data below is left untouched.
  ;     NUL  SOH  STX  ETX  EOT  ENQ  ACK  BEL  BS   TAB  LF   VT   FF   CR   SO   SI
  CHRA 000h,000h,000h,000h,000h,000h,000h,000h,000h,001h,001h,001h,001h,001h,000h,000h
  ;     DLE  DC1  DC2  DC3  DC4  NAK  SYN  ETB  CAN  EM   SUB  ESC  FS   GS   RS   US
  CHRA 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h
  ;     SPC   !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
  CHRA 001h,001h,001h,001h,001h,001h,001h,001h,001h,001h,001h,001h,001h,001h,000h,001h  ;non-break: .
  ;      0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
  CHRA 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,001h,001h,001h,001h,001h,001h  ;non-break: 0..9
  ;      @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
  CHRA 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h  ;non-break: @, A..O
  ;      P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
  CHRA 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,001h,001h,001h,001h,000h  ;non-break: P..Z, _
  ;      `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
  CHRA 001h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h  ;non-break: a..o
  ;      p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~   DEL
  CHRA 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,001h,001h,001h,001h,000h  ;non-break: p..z, DEL

.code
; Word-break callback: returns in eax the character index of the word boundary found for
; dCode, or 0 if no boundary lies inside the buffer or the action is not handled.
; Only WB_MOVEWORDLEFT and WB_MOVEWORDRIGHT are implemented; every other action returns 0.
; Chars >= 128 have no RE_WordBreakLUT entry and are always treated as non-break chars.
; Register roles: xsi -> wide char buffer, xbx -> RE_WordBreakLUT, eax = running char index,
;                 ecx = number of chars in the buffer (WB_MOVEWORDRIGHT only), xdx = current char.
DbgMdiChildTxt_WordBreak_Callback proc uses xbx xsi pCharBuffer:POINTER, \
                                           dCharIndex:DWORD, dByteCount:DWORD, dCode:DWORD
  .if dCode == WB_MOVEWORDLEFT                          ;4 = WB_MOVEWORDPREV
    mov eax, dCharIndex
    .if eax != 0                                        ;Index 0 has no char to its left: return 0
                                                        ;  instead of reading in front of the buffer
      mov xsi, pCharBuffer
      dec eax                                           ;eax = index of the char left of dCharIndex
      mov xbx, offset RE_WordBreakLUT

      ;Classify the start char; "||" short-circuits, so the LUT is only read for chars < 128
      movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
      .if ((xdx >= 128) || (CHRA ptr [xbx + xdx] == 0))
        ;Started on a non-break char => scan left for the first break char
        .while eax != 0
          movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
          .if ((xdx < 128) && (CHRA ptr [xbx + xdx] == 1))
            inc eax                                     ;Boundary is just right of the break char
            .break
          .endif
          dec eax
        .endw
      .else
        ;Started on a break char => scan left for the first non-break char
        .while eax != 0
          movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
          .if ((xdx >= 128) || (CHRA ptr [xbx + xdx] == 0))
            inc eax                                     ;Boundary is just right of the non-break char
            .break
          .endif
          dec eax
        .endw
      .endif
      ;Falling out of a loop with eax = 0 means the boundary was not found in this buffer
    .endif

  .elseif dCode == WB_MOVEWORDRIGHT                     ;5 = WB_MOVEWORDNEXT
    mov eax, dCharIndex
    mov ecx, dByteCount
    shr ecx, 1                                          ;ecx = # of chars (2 BYTEs per wide char)
    .if eax < ecx                                       ;Only touch the buffer if the index is inside
                                                        ;  it; otherwise dCharIndex is returned as is
      mov xsi, pCharBuffer
      mov xbx, offset RE_WordBreakLUT

      ;Classify the start char; "||" short-circuits, so the LUT is only read for chars < 128
      movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
      .if ((xdx >= 128) || (CHRA ptr [xbx + xdx] == 0))
        ;Started on a non-break char => scan right for the first break char
        .while eax != ecx
          movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
          .break .if ((xdx < 128) && (CHRA ptr [xbx + xdx] == 1))
          inc eax
        .endw
      .else
        ;Started on a break char => scan right for the first non-break char
        .while eax != ecx
          movzx xdx, CHRW ptr [xsi + sizeof(CHRW)*xax]
          .break .if ((xdx >= 128) || (CHRA ptr [xbx + xdx] == 0))
          inc eax
        .endw
      .endif
      ;Falling out of a loop with eax = ecx means the end of the buffer was reached
    .endif

  .else
    ;WB_LEFT (0), WB_RIGHT (1), WB_ISDELIMITER (2), WB_CLASSIFY (3),
    ;WB_LEFTBREAK (6), WB_RIGHTBREAK (7) and unknown codes are not handled
    xor eax, eax
  .endif
  ret
DbgMdiChildTxt_WordBreak_Callback endp

I used this code in the new DebugCenter (Version 2.1.1) release, in case someone wants to see it in action. It can be downloaded from the Dev-Repo.

Biterider

Biterider

  • Moderator
  • Member
  • *****
  • Posts: 896
  • ObjAsm Developer
    • ObjAsm
Re: Word-Break procedure for RichEdit
« Reply #1 on: November 28, 2021, 02:12:16 AM »
Hi
I forgot to say that the callback routine is registered using

Code: [Select]
invoke SendMessage, hEdit, EM_SETWORDBREAKPROC, 0, offset DbgMdiChildTxt_WordBreak_Callback
Biterider