News:

Masm32 SDK description, downloads and other helpful links
Message to All Guests
NB: Posting URL's See here: Posted URL Change

Main Menu

Speech API sample

Started by Vortex, February 10, 2025, 08:00:04 AM

Previous topic - Next topic

Vortex

Hello,

Here is a quick speech API demo :

; Speech API (SAPI) demo, 32-bit MASM, flat model, stdcall.
; Creates an SpVoice COM object, lowers the speaking rate and speaks MyText.
; Every HRESULT is checked (negative = failure) so that a failed COM call
; never leads to a method call through a null interface pointer.
.386
.model flat,stdcall
option casemap:none

include         SAPIsample.inc

.data

; CLSID_SpVoice = {96749377-3391-11D2-9EE3-00C04F797396}
CLSID_SpVoice   GUID {096749377h,03391h,011D2h,<09Eh,0E3h,000h,0C0h,04Fh,079h,073h,096h>}
; IID_ISpVoice  = {6C44DF74-72B9-4992-A1EC-EF996E0422D4}
IID_ISpVoice    GUID {06C44DF74h,072B9h,04992h,<0A1h,0ECh,0EFh,099h,06Eh,004h,022h,0D4h>}

; Text passed to Speak as a zero-terminated wide string.
; NOTE(review): a multi-character string in a dw initializer is
; assembler-specific - confirm the assembler in use accepts it.
MyText          dw 'Hello, this is a speech API sample.',0

.data?

pVoice          dd ?                    ; receives the ISpVoice interface pointer

.code

start:

    invoke  CoInitialize,0
    test    eax,eax                     ; S_OK and S_FALSE are both success
    js      sapi_exit                   ; COM not initialized: nothing to undo

    invoke  CoCreateInstance,ADDR CLSID_SpVoice,\
            NULL,CLSCTX_ALL,ADDR IID_ISpVoice,\
            ADDR pVoice
    test    eax,eax
    js      sapi_uninit                 ; no object was created, skip the calls

    coinvk  pVoice,ISpVoice,SetRate,<-2>

    coinvk  pVoice,ISpVoice,Speak,<OFFSET MyText>,\
            SPF_DEFAULT,NULL

    coinvk  pVoice,ISpVoice,Release

sapi_uninit:

    invoke  CoUninitialize

sapi_exit:

    invoke  ExitProcess,0

END start

Vortex

Here is the 64-bit version.

ognil

;*******************************************************************; 
Hi Vortex,
Thank you for the interesting examples. :thumbsup: 
Unfortunately, I don't have enough time and desire to study the macros in your file "SAPISample64.inc" and therefore I haven't used it.
In addition, there are no explanations in both files, which further complicated it for me.
Also, there is no need to copy and include the vTable structure of the pVoice interface.
Therefore, I rewrote your example so that it is understandable even for beginners like me : :smiley:

;***********************************************************************************************;
; Text-to-speech demo for MASM64 using the Microsoft Speech API (SAPI).
; Steps: initialize COM, create the SpVoice object, set the rate, call Speak,
; release the interface and uninitialize COM.
; The ISpVoice methods are called directly through the vtable, so no interface
; structure has to be declared; only the three slot offsets below are needed.
; ABI: Microsoft x64 (args in rcx, rdx, r8, r9; fifth argument at [rsp+4*8]).
; NOTE(review): the code never adjusts rsp itself; it assumes the procedure
; prologue supplied by masm64rt.inc reserves the shadow/argument space - confirm.
;***********************************************************************************************;
include  \masm64\include64\masm64rt.inc

; Byte offsets of the methods used here inside the ISpVoice vtable
ISPVOICE_RELEASE    equ  2*8                      ; IUnknown::Release
ISPVOICE_SPEAK      equ 20*8                      ; ISpVoice::Speak
ISPVOICE_SETRATE    equ 28*8                      ; ISpVoice::SetRate

; 2 = COINIT_APARTMENTTHREADED, 4 = COINIT_DISABLE_OLE1DDE (disables DDE for OLE1 support)
SAPI_COINIT_FLAGS   equ 2 or 4

.data?
pVoice    dq ?                                    ; receives the ISpVoice interface pointer

.data
; CLSID_SpVoice = {96749377-3391-11D2-9EE3-00C04F797396}
CLSID_SpVoice   GUID {096749377h,03391h,011D2h,<09Eh,0E3h,000h,0C0h,04Fh,079h,073h,096h>}
; IID_ISpVoice  = {6C44DF74-72B9-4992-A1EC-EF996E0422D4}
IID_ISpVoice    GUID {06C44DF74h,072B9h,04992h,<0A1h,0ECh,0EFh,099h,06Eh,004h,022h,0D4h>}

; Wide (UTF-16) text to speak, one character per word, zero terminated
szText  dw 'H','e','l','l','o',' ','t','h','i','s',' ','i','s',' '
    dw 'a',' ','s','p','e','e','c','h',' ','A','P','I',' '
    dw 's','a','m','p','l','e',' ','c','o','d','e','d',' ','w','i','t','h'
    dw ' ','M','A','S','M','6','4','.',0,0
;***********************************************************************;

.code
entry_point  proc
; CoInitializeEx(NULL, flags) initializes the COM library on the current thread.
; This is necessary before using any COM objects.
             xor     ecx, ecx                    ; pvReserved = NULL
             mov     edx, SAPI_COINIT_FLAGS
             call    CoInitializeEx

; A negative HRESULT means failure. S_FALSE (1) is a success code, so the
; sign is tested rather than comparing against zero.
             test    eax, eax
             js      @Ret                        ; COM not initialized: just exit

; CoCreateInstance creates an instance of the SpVoice object.
; CLSID_SpVoice identifies the class and IID_ISpVoice the requested interface.
; CLSCTX_INPROC_SERVER asks for an in-process (DLL) server.
            lea    rax, pVoice                    ; ppv (fifth argument)
            lea    r9,  IID_ISpVoice
            mov    [rsp+4*8], rax
            mov    r8d, CLSCTX_INPROC_SERVER      ; CLSCTX_INPROC_SERVER = 1
            xor    edx, edx                       ; pUnkOuter = NULL
            lea    rcx, CLSID_SpVoice
            call   CoCreateInstance
            test   eax, eax
            js     @Err                           ; no object: skip the method calls

; ISpVoice::SetRate - set the speaking rate
            mov    rcx,  pVoice                   ; rcx = this
            mov    rdx,  -2                       ; rate adjustment -> speak slowly
            mov    rax,  [rcx]                    ; rax = vtable of the interface
            call   qword ptr[rax+ISPVOICE_SETRATE]

; ISpVoice::Speak - read the string aloud
            mov     rcx,  pVoice                  ; rcx = this
            xor     r9d,  r9d                     ; pulStreamNumber = NULL
            xor     r8d,  r8d                     ; dwFlags = 0 = SPF_DEFAULT
            mov     rax,  [rcx]                   ; rax = vtable of the interface
            lea     rdx,  szText
            call    qword ptr[rax+ISPVOICE_SPEAK]

; IUnknown::Release - drop the reference held in pVoice
            mov     rcx,  pVoice                  ; rcx = this
            mov     rax,  [rcx]                   ; rax = vtable of the interface
            call    qword ptr[rax+ISPVOICE_RELEASE]
@Err:
; Uninitialize the COM library (pairs with the successful CoInitializeEx)
            call    CoUninitialize
@Ret:
            xor     ecx, ecx                      ; exit code 0
            call    ExitProcess
            ret                                   ; not reached
entry_point endp
end

"Not keeping emotions under control is another type of mental distortion."

NoCforMe

Assembly language programming should be fun. That's why I do it.

six_L

Using the COM macros built into UASM64, the code becomes simpler:
; Method table of the SAPI voice interface, declared with the COMINTERFACE /
; CVIRTUAL macros. Each CVIRTUAL line occupies one vtable slot, so the order of
; the lines must not be changed.
; The three IUnknown methods are left commented out, presumably because
; COMINTERFACE declares them itself - confirm against the macro library.
COMINTERFACE IActiveSpeech

;CVIRTUAL QueryInterface,QWORD,riid:QWORD ,ppvObject:QWORD
;CVIRTUAL AddRef,QWORD
;CVIRTUAL Release,QWORD
; --- notification source methods ---
CVIRTUAL SetNotifySink,QWORD ,pNotifySink:QWORD
CVIRTUAL SetNotifyWindowMessage,QWORD ,hWnd:HWND ,Msg:DWORD ,wParam:WPARAM ,lParam:LPARAM
CVIRTUAL SetNotifyCallbackFunction,QWORD ,pfnCallback:QWORD ,wParam:WPARAM ,lParam:LPARAM
CVIRTUAL SetNotifyCallbackInterface,QWORD ,pSpCallback:QWORD ,wParam:WPARAM ,lParam:LPARAM
CVIRTUAL SetNotifyWin32Event,QWORD
CVIRTUAL WaitForNotifyEvent,QWORD,dwMilliseconds:DWORD
CVIRTUAL GetNotifyEventHandle1,QWORD
; --- event source methods ---
CVIRTUAL SetInterest,QWORD,ullEventInterest:QWORD ,ullQueuedInterest:QWORD
CVIRTUAL GetEvents,QWORD,ulCount:DWORD ,pEventArray:QWORD ,pulFetched:QWORD
CVIRTUAL GetInfo,QWORD,pInfo:QWORD
; --- voice methods ---
CVIRTUAL SetOutput,QWORD,pUnkOutput:QWORD ,fAllowFormatChanges:DWORD
CVIRTUAL GetOutputObjectToken,QWORD,ppObjectToken:QWORD
CVIRTUAL GetOutputStream,QWORD,ppStream:QWORD
CVIRTUAL pause1,QWORD
CVIRTUAL Resume,QWORD
CVIRTUAL SetVoice,QWORD,pToken:QWORD
CVIRTUAL GetVoice,QWORD,ppToken:QWORD
CVIRTUAL Speak,QWORD,pwcs:QWORD ,dwFlags:DWORD ,pulStreamNumber:QWORD
CVIRTUAL SpeakStream,QWORD,pStream:QWORD ,dwFlags:DWORD ,pulStreamNumber:QWORD
CVIRTUAL GetStatus,QWORD,pStatus:QWORD ,ppszLastBookmark:QWORD
CVIRTUAL Skip,QWORD,pItemType:QWORD ,lNumItems:DWORD ,pulNumSkipped:QWORD
; NOTE: despite the "p" prefix, the Set* arguments below are passed by value
; (SetRate is called with the immediate -2 in this thread), not as pointers.
CVIRTUAL SetPriority,QWORD,pPriority:QWORD
CVIRTUAL GetPriority,QWORD,pePriority:QWORD
CVIRTUAL SetAlertBoundary,QWORD,pBoundary:QWORD
CVIRTUAL GetAlertBoundary,QWORD,peBoundary:QWORD
CVIRTUAL SetRate,QWORD,pRateAdjust:QWORD
CVIRTUAL GetRate,QWORD,pRateAdjust:QWORD
CVIRTUAL SetVolume,QWORD,pVolume:QWORD
CVIRTUAL GetVolume,QWORD,pusVolume:QWORD
; msTimeout is a 32-bit millisecond count (e.g. INFINITE = 0FFFFFFFFh does not
; fit in a WORD), so it is declared DWORD like the other timeout parameters.
CVIRTUAL WaitUntilDone,QWORD,msTimeout:DWORD
CVIRTUAL SetSyncSpeakTimeout,QWORD,msTimeout:DWORD
CVIRTUAL GetSyncSpeakTimeout,QWORD,pmsTimeout:QWORD
CVIRTUAL SpeakCompleteEvent,QWORD
CVIRTUAL IsUISupported,QWORD ,pszTypeOfUI:QWORD ,pvExtraData:QWORD ,cbExtraData:DWORD ,pfSupported:QWORD
CVIRTUAL DisplayUI,QWORD ,hwndParent:HWND ,pszTitle:QWORD ,pszTypeOfUI:QWORD ,pvExtraData:QWORD ,cbExtraData:DWORD
ENDCOMINTERFACE
; ISpVoice = pointer to an object exposing the method table above
ISpVoice TYPEDEF PTR IActiveSpeech

.data
; Class and interface identifiers passed to CoCreateInstance, laid out as
; GUID fields: one dword, two words, eight bytes.

; CLSID_SpVoice = {96749377-3391-11D2-9EE3-00C04F797396}
CLSID_SpVoice   dd 096749377h
                dw 03391h, 011D2h
                db 09Eh,0E3h,000h,0C0h,04Fh,079h,073h,096h

; IID_ISpVoice = {6C44DF74-72B9-4992-A1EC-EF996E0422D4}
IID_ISpVoice    dd 06C44DF74h
                dw 072B9h, 04992h
                db 0A1h,0ECh,0EFh,099h,06Eh,004h,022h,0D4h
.code
;¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
;-----------------------------------------------------------------------
; WorkerThreadSpeak - thread procedure that speaks an ANSI string.
; In:   plBufSpeak = pointer to a zero-terminated ANSI (CP_ACP) string
; Out:  eax = 0 (thread exit code)
; Initializes COM for this thread, creates the voice object, converts the
; text to wide characters and speaks it at a reduced rate. Every step is
; checked; on failure the remaining steps are skipped and only what was
; actually acquired is released.
; Text that does not fit into 255 wide characters plus the terminator is
; not spoken (the conversion fails and returns 0).
;-----------------------------------------------------------------------
WorkerThreadSpeak proc plBufSpeak:QWORD
local uEdbuffer[256]:WORD               ; wide-character buffer: 256 WCHARs = 512 bytes
local pVoice:ISpVoice

invoke  CoInitialize,NULL
test    eax,eax                         ; negative HRESULT = failure (S_FALSE is success)
js      SpeakExit

invoke  CoCreateInstance,addr CLSID_SpVoice, NULL, CLSCTX_ALL, addr IID_ISpVoice,ADDR pVoice
test    eax,eax
js      SpeakUninit                     ; pVoice is not valid, do not call through it

invoke RtlZeroMemory,ADDR uEdbuffer,sizeof uEdbuffer
; The last argument is a count of wide characters, not bytes, so it must be
; the element count of the buffer.
invoke MultiByteToWideChar,CP_ACP,0,plBufSpeak,-1,addr uEdbuffer,lengthof uEdbuffer
test    eax,eax                         ; 0 = conversion failed
jz      SpeakRelease

_VINVOKE pVoice,IActiveSpeech,SetRate,-2
_VINVOKE pVoice,IActiveSpeech,Speak,addr uEdbuffer,0,NULL

SpeakRelease:
_VINVOKE pVoice,IActiveSpeech,Release

SpeakUninit:
invoke CoUninitialize

SpeakExit:
xor     eax,eax                         ; defined thread exit code
ret

WorkerThreadSpeak endp

; Usage: run WorkerThreadSpeak on its own thread, passing the text as the
; thread parameter. The thread handle is not needed afterwards, so it is
; closed right away; the thread itself keeps running.
invoke CreateThread,NULL,0,offset WorkerThreadSpeak,CStr("Here is the text to speak."),0,NULL
.if rax != 0                            ; NULL = thread was not created, nothing to close
    invoke CloseHandle,rax
.endif

Say you, Say me, Say the codes together for ever.

jj2007

Using Say:
; Text-to-speech with the MasmBasic library: each Say line speaks one string.
include \Masm32\MasmBasic\MasmBasic.inc
  Init          ; MasmBasic start-of-program macro (presumably library setup - see MasmBasic docs)
  Say "Buongiorno, oggi è una bella giornata con tanto sole"
  Say "Hello World, how are you today?"
EndOfCode       ; MasmBasic end-of-program macro

The interesting bit is that on my Italian version of Windows 10 the "Buongiorno" sounds perfect; the "Hello World", however, comes over correctly but with a strong Italian accent, i.e. "Ello World" :greensml: