Triggering code from sound input: how to?

dedndave · January 08, 2013, 01:45:58 AM

hi Marinus - good to see you :t

Siekmanski · January 08, 2013, 02:47:19 AM

Hi Dave

I'm still alive :t
Little free time for programming :(

Gunther · January 08, 2013, 06:20:44 AM

Hi Marinus,

good to see you again. I hope you're doing well.

Gunther

Donkey · January 08, 2013, 02:05:31 PM

Been playing around with getting sound from the PC microphone and though I am getting data I have no idea at all how to interpret it, here's the code:

Code Select

DATA SECTION
	caps		WAVEINCAPSA <>		; filled in by waveInGetDevCaps in GetMicrophone
	pBuffer		PTR ?			; recording buffer allocated in GetMicInput

CODE SECTION

;
; Program entry point: look up the microphone device and, when a device ID
; was returned (eax not negative), hand it to GetMicInput. The process then
; exits with code 0 in either case.
;
START:
	invoke GetMicrophone,0
	cmp eax,0
	jl >.QUIT			; negative result: no recording
	invoke GetMicInput,eax,0
	.QUIT
	invoke ExitProcess,0

GetMicrophone FRAME Shiss
	uses ebx,edi,esi
	;
	; Finds the first wave-input device whose product name begins with
	; "microphone" (case-insensitive compare of the first 10 characters).
	;
	; In:   Shiss - not used by this procedure
	; Out:  eax   - device ID of the match, or -1 when nothing matched
	;
	; Registers: ebx = number of devices, esi = device ID being examined,
	;            edi = result
	;
	invoke waveInGetNumDevs
	mov edi,-1
	test eax,eax
	jz >>.EXIT			; no input devices: nothing to query
	mov ebx,eax
	xor esi,esi
	; The loop top is a named label. An anonymous label here would not be
	; reached by "<", which selects the nearest anonymous label behind the
	; jump, so only device 0 would ever be examined.
	.SCAN
		invoke waveInGetDevCaps,esi,offset caps,SIZEOF WAVEINCAPSA
		test eax,eax
		jnz >.NEXT		; query failed, caps is not valid for this ID
		; cut the name after 10 characters so only the prefix is compared
		mov eax,offset caps.szPname + 10
		mov B[eax],0
		invoke lstrcmpi,offset caps.szPname,"microphone"
		test eax,eax
		jnz >.NEXT
			mov edi,esi
			jmp >>.EXIT
		.NEXT
		inc esi
		cmp esi,ebx
		jb <.SCAN		; device IDs and count are unsigned
	.EXIT
	mov eax,edi
	ret
endf

GetMicInput FRAME uID,pSound
	uses edi,esi,ebx
	LOCAL hWAV:%HANDLE
	LOCAL wfx:WAVEFORMATEX
	LOCAL whdr:WAVEHDR

	;
	; Records about one second of audio from wave-input device uID into an
	; 8MB buffer whose address is kept in the global pBuffer, then releases
	; the device and the buffer again.
	;
	; In:   uID    - wave-input device ID
	;       pSound - not used by this procedure
	; Out:  eax is not a defined result
	;
	; Every step that can fail is checked and all exits go through the same
	; cleanup sequence: unprepare header -> close device -> free buffer.
	;

	mov D[hWAV],-1

	; PCM, 2 channels, 8000 samples/s, 8 bits per sample
	; nBlockAlign = 2 channels * 1 byte, nAvgBytesPerSec = 8000 * 2
	mov W[wfx.wFormatTag],WAVE_FORMAT_PCM
	mov W[wfx.nChannels],2
	mov D[wfx.nSamplesPerSec],8000
	mov W[wfx.nBlockAlign],2
	mov D[wfx.nAvgBytesPerSec],16000
	mov W[wfx.wBitsPerSample],8
	mov W[wfx.cbSize],0

	invoke waveInOpen,offset hWAV,[uID],offset wfx,offset MonitorMicrophone,0,CALLBACK_FUNCTION
	test eax,eax
	jnz >>.EXIT			; device not opened, nothing to clean up

	; Create a buffer (8MB)

	invoke GlobalAlloc,GMEM_FIXED | GMEM_ZEROINIT,8*1024*1024
	mov [pBuffer],eax
	test eax,eax
	jz >>.CLOSE			; allocation failed

	mov [whdr.lpData],eax
	mov D[whdr.dwBufferLength],8*1024*1024
	mov D[whdr.dwBytesRecorded],0
	mov D[whdr.dwUser],0
	mov D[whdr.dwFlags],0
	mov D[whdr.dwLoops],0
	mov D[whdr.lpNext],0
	mov D[whdr.reserved],0

	invoke waveInPrepareHeader,[hWAV],offset whdr,SIZEOF WAVEHDR
	test eax,eax
	jnz >>.CLOSE			; header was not prepared

	invoke waveInAddBuffer, [hWAV],offset whdr,SIZEOF WAVEHDR
	test eax,eax
	jnz >.UNPREP

	invoke waveInStart,[hWAV]

	invoke Sleep,1000

	invoke waveInStop,[hWAV]
	; hand any buffer that is still queued back to the application
	invoke waveInReset,[hWAV]

	; The header has to be unprepared before its buffer is freed. Retry for
	; as long as the driver reports that it still owns the buffer.
	.UNPREP
	invoke waveInUnprepareHeader,[hWAV],offset whdr,SIZEOF WAVEHDR
	cmp eax,WAVERR_STILLPLAYING
	jne >.CLOSE
	invoke Sleep,10
	jmp <.UNPREP

	.CLOSE
	invoke waveInClose,[hWAV]

	cmp D[pBuffer],0
	je >.EXIT
	invoke GlobalFree,[pBuffer]
	mov D[pBuffer],0		; do not leave a dangling pointer in the global

	.EXIT
	ret
endf

MonitorMicrophone FRAME hwi, msg, dwInstance, dwParam1, dwParam2
	;
	; Callback whose address is passed to waveInOpen (CALLBACK_FUNCTION).
	;
	; For WIM_DATA, dwParam1 is used as a pointer to a WAVEHDR and two of
	; its fields are loaded:
	;     ecx = dwBytesRecorded
	;     eax = lpData
	; Nothing further is done with them. Every other message, including
	; WIM_OPEN and WIM_CLOSE, returns straight away.
	;
	cmp D[msg],WIM_DATA
	jne >.DONE
		mov eax,[dwParam1]
		mov ecx,[eax+WAVEHDR.dwBytesRecorded]
		mov eax,[eax+WAVEHDR.lpData]
	.DONE
	ret
endf

Everything returns without error and I have 15830 bytes recorded, there is definitely data written to the buffer but I don't know enough about WAV data to know how to use or interpret the data.

Edgar

dedndave · January 08, 2013, 02:10:01 PM

you could probably store the data as a wav file and play it back with wmp to test it

Donkey · January 08, 2013, 02:34:57 PM

Quote from: dedndave on January 08, 2013, 02:10:01 PM
you could probably store the data as a wav file and play it back with wmp to test it

There doesn't seem to be a RIFF header, only raw data. I may try to prepend a RIFF header to the data as well as the fmt and data chunk information and try to play it using PlaySound with SND_MEMORY | SND_NODEFAULT. But that's for another day.

Donkey · January 08, 2013, 03:19:39 PM

Ok, this bothered me too much to leave it till tomorrow. It was the fact that there was no RIFF header. The following code builds the header and plays back what the microphone recorded.

The RIFF header takes the format:

Code Select

DATA SECTION
	caps		WAVEINCAPSA <>		; filled in by waveInGetDevCaps in GetMicrophone
	pBuffer		PTR ?			; WAV header + recording buffer allocated in GetMicInput

CODE SECTION

;
; Program entry point: look up the microphone device and, when a device ID
; was returned (eax not negative), hand it to GetMicInput. The process then
; exits with code 0 in either case.
;
START:
	invoke GetMicrophone,0
	cmp eax,0
	jl >.QUIT			; negative result: no recording
	invoke GetMicInput,eax,0
	.QUIT
	invoke ExitProcess,0

GetMicrophone FRAME Shiss
	uses ebx,edi,esi
	;
	; Finds the first wave-input device whose product name begins with
	; "microphone" (case-insensitive compare of the first 10 characters).
	;
	; In:   Shiss - not used by this procedure
	; Out:  eax   - device ID of the match, or -1 when nothing matched
	;
	; Registers: ebx = number of devices, esi = device ID being examined,
	;            edi = result
	;
	invoke waveInGetNumDevs
	mov edi,-1
	test eax,eax
	jz >>.EXIT			; no input devices: nothing to query
	mov ebx,eax
	xor esi,esi
	; The loop top is a named label. An anonymous label here would not be
	; reached by "<", which selects the nearest anonymous label behind the
	; jump, so only device 0 would ever be examined.
	.SCAN
		invoke waveInGetDevCaps,esi,offset caps,SIZEOF WAVEINCAPSA
		test eax,eax
		jnz >.NEXT		; query failed, caps is not valid for this ID
		; cut the name after 10 characters so only the prefix is compared
		mov eax,offset caps.szPname + 10
		mov B[eax],0
		invoke lstrcmpi,offset caps.szPname,"microphone"
		test eax,eax
		jnz >.NEXT
			mov edi,esi
			jmp >>.EXIT
		.NEXT
		inc esi
		cmp esi,ebx
		jb <.SCAN		; device IDs and count are unsigned
	.EXIT
	mov eax,edi
	ret
endf

GetMicInput FRAME uID,pSound
	uses edi,esi,ebx
	LOCAL hWAV:%HANDLE
	LOCAL wfx:WAVEFORMATEX
	LOCAL whdr:WAVEHDR

	;
	; Records about one second of audio from wave-input device uID, then
	; plays the recording back through BuildWAVFormat.
	;
	; The global pBuffer points at one allocation laid out as
	;     [44 bytes reserved for the WAV header][8MB of sample data]
	; and the recording is written to pBuffer+44.
	;
	; In:   uID    - wave-input device ID
	;       pSound - not used by this procedure
	; Out:  eax is not a defined result
	;
	; Playback is done here, after the driver has handed the buffer back,
	; and not inside the MonitorMicrophone callback: the waveInProc rules
	; allow only a short list of system calls inside the callback, and the
	; buffer must not be freed while it is still being played.
	;
	; Every step that can fail is checked and all exits go through the same
	; cleanup sequence: unprepare header -> close device -> play -> free.
	;

	mov D[hWAV],-1

	; PCM, 2 channels, 8000 samples/s, 8 bits per sample
	; nBlockAlign = 2 channels * 1 byte, nAvgBytesPerSec = 8000 * 2
	mov W[wfx.wFormatTag],WAVE_FORMAT_PCM
	mov W[wfx.nChannels],2
	mov D[wfx.nSamplesPerSec],8000
	mov W[wfx.nBlockAlign],2
	mov D[wfx.nAvgBytesPerSec],16000
	mov W[wfx.wBitsPerSample],8
	mov W[wfx.cbSize],0

	invoke waveInOpen,offset hWAV,[uID],offset wfx,offset MonitorMicrophone,0,CALLBACK_FUNCTION
	test eax,eax
	jnz >>.EXIT			; device not opened, nothing to clean up

	; Create a buffer (8MB) + RIFF header

	invoke GlobalAlloc,GMEM_FIXED | GMEM_ZEROINIT,8*1024*1024 + 44
	mov [pBuffer],eax
	test eax,eax
	jz >>.CLOSE			; allocation failed

	add eax,44			; leave space for the WAV header
	mov [whdr.lpData],eax

	mov D[whdr.dwBufferLength],8*1024*1024
	mov D[whdr.dwBytesRecorded],0
	mov D[whdr.dwUser],0
	mov D[whdr.dwFlags],0
	mov D[whdr.dwLoops],0
	mov D[whdr.lpNext],0
	mov D[whdr.reserved],0

	invoke waveInPrepareHeader,[hWAV],offset whdr,SIZEOF WAVEHDR
	test eax,eax
	jnz >>.CLOSE			; header was not prepared

	invoke waveInAddBuffer, [hWAV],offset whdr,SIZEOF WAVEHDR
	test eax,eax
	jnz >.UNPREP

	invoke waveInStart,[hWAV]

	invoke Sleep,1000

	invoke waveInStop,[hWAV]
	; hand any buffer that is still queued back to the application
	invoke waveInReset,[hWAV]

	; The header has to be unprepared before its buffer is freed. Retry for
	; as long as the driver reports that it still owns the buffer. Once this
	; succeeds, whdr.dwBytesRecorded holds the final byte count.
	.UNPREP
	invoke waveInUnprepareHeader,[hWAV],offset whdr,SIZEOF WAVEHDR
	cmp eax,WAVERR_STILLPLAYING
	jne >.CLOSE
	invoke Sleep,10
	jmp <.UNPREP

	.CLOSE
	invoke waveInClose,[hWAV]

	cmp D[pBuffer],0
	je >.EXIT

	; dwBytesRecorded is still 0 on the early-error paths: nothing to play
	cmp D[whdr.dwBytesRecorded],0
	je >.FREE
	invoke BuildWAVFormat,[pBuffer],[whdr.dwBytesRecorded]

	.FREE
	invoke GlobalFree,[pBuffer]
	mov D[pBuffer],0		; do not leave a dangling pointer in the global

	.EXIT
	ret
endf

MonitorMicrophone FRAME hwi, msg, dwInstance, dwParam1, dwParam2
	;
	; Callback whose address is passed to waveInOpen (CALLBACK_FUNCTION).
	;
	; It makes no system calls. For WIM_DATA, dwParam1 is used as a pointer
	; to a WAVEHDR and two of its fields are loaded:
	;     ecx = dwBytesRecorded
	;     eax = lpData
	; Nothing further is done with them; GetMicInput reads the byte count
	; from its own WAVEHDR once the buffer has been returned. Every other
	; message, including WIM_OPEN and WIM_CLOSE, returns straight away.
	;
	cmp D[msg],WIM_DATA
	jne >.DONE
		mov eax,[dwParam1]
		mov ecx,[eax+WAVEHDR.dwBytesRecorded]
		mov eax,[eax+WAVEHDR.lpData]
	.DONE
	ret
endf

BuildWAVFormat FRAME pData,dwDataSize
	uses ebx
	;
	; Writes a 44-byte RIFF/WAVE header at the start of pData and plays the
	; resulting in-memory WAV image with PlaySound.
	;
	; In:   pData      - buffer: 44 bytes of header space, followed by
	;                    dwDataSize bytes of sample data
	;       dwDataSize - number of bytes of sample data
	; Out:  eax        - result of PlaySound
	;
	; The format values are fixed: PCM, 2 channels, 8000 samples/s,
	; 8 bits per sample. They are the same values GetMicInput puts in its
	; WAVEFORMATEX.
	;
	; No SND_ASYNC flag is passed to PlaySound.
	;

	mov ebx,[pData]
	mov eax,[dwDataSize]

	; RIFF chunk
	mov D[ebx],"RIFF"
	lea ecx,[eax+36]		; RIFF size = 44-byte header - 8 + data bytes
	mov [ebx+4],ecx
	mov D[ebx+8],"WAVE"

	; fmt chunk
	mov D[ebx+12],"fmt "
	mov D[ebx+16],16		; chunk size is a 4-byte field: write all of it
	mov W[ebx+20],WAVE_FORMAT_PCM	; format tag
	mov W[ebx+22],2			; channels
	mov D[ebx+24],8000		; samples per second
	mov D[ebx+28],16000		; bytes per second = 8000 * 2
	mov W[ebx+32],2			; block align = 2 channels * 1 byte
	mov W[ebx+34],8			; bits per sample

	; data chunk
	mov D[ebx+36],"data"
	mov [ebx+40],eax

	invoke PlaySound,[pData],NULL,SND_MEMORY | SND_NODEFAULT

	ret
endf

Just to let you know, I screwed around with this for 20 minutes until I figured out I had my mic muted :)

dedndave · January 08, 2013, 03:51:45 PM

very cool, Edgar :t

the microphone thing - not hard to do - lol

Donkey · January 08, 2013, 06:18:12 PM

Quote from: dedndave on January 08, 2013, 03:51:45 PM
very cool, Edgar :t

the microphone thing - not hard to do - lol

Thanks Dave,

With a bit of work it can easily be turned into an application that will record data from a microphone and save it to a WAV file, a neat application if anyone has a use for it. Also I'm trying to figure out how to parse the raw WAV data to do an FFT on it so I can spot specific frequencies but I'm not really that good with that sort of stuff. I figure it would pretty much fit the bill for the thread subject if I could isolate a specific frequency and trigger an event based on it.

dedndave · January 09, 2013, 12:43:48 AM

i don't remember much about it, other than everything in a RIFF file is stored in "chunks"
i think the data is some form of pulse-code modulation
the header seems to outline the parameters

Siekmanski · January 09, 2013, 01:00:17 AM

Hi Donkey

The Goertzel algorithm is perfect and fast to get a specific frequency.

Goertzel algorithm:

Code Select


samples      = range -1.0 to 1.0 (floating point) 
N            = number of samples to process 
Pi           = 3.141592653589793238
frequency    = frequency to look for 
samplerate   = sample rate of the data 

 
; Precalculate coeff(s)
;   k is the whole-number index of the frequency bin nearest to "frequency"
;   coeff is 2 * cos(2 * Pi * k / N): the factor 2 multiplies the cosine

       k     = Int(frequency / samplerate * N + 0.5)
       coeff = 2.0 * Cos(2.0 * Pi * k / N)
 
; Processing loop
       q1 = 0, q2 = 0, i = 0

loop:  q0 = (coeff * q1) - q2 + samples[i] 
       q2 = q1
       q1 = q0 
       i  = i + 1

      goto loop until i == N 

; The square root covers the whole power term; the 2 / N scaling is applied
; to the result of the square root

      Magnitude = Sqrt((q1 * q1) + (q2 * q2) - (q1 * q2 * coeff)) * 2.0 / N

Siekmanski · January 09, 2013, 01:37:54 AM

Hi Donkey

16 bit WAV samples are signed data
8 bit WAV samples are unsigned so you have to convert them to signed data

xor eax,eax		; eax = 0
mov al,255		; al = 255, an example unsigned 8-bit sample value
xor eax,10000000b	; flip bit 7 (value 128): 255 becomes 127

al is now 127

Siekmanski · January 09, 2013, 02:27:40 AM

Some old test pieces with Goertzel in action.

Donkey · January 09, 2013, 02:44:18 AM

Quote from: Siekmanski on January 09, 2013, 01:37:54 AM
Hi Donkey

16 bit WAV samples are signed data
8 bit WAV samples are unsigned so you have to convert them to signed data

xor eax,eax		; eax = 0
mov al,255		; al = 255, an example unsigned 8-bit sample value
xor eax,10000000b	; flip bit 7 (value 128): 255 becomes 127

al is now 127

Thanks, since I'm building the WAV files I can just build them as 16 bit, easier that way. I'll take a look at Goertzel, thanks for the code, I'm missing enough hair as it is and didn't look forward to pulling any more out.

dicky96 · January 09, 2013, 09:58:53 AM

Actually I have decided to try the external hardware method putting logic 1 onto input pins of the parallel port when there is a beat

This is not to discredit other suggestions, it is more about what I understand best (hardware) and the time taken for me to develop a solution

If I use the Windows Sleep function to activate my code around every 80 ms, I doubt that the delay in picking up a beat would be noticeable, as I am not stepping my display once per beat; I'm just changing the sequence direction/colour/etc. on each beat.

Also, as was suggested, I may as well put Win98 on this PC as it is gonna be dedicated to this task (as I mentioned, I have plenty of old PC hardware around) - whether I can find a copy of Win98 is another matter lol! Why doesn't Microsoft just give away old obsolete OSes for free, as they have no more use for them?

In the case of win98 can I just read/write directly to the parallel port data/control registers at 378h and 37ah?

And does the sleep function work just the same?

cheers
Rich

The MASM Forum

News:

Triggering code from sound input: how to?

dedndave

Siekmanski

Gunther

Donkey

dedndave

Donkey

Donkey

dedndave

Donkey

dedndave

Siekmanski

Siekmanski

Siekmanski

Donkey

dicky96