live-segment.c¶
This example runs a wake word recognizer on live audio, uses a voice activity detector (VAD) to segment the speech that follows the wake word, and then saves that audio segment to a file.
Instructions¶
Build the sample code. In the same terminal window, run the command shown after the % prompt, then say "Voice Genie, will it rain in Portland tomorrow?"
% ./bin/live-segment ../../model/tpl-spot-vad-3.10.0.snsr ../../model/spot-voicegenie-enUS-6.5.1-m.snsr
Say <trigger phrase> will it rain in Portland tomorrow?
Spotted "voicegenie", listening...
VAD detected speech from 3150 ms to 5055 ms.
Wrote recording to "vad-audio.wav".
vad-audio.wav contains the speech after the wake word.
Code¶
Available in this TrulyNatural SDK installation at ~/Sensory/TrulyNaturalSDK/7.6.1/sample/c/src/live-segment.c
live-segment.c
/* Sensory Confidential
* Copyright (C)2017-2025 Sensory, Inc. https://sensory.com/
*
* TrulyHandsfree SDK keyword spotter, runs trailing audio through
* a voice activity detector and saves it to file.
*------------------------------------------------------------------------------
*/
#include <snsr.h>
#include <stdlib.h>
/* Set INCLUDE_SPOT to 1 to include the trigger phrase in the audio output.
 * main() passes this value to the SNSR_INCLUDE_LEADING_SILENCE setting. */
#define INCLUDE_SPOT 0
/* Output filename: main() opens this file for writing and attaches it to the
 * session as the SNSR_SINK_AUDIO_PCM stream. */
#define VAD_AUDIO_FILE "vad-audio.wav"
/* Report a fatal error and terminate the process.
 *
 * Writes "ERROR: <msg>" followed by a newline to stderr, then calls exit()
 * with rc as the process exit status. This function does not return.
 */
static void
fatalError(int rc, const char *msg)
{
  (void)fprintf(stderr, "ERROR: %s\n", msg);
  exit(rc);
}
/* Handler for SNSR_RESULT_EVENT, registered with snsrSetHandler() in main().
 *
 * Reads the SNSR_RES_TEXT string from the session and announces it on stdout.
 * Returns the code from snsrGetString(): SNSR_RC_OK after the message has
 * been printed, or the failure code (with nothing printed) otherwise.
 * The key and privateData arguments are not used.
 */
static SnsrRC
resultEvent(SnsrSession s, const char *key, void *privateData)
{
  const char *spotted;
  SnsrRC rc = snsrGetString(s, SNSR_RES_TEXT, &spotted);

  if (rc != SNSR_RC_OK) {
    return rc;
  }
  printf("Spotted \"%s\", listening...\n", spotted);
  /* Flush so the prompt is visible immediately, even if stdout is buffered. */
  fflush(stdout);
  return rc;
}
/* VAD segmentation handler, registered in main() for both SNSR_END_EVENT and
 * SNSR_LIMIT_EVENT.
 *
 * Reads the SNSR_RES_BEGIN_MS and SNSR_RES_END_MS values from the session and
 * prints the detected speech interval. On success it returns SNSR_RC_STOP,
 * which main() expects as the normal return value of snsrRun(). If the end
 * time cannot be read, that error code is returned and nothing is printed.
 * The key and privateData arguments are not used.
 */
static SnsrRC
endpointEvent(SnsrSession s, const char *key, void *privateData)
{
  double startMs, stopMs;
  SnsrRC rc;

  /* NOTE(review): the return code of this first read is not checked.
   * Presumably a failure here leaves the session in an error state that the
   * next call reports as well -- confirm against the SDK documentation. */
  snsrGetDouble(s, SNSR_RES_BEGIN_MS, &startMs);
  rc = snsrGetDouble(s, SNSR_RES_END_MS, &stopMs);
  if (rc == SNSR_RC_OK) {
    printf("VAD detected speech from %.0f ms to %.f ms.\n", startMs, stopMs);
    rc = SNSR_RC_STOP;
  }
  return rc;
}
/* Program entry point.
 *
 * usage: live-segment tmpl-vad-model spot-model [--test]
 *
 * argv[1] is the spotter-VAD template task file and argv[2] is the spotter
 * model loaded into template slot 0. The presence of a third argument
 * selects test mode, which reads audio from stdin instead of the default
 * audio device.
 *
 * Returns 0 once the recording has been written. Exits with status 1 on a
 * usage error, or through fatalError() with the session return code if
 * snsrRun() ends with anything other than SNSR_RC_STOP.
 */
int
main(int argc, char *argv[])
{
  SnsrSession s;
  SnsrStream audio, out;
  SnsrRC r;
  const char *tmplModel, *spotModel;
  int testing;

  if (!(argc == 3 || argc == 4)) {
    fprintf(stderr, "usage: %s tmpl-vad-model spot-model [--test]\n", argv[0]);
    exit(1);
  }
  tmplModel = argv[1];
  spotModel = argv[2];
  /* Any fourth argument switches to test mode. */
  testing = (argc == 4);

  /* Create a new session handle. */
  snsrNew(&s);

  /* Load the spotter-vad template task file and check that it has the
   * expected task type. */
  snsrLoad(s, snsrStreamFromFileName(tmplModel, "r"));
  snsrRequire(s, SNSR_TASK_TYPE, SNSR_PHRASESPOT_VAD);

  /* Load the spotter into template slot 0. */
  snsrSetStream(s, SNSR_SLOT_0, snsrStreamFromFileName(spotModel, "r"));

  /* If requested, include the trigger phrase in the audio output. */
  snsrSetInt(s, SNSR_INCLUDE_LEADING_SILENCE, INCLUDE_SPOT);

  /* Register the VAD endpoint handler for both the end and limit events,
   * then the spot result handler. The remaining snsrCallback() arguments
   * are NULL for all three handlers. */
  snsrSetHandler(s, SNSR_END_EVENT, snsrCallback(endpointEvent, NULL, NULL));
  snsrSetHandler(s, SNSR_LIMIT_EVENT, snsrCallback(endpointEvent, NULL, NULL));
  snsrSetHandler(s, SNSR_RESULT_EVENT, snsrCallback(resultEvent, NULL, NULL));

  /* Create an audio stream instance and attach it to the session. */
  if (!testing) {
    /* Live audio from the default audio device. */
    audio = snsrStreamFromAudioDevice(SNSR_ST_AF_DEFAULT);
  } else {
    /* Read from stdin for testing. */
    audio = snsrStreamFromFILE(stdin, SNSR_ST_MODE_READ);
    /* Reduce the trailing silence time-out, as test recordings have less
     * than 1000 ms of silence at the end. */
    snsrSetInt(s, SNSR_TRAILING_SILENCE, 500);
    /* Reduce VAD margins to the absolute minimum, for testing only. This
     * could lead to small portions of the beginning and end of the audio
     * being lost. The recommendation is to use default values for
     * production code. */
    snsrSetInt(s, SNSR_BACKOFF, 0);
    snsrSetInt(s, SNSR_HOLD_OVER, 0);
  }
  snsrSetStream(s, SNSR_SOURCE_AUDIO_PCM, audio);

  /* Set up the output stream: the output file wrapped in an audio stream.
   * Speech-detected audio will be written to this file. */
  out = snsrStreamFromAudioStream(snsrStreamFromFileName(VAD_AUDIO_FILE, "w"),
                                  SNSR_ST_AF_DEFAULT);
  snsrSetStream(s, SNSR_SINK_AUDIO_PCM, out);

  printf("Say <trigger phrase> will it rain in Portland tomorrow?\n");

  /* Main recognition loop. The endpoint handler causes snsrRun() to return
   * SNSR_RC_STOP; any other return code indicates an unexpected error.
   * Session errors remain until explicitly cleared, so any error that
   * occurred in the set-up calls above is also reported here. */
  r = snsrRun(s);
  if (r != SNSR_RC_STOP) {
    fatalError(r, snsrErrorDetail(s));
  }

  snsrRelease(s);
  printf("Wrote recording to \"%s\".\n", VAD_AUDIO_FILE);
  return 0;
}