Skip to content

Commit

Permalink
SAPI Bookmarks added
Browse files Browse the repository at this point in the history
SAPI bookmarks are now supported (this allows e.g. JAWS to continuously read a
Word document).
Avoid an hts_engine crash when an empty labels array is provided (previously
there were always at least "sil" phones; with the new option to turn
them off, an empty array can now occur).
  • Loading branch information
m-toman committed Dec 18, 2014
1 parent 8320c98 commit e92d069
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 44 deletions.
1 change: 0 additions & 1 deletion engine/manager/include/Text.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ class TextFragment {
std::string text; ///< actual text

public:
// TextFragment( const std::string& text, const VoiceDataPtr& voiceData );
TextFragment(const std::string& text, const FragmentPropertiesPtr& properties);

const std::string& GetText() const {
Expand Down
46 changes: 25 additions & 21 deletions engine/manager/src/HTSEngineSynthesizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ HTSEngineSynthesizer::~HTSEngineSynthesizer() {
**/
TTSResultPtr HTSEngineSynthesizer::SynthesizeLabels(const FragmentPropertiesPtr& properties, const LabelsPtr& labels) {

TTSResultPtr result(new TTSResult());

//- no model given?
if (properties->find(PROPERTY_KEY_VOICE_PATH) == properties->end()) {
throw PropertyMissingException(PROPERTY_KEY_VOICE_PATH);
Expand Down Expand Up @@ -92,36 +94,38 @@ TTSResultPtr HTSEngineSynthesizer::SynthesizeLabels(const FragmentPropertiesPtr&
}

//- synthesize
//- this is a big ugly hack as we need char** instead of std::string
//- we currently use an array of pointers to the internal c_str of the label data.
//- this is faster but also unsafer than copying it.
char** labelData = new char* [ labels->size() ];
std::vector<LabelPtr>::iterator it = labels->begin();
for (int i = 0; it != labels->end(); ++it, ++i) {
LabelPtr lab = *it;
labelData[i] = const_cast<char*>(lab->GetAsHTKLabel().c_str());
}

HTS_Engine_synthesize_from_strings(&engine, labelData, labels->size());
delete[] labelData;
if (labels->size() > 0) {
//- this is a big ugly hack as we need char** instead of std::string
//- we currently use an array of pointers to the internal c_str of the label data.
//- this is faster but also unsafer than copying it.
char** labelData = new char* [ labels->size() ];
std::vector<LabelPtr>::iterator it = labels->begin();
for (int i = 0; it != labels->end(); ++it, ++i) {
LabelPtr lab = *it;
labelData[i] = const_cast<char*>(lab->GetAsHTKLabel().c_str());
}

//- store result data
TTSResultPtr result(new TTSResult());
int num_samples = HTS_Engine_get_nsamples(&engine);
result->GetFrames().reserve(num_samples);
HTS_Engine_synthesize_from_strings(&engine, labelData, labels->size());

for (int i = 0; i < num_samples; ++i) {
result->GetFrames().push_back(HTS_Engine_get_generated_speech(&engine, i));
//- store result data
int num_samples = HTS_Engine_get_nsamples(&engine);
result->GetFrames().reserve(num_samples);

for (int i = 0; i < num_samples; ++i) {
result->GetFrames().push_back(HTS_Engine_get_generated_speech(&engine, i));
}
delete[] labelData;

//- store labels
// we could add time alignment information to labels here
result->GetLabels().insert(result->GetLabels().begin(), labels->begin(), labels->end());
}

//- store meta information
// additional meta information in TTS result can be added here.
result->SetSamplingRate(HTS_Engine_get_sampling_frequency(&engine));

//- store labels
// we could add time alignment information to labels here
result->GetLabels().insert(result->GetLabels().begin(), labels->begin(), labels->end());

return result;
}

Expand Down
9 changes: 0 additions & 9 deletions engine/manager/src/Text.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,6 @@ Text::~Text() {
}


/**
* TextFragment
**/
/*
TextFragment::TextFragment( const std::string& text ) {
this->text = text;
this->properties = FragmentPropertiesPtr(new FragmentProperties());
}*/

/**
* TextFragment
**/
Expand Down
57 changes: 44 additions & 13 deletions sapi/htstts.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
<// htstts.cpp : Implementation of HTSTTS
// htstts.cpp : Implementation of HTSTTS

#include "stdafx.h"
#include "htstts.h"


#include "TTSManager.h"
#include "TTSLogger.h"

Expand All @@ -14,10 +13,22 @@ static double ConvertSapiRate(int r);

static TTSManager ttsManager;

#define SAPI_PROPERTY_BOOKMARK "SAPIROPBKMK"

/******************************************************************************
* SAPITextFragment
*
* TextFragment specialization that carries SAPI-only data next to the text;
* at present that is just the bookmark string.
*
* NOTE(review): no constructor is declared here, so this type can only be
* instantiated if TextFragment offers an accessible default constructor -
* confirm against Text.h.
******************************************************************************/
struct SAPITextFragment : TextFragment {
    std::string bookmark;  ///< bookmark string for this fragment
};

/******************************************************************************
* HTSTTS Constructor
*
* Allocates the voice properties object, sets both NOSIL fragment properties
* (end and begin) to PROPERTY_VALUE_TRUE and then calls ResetActions().
******************************************************************************/
HTSTTS::HTSTTS() : voiceProperties(new FragmentProperties()) {
    // Work on the freshly allocated property set through a single reference.
    FragmentProperties& defaults = *voiceProperties;
    defaults[PROPERTY_KEY_FRAGMENT_NOSIL_END] = PROPERTY_VALUE_TRUE;
    defaults[PROPERTY_KEY_FRAGMENT_NOSIL_BEGIN] = PROPERTY_VALUE_TRUE;

    ResetActions();
}

Expand Down Expand Up @@ -164,6 +175,8 @@ HTSTTS::Speak(DWORD dwSpeakFlags,
ULONG charsLeft = curr_frag->ulTextLen;
ULONG currlen;
ULONG FRAG_SIZE = 500;

// if this fragment has some changed properties for volume/pitch/etc., change the properties, else use voice properties
FragmentPropertiesPtr props = AdjustProperties(&(curr_frag->State), this->voiceProperties);

switch (curr_frag->State.eAction) {
Expand All @@ -172,8 +185,12 @@ HTSTTS::Speak(DWORD dwSpeakFlags,
LOG_DEBUG("[Speak] Should spell out something");
case SPVA_Pronounce:
LOG_DEBUG("[Speak] Should pronounce something");
case SPVA_Bookmark:
// bookmarks are treated like normal text fragments,
// but will set a special property
LOG_DEBUG("[Speak] Set a bookmark here");
(*props)[SAPI_PROPERTY_BOOKMARK] = std::string((char*)currStart, charsLeft * sizeof(wchar_t)) + std::string("\0");
case SPVA_Speak:

LOG_DEBUG("[Speak] Converting text");

if (curr_frag->ulTextLen == 0) {
Expand All @@ -198,8 +215,6 @@ HTSTTS::Speak(DWORD dwSpeakFlags,
WideCharToMultiByte(CP_UTF8, 0, currStart, currlen, tmptext, len, NULL, NULL);
tmptext[len] = 0;

//TODO: copy and modify properties if this fragment has specific needs

LOG_DEBUG("[Speak] Text = " << tmptext);
fullText.push_back(TextFragmentPtr(new TextFragment(tmptext, props)));

Expand All @@ -215,8 +230,6 @@ HTSTTS::Speak(DWORD dwSpeakFlags,
case SPVA_Silence:
LOG_DEBUG("[Speak] Should do silence");
break;
case SPVA_Bookmark:
break;
default:
break;
}
Expand All @@ -235,11 +248,23 @@ HTSTTS::Speak(DWORD dwSpeakFlags,
break;
}

// is this actually a bookmark?
//TODO: only send if interest is there
if (tf->GetProperties()->find(SAPI_PROPERTY_BOOKMARK) != tf->GetProperties()->end()) {
SPEVENT evt;
evt.eEventId = SPEI_TTS_BOOKMARK;
evt.elParamType = SPET_LPARAM_IS_STRING;
evt.ullAudioStreamOffset = 0L;
evt.wParam = atol(tf->GetText().c_str());
evt.lParam = (LPARAM)((*tf->GetProperties())[SAPI_PROPERTY_BOOKMARK].c_str());
(strdup(tf->GetText().c_str()));
pOutputSite->AddEvents(&evt, 1);
continue;
}

LOG_DEBUG("[Speak] Synthesize text fragment");

try {
//TODO: modify volume and speaking rate according to live action events for future fragments
// combine it with values from SAPI XML
TTSResultPtr result = ttsManager.SynthesizeTextFragment(tf);

//TODO: is result->GetSamplingRate() correct?
Expand Down Expand Up @@ -349,18 +374,14 @@ void HTSTTS::HandleActions(ISpTTSEngineSite* site) {
//- change base speaking rate action
if (actions & SPVES_RATE) {
long adj;

site->GetRate(&adj);

ttsManager.SetBaseSpeakingRate(ConvertSapiRate(adj));
}

//- change base volume
if (actions & SPVES_VOLUME) {
USHORT adj;

site->GetVolume(&adj);

ttsManager.SetBaseVolume((int)adj);
}
}
Expand All @@ -378,6 +399,14 @@ FragmentPropertiesPtr HTSTTS::AdjustProperties(const SPVSTATE* state, FragmentPr
//TODO: if( state.EmphAdj )
//TODO: if( state.PitchAdj.MiddleAdj )

// bookmark always needs special properties.
if (state->eAction == SPVA_Bookmark) {
if (newProps == NULL) {
newProps = new FragmentProperties(*props);
}
//(*newProps)[SAPI_PROPERTY_BOOKMARK] = PROPERTY_VALUE_TRUE;
}

//- speaking rate changed for this fragment?
if (state->RateAdj) {
std::stringstream ss;
Expand All @@ -403,6 +432,8 @@ FragmentPropertiesPtr HTSTTS::AdjustProperties(const SPVSTATE* state, FragmentPr
//- return either the new properties
//- or if nothing has changed, the old ones
if (newProps) {
(*newProps)[PROPERTY_KEY_FRAGMENT_NOSIL_END] = PROPERTY_VALUE_TRUE;
(*newProps)[PROPERTY_KEY_FRAGMENT_NOSIL_BEGIN] = PROPERTY_VALUE_TRUE;
return FragmentPropertiesPtr(newProps);
}
else {
Expand Down

0 comments on commit e92d069

Please sign in to comment.