// Logo Search packages:
// Sourcecode: pwlib version File versions
//
// ptts.cxx

/*
 * ptts.cxx
 *
 * Text To Speech classes
 *
 * Portable Windows Library
 *
 * Copyright (c) 2002 Equivalence Pty. Ltd.
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.0 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is Portable Windows Library.
 *
 * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
 *
 * Contributor(s): ______________________________________.
 *
 * $Log: ptts.cxx,v $
 * Revision 1.15  2004/10/23 10:56:15  ykiryanov
 * Added ifdef _WIN32_WCE for PocketPC 2003 SDK port
 *
 * Revision 1.14  2004/07/12 09:17:20  csoutheren
 * Fixed warnings and errors under Linux
 *
 * Revision 1.13  2004/07/06 10:12:54  csoutheren
 * Added static integer to factory template to assist in ensuring factories are instantiated
 *
 * Revision 1.12  2004/06/30 12:17:05  rjongbloed
 * Rewrite of plug in system to use single global variable for all factories to avoid all sorts
 *   of issues with startup orders and Windows DLL multiple instances.
 *
 * Revision 1.11  2004/06/19 09:02:32  csoutheren
 * Added rate and volume to keep Festival happy
 *
 * Revision 1.10  2004/06/19 07:18:59  csoutheren
 * Change TTS engine registration to use abstract factory code
 *
 * Revision 1.9  2004/04/09 06:52:17  rjongbloed
 * Removed #pragma linker command for /delayload of DLL as the documentation says that
 *   you cannot do this.
 *
 * Revision 1.8  2004/02/23 23:52:20  csoutheren
 * Added pragmas to avoid every Windows application needing to include libs explicitly
 *
 * Revision 1.7  2003/10/30 11:28:25  rjongbloed
 * Added directory name for Speech API under Windows.
 *
 * Revision 1.6  2003/09/17 06:02:13  csoutheren
 * Fixed windows header file problem caused by removing recursive headers
 *
 * Revision 1.5  2003/04/17 01:21:33  craigs
 * Added import of ptbuildopts to correctly detect if P_SAPI is set
 *
 * Revision 1.4  2003/04/16 08:00:19  robertj
 * Windows pseudo autoconf support
 *
 * Revision 1.3  2002/11/06 22:47:25  robertj
 * Fixed header comment (copyright etc)
 *
 * Revision 1.2  2002/08/14 15:18:25  craigs
 * Fixed Festival implementation
 *
 * Revision 1.1  2002/08/06 04:45:58  craigs
 * Initial version
 *
 */

#ifdef __GNUC__
#pragma implementation "ptts.h"
#endif

#include "ptbuildopts.h"

////////////////////////////////////////////////////////////

// WIN32 COM stuff must be first in file to compile properly

#if P_SAPI

#if defined(P_SAPI_LIBRARY)
#pragma comment(lib, P_SAPI_LIBRARY)
#endif

#ifndef _WIN32_DCOM
#define _WIN32_DCOM 1
#endif

#include <objbase.h>
#include <atlbase.h>
#include <objbase.h>
#include <windows.h>
#include <windowsx.h>
#include <sphelper.h>

#endif

////////////////////////////////////////////////////////////

// this disables the winsock2 stuff in the Windows contain.h, to avoid header file problems
#define P_KNOCKOUT_WINSOCK2

#define P_DISABLE_FACTORY_INSTANCES

#include <ptlib.h>
#include <ptlib/pipechan.h>
#include <ptclib/ptts.h>

#ifdef _WIN32

// Instantiate the PTextToSpeech factory (keyed by PString) on Windows builds only.
// NOTE(review): presumably required because P_DISABLE_FACTORY_INSTANCES is defined
// before the ptlib headers are included - confirm against the PFactory headers.
PINSTANTIATE_FACTORY(PTextToSpeech, PString)

#endif

////////////////////////////////////////////////////////////
//
// Text to speech using Microsoft's Speech API (SAPI)
// Can be downloaded from http://www.microsoft.com/speech/download/sdk51
//

#if P_SAPI

#define MAX_FN_SIZE 1024

// Text-to-speech engine built on the Microsoft Speech API (SAPI).
//
// Speech is rendered through an ISpVoice COM object whose output is bound to
// a WAV file by OpenFile(); OpenChannel() always reports failure in this
// implementation. COM is initialised when the first instance is constructed
// and uninitialised when the last instance is destroyed, tracked through the
// static refCount/refMutex pair.
class PTextToSpeech_SAPI : public PTextToSpeech
{
  PCLASSINFO(PTextToSpeech_SAPI, PTextToSpeech);
  public:
    PTextToSpeech_SAPI();
    ~PTextToSpeech_SAPI();

    // overrides
    PStringArray GetVoiceList();
    BOOL SetVoice(const PString & voice);

    // The rate and volume values are only stored and returned by this class;
    // nothing in this file passes them on to SAPI.
    BOOL SetRate(unsigned rate);
    unsigned GetRate();

    BOOL SetVolume(unsigned volume);
    unsigned GetVolume();

    BOOL OpenFile   (const PFilePath & fn);
    BOOL OpenChannel(PChannel * channel);
    BOOL IsOpen()     { return opened; }

    BOOL Close      ();
    BOOL Speak      (const PString & str, TextType hint);

  protected:
    // Creates the ISpVoice object held in m_cpVoice and updates 'opened'.
    BOOL OpenVoice();

    // Shared by all instances: protects and counts the live objects so that
    // COM initialisation and shutdown are paired.
    static PMutex refMutex;
    static int * refCount;

    PMutex mutex;                     // locked by most of the member functions
    CComPtr<ISpVoice> m_cpVoice;      // SAPI voice, created by OpenVoice()
    CComPtr<ISpStream> cpWavStream;   // WAV file stream bound in OpenFile()
    BOOL opened;                      // value returned by IsOpen()
    BOOL usingFile;                   // TRUE after OpenFile(), FALSE after OpenChannel()
    unsigned rate, volume;            // values stored by SetRate()/SetVolume()
    PString voice;                    // name stored by SetVoice()
};

// Registers this engine with the PTextToSpeech factory under the key "Microsoft SAPI".
// NOTE(review): the second argument is presumably the factory's singleton flag - confirm against PFactory.
PFactory<PTextToSpeech>::Worker<PTextToSpeech_SAPI> sapiTTSFactory("Microsoft SAPI", false);

// Counter of live PTextToSpeech_SAPI objects; NULL while no instance exists.
// The constructor and destructor use it to pair CoInitializeEx() with CoUninitialize().
int * PTextToSpeech_SAPI::refCount;
// Serialises access to refCount.
PMutex PTextToSpeech_SAPI::refMutex;


// Constructs a closed engine. The first live instance initialises COM (multi-threaded
// apartment) and allocates the shared instance counter; later instances only
// increment the counter.
PTextToSpeech_SAPI::PTextToSpeech_SAPI()
{
  PWaitAndSignal m(refMutex);

  if (refCount == NULL) {
    refCount = new int;
    *refCount = 1;
    ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
  } else {
    (*refCount)++;
  }

  usingFile = opened = FALSE;

  // Give the stored settings defined values so GetRate()/GetVolume() never
  // return indeterminate data. The values mirror the Festival engine defaults.
  rate = 8000;
  volume = 100;
}


// Releases this instance's COM objects and, when the last instance goes away,
// shuts COM down and frees the shared instance counter.
PTextToSpeech_SAPI::~PTextToSpeech_SAPI()
{
  // The CComPtr members would otherwise be released by their own destructors,
  // which run only after this body - i.e. after CoUninitialize() below.
  // Release them explicitly first so no COM object outlives COM itself.
  Close();
  cpWavStream.Release();
  m_cpVoice.Release();

  PWaitAndSignal m(refMutex);

  if ((--(*refCount)) == 0) {
    ::CoUninitialize();
    delete refCount;
    refCount = NULL;
  }
}

// Creates the SAPI voice object.
// Returns TRUE (and marks the engine open) when the COM object was created.
BOOL PTextToSpeech_SAPI::OpenVoice()
{
  PWaitAndSignal m(mutex);

  // A previous, partially failed open can leave a voice object behind while
  // 'opened' is FALSE. CComPtr::CoCreateInstance expects an empty pointer, so
  // drop any stale object first (Release() is a no-op on an empty pointer).
  m_cpVoice.Release();

  HRESULT hr = m_cpVoice.CoCreateInstance(CLSID_SpVoice);
  return (opened = SUCCEEDED(hr));
}

// Channel output is not implemented for this engine: any current session is
// closed, the engine is left closed and FALSE is always returned.
BOOL PTextToSpeech_SAPI::OpenChannel(PChannel *)
{
  PWaitAndSignal lock(mutex);

  Close();

  usingFile = FALSE;
  opened    = FALSE;
  return FALSE;
}


// Opens the engine so that subsequent Speak() calls are written to the WAV
// file 'fn' (8 kHz, 16 bit, mono). Any existing session is closed first.
// Returns TRUE when the voice was created and bound to the file; on any
// failure the engine is left closed and FALSE is returned.
BOOL PTextToSpeech_SAPI::OpenFile(const PFilePath & fn)
{
  PWaitAndSignal m(mutex);

  Close();
  usingFile = TRUE;

  // Reject over-long names instead of overflowing a fixed-size buffer; this
  // also bounds the stack space used by the A2W conversion below.
  if (fn.GetLength() >= MAX_FN_SIZE)
    return FALSE;

  if (!OpenVoice())
    return FALSE;

  CSpStreamFormat wavFormat;
  wavFormat.AssignFormat(SPSF_8kHz16BitMono);

  // Taking the address of a CComPtr requires it to be empty.
  cpWavStream.Release();

  // The file name is a narrow string, so convert with A2W (T2W is only
  // correct for TCHAR data and breaks UNICODE builds).
  USES_CONVERSION;
  HRESULT hr = SPBindToFile(A2W((const char *)fn), SPFM_CREATE_ALWAYS, &cpWavStream, &wavFormat.FormatId(), wavFormat.WaveFormatExPtr());

  if (SUCCEEDED(hr))
    hr = m_cpVoice->SetOutput(cpWavStream, TRUE);

  if (FAILED(hr)) {
    // Do not leave a half-open engine behind: OpenVoice() has already set
    // 'opened', so undo that and drop both COM objects.
    cpWavStream.Release();
    m_cpVoice.Release();
    return (opened = FALSE);
  }

  return (opened = TRUE);
}

// Shuts down the current session, if any. For file output the voice is given
// the chance to finish before the WAV stream is released. Always returns TRUE.
BOOL PTextToSpeech_SAPI::Close()
{
  PWaitAndSignal lock(mutex);

  if (opened) {
    if (usingFile) {
      // let pending speech complete before the stream goes away
      m_cpVoice->WaitUntilDone(INFINITE);
      cpWavStream.Release();
    }

    m_cpVoice.Release();
    opened = FALSE;
  }

  return TRUE;
}


// Speaks 'otext' through the open voice using the default SAPI flags.
// The text-type hint is currently not used by this engine.
// Returns FALSE when the engine is not open or SAPI reports a failure.
BOOL PTextToSpeech_SAPI::Speak(const PString & otext, TextType /*hint*/)
{
  PWaitAndSignal m(mutex);

  if (!IsOpen())
    return FALSE;

  // Convert the narrow text to a wide string and hand it straight to SAPI.
  // No intermediate heap copy is needed: the converted string stays valid
  // until this function returns. A2W is used because the source is a narrow
  // string regardless of the UNICODE build setting.
  USES_CONVERSION;
  HRESULT hr = m_cpVoice->Speak(A2W((const char *)otext), SPF_DEFAULT, NULL);

  return SUCCEEDED(hr);
}

// Enumerates the installed SAPI voice tokens and returns one array entry per
// token that could be fetched.
// NOTE(review): every entry is the literal placeholder "voice"; the token's
// real name or description is not read here.
PStringArray PTextToSpeech_SAPI::GetVoiceList()
{
  PWaitAndSignal lock(mutex);

  PStringArray names;

  CComPtr<ISpObjectToken> token;
  CComPtr<IEnumSpObjectTokens> tokenEnum;

  // Ask SAPI for the voice category enumerator, then for its size
  HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &tokenEnum);

  ULONG remaining = 0;
  if (SUCCEEDED(hr))
    hr = tokenEnum->GetCount(&remaining);

  // Walk the tokens one at a time; stop at the first failure
  for (; SUCCEEDED(hr) && remaining != 0; --remaining) {
    token.Release();
    hr = tokenEnum->Next(1, &token, NULL );

    if (SUCCEEDED(hr))
      names.AppendString("voice");
  }

  return names;
}

// Stores the requested voice name under the instance lock. Always returns TRUE.
BOOL PTextToSpeech_SAPI::SetVoice(const PString & newVoice)
{
  PWaitAndSignal lock(mutex);

  voice = newVoice;
  return TRUE;
}

// Stores the rate value for later retrieval by GetRate(). Always returns TRUE.
BOOL PTextToSpeech_SAPI::SetRate(unsigned newRate)
{
  rate = newRate;
  return TRUE;
}

unsigned PTextToSpeech_SAPI::GetRate()
{
  return rate;
}

// Stores the volume value for later retrieval by GetVolume(). Always returns TRUE.
BOOL PTextToSpeech_SAPI::SetVolume(unsigned newVolume)
{
  volume = newVolume;
  return TRUE;
}

unsigned PTextToSpeech_SAPI::GetVolume()
{
  return volume;
}

#endif
// P_SAPI

////////////////////////////////////////////////////////////
//
//  Generic text to speech using Festival
//

// Text-to-speech engine that drives the Festival "text2wave" program.
//
// Only file output is supported. Text passed to Speak() is accumulated in
// memory and synthesised in one go when Close() is called, by running an
// external command through Invoke().
class PTextToSpeech_Festival : public PTextToSpeech
{
  PCLASSINFO(PTextToSpeech_Festival, PTextToSpeech);
  public:
    PTextToSpeech_Festival();
    ~PTextToSpeech_Festival();

    // overrides
    PStringArray GetVoiceList();
    BOOL SetVoice(const PString & voice);

    BOOL SetRate(unsigned rate);
    unsigned GetRate();

    BOOL SetVolume(unsigned volume);
    unsigned GetVolume();

    BOOL OpenFile   (const PFilePath & fn);
    BOOL OpenChannel(PChannel * channel);
    BOOL IsOpen()    { return opened; }

    BOOL Close      ();
    BOOL Speak      (const PString & str, TextType hint);

  protected:
    // Runs the external synthesis command for 'str', writing the audio to 'fn'.
    BOOL Invoke(const PString & str, const PFilePath & fn);

    PMutex mutex;            // locked by most of the member functions
    BOOL opened;             // value returned by IsOpen()
    BOOL usingFile;          // TRUE after OpenFile(), FALSE after OpenChannel()
    PString text;            // text accumulated by Speak() until Close()
    PFilePath path;          // output file given to OpenFile()
    unsigned volume, rate;   // rate is passed to text2wave via -F; volume is only stored
    PString voice;           // name stored by SetVoice()
};

// Registers this engine with the PTextToSpeech factory under the key "Festival".
// NOTE(review): the second argument is presumably the factory's singleton flag - confirm against PFactory.
PFactory<PTextToSpeech>::Worker<PTextToSpeech_Festival> festivalTTSFactory("Festival", false);

// Constructs a closed engine with a rate of 8000 and a volume of 100.
PTextToSpeech_Festival::PTextToSpeech_Festival()
{
  PWaitAndSignal lock(mutex);

  opened    = FALSE;
  usingFile = FALSE;
  volume    = 100;
  rate      = 8000;
}


// Acquires the instance mutex before the object is torn down; it does not
// call Close().
PTextToSpeech_Festival::~PTextToSpeech_Festival()
{
  PWaitAndSignal lock(mutex);
}

// Closes any current session and leaves the engine closed in non-file mode.
// NOTE(review): returns TRUE even though the engine is not open afterwards;
// Speak() will then refuse to run - confirm this is what callers expect.
BOOL PTextToSpeech_Festival::OpenChannel(PChannel *)
{
  PWaitAndSignal lock(mutex);

  Close();

  opened    = FALSE;
  usingFile = FALSE;
  return TRUE;
}


// Closes any current session, then opens the engine in file mode with 'fn'
// recorded as the output file. The file itself is only written when Close()
// runs. Always returns TRUE.
BOOL PTextToSpeech_Festival::OpenFile(const PFilePath & fn)
{
  PWaitAndSignal lock(mutex);

  Close();

  path      = fn;
  usingFile = TRUE;
  opened    = TRUE;

  PTRACE(3, "TTS\tWriting speech to " << fn);
  return TRUE;
}

// Ends the current session. In file mode the accumulated text is synthesised
// into the output file via Invoke() and that result is returned. Closing an
// engine that is not open is a no-op that returns TRUE.
BOOL PTextToSpeech_Festival::Close()
{
  PWaitAndSignal lock(mutex);

  if (opened) {
    const BOOL result = usingFile ? Invoke(text, path) : FALSE;

    // forget the accumulated text whether or not synthesis worked
    text = PString();
    opened = FALSE;

    return result;
  }

  return TRUE;
}


// Queues 'ostr' for synthesis. In file mode the text is appended to the
// pending buffer (which Close() later renders) and TRUE is returned. FALSE
// is returned when the engine is not open or is not in file mode. The
// text-type hint has no effect in this engine.
BOOL PTextToSpeech_Festival::Speak(const PString & ostr, TextType /*hint*/)
{
  PWaitAndSignal lock(mutex);

  if (!IsOpen()) {
    PTRACE(3, "TTS\tAttempt to speak whilst engine not open");
    return FALSE;
  }

  if (!usingFile) {
    PTRACE(3, "TTS\tStream mode not supported for Festival");
    return FALSE;
  }

  PTRACE(3, "TTS\tSpeaking " << ostr);
  text = text & ostr;
  return TRUE;
}

// Returns the list of voices, which for this engine is the single entry "default".
PStringArray PTextToSpeech_Festival::GetVoiceList()
{
  PWaitAndSignal lock(mutex);

  PStringArray names;
  names.AppendString("default");
  return names;
}

// Stores the requested voice name under the instance lock. Always returns TRUE.
BOOL PTextToSpeech_Festival::SetVoice(const PString & newVoice)
{
  PWaitAndSignal lock(mutex);

  voice = newVoice;
  return TRUE;
}

// Stores the rate, which Invoke() passes to text2wave as its -F argument.
// Always returns TRUE.
BOOL PTextToSpeech_Festival::SetRate(unsigned newRate)
{
  rate = newRate;
  return TRUE;
}

unsigned PTextToSpeech_Festival::GetRate()
{
  return rate;
}

// Stores the volume value for later retrieval by GetVolume(). Always returns TRUE.
BOOL PTextToSpeech_Festival::SetVolume(unsigned newVolume)
{
  volume = newVolume;
  return TRUE;
}

unsigned PTextToSpeech_Festival::GetVolume()
{
  return volume;
}

// Synthesises 'otext' into the file 'fname' by piping the text into the
// "./text2wave" program through the system command interpreter.
//
// The text is embedded in a double-quoted shell argument, so every character
// that could end the quoting or trigger expansion inside it is neutralised
// first. The file name is passed in double quotes so that names containing
// spaces work; it is otherwise used as given and must come from a trusted
// source.
//
// Returns TRUE when the command ran and reported success, FALSE otherwise.
BOOL PTextToSpeech_Festival::Invoke(const PString & otext, const PFilePath & fname)
{
  PString quoted = otext;
  quoted.Replace('\n', ' ', TRUE);
  quoted.Replace('\"', '\'', TRUE);
  quoted.Replace('\\', ' ', TRUE);
  // '$' and '`' still trigger variable and command substitution inside
  // double quotes in a POSIX shell, so they must not reach the command line.
  quoted.Replace('$', ' ', TRUE);
  quoted.Replace('`', ' ', TRUE);
  quoted = "\"" + quoted + "\"";

  PString cmdLine = "echo " + quoted + " | ./text2wave -F " + PString(PString::Unsigned, rate) + " -otype riff > \"" + fname + "\"";

#if 1

#ifndef _WIN32_WCE
  // Report a failed or missing text2wave instead of silently claiming success
  int status = system(cmdLine);
  if (status != 0) {
    PTRACE(2, "TTS\tCommand failed with status " << status << ": " << cmdLine);
    return FALSE;
  }
#endif

  // NOTE(review): on Windows CE no command is run at all, yet TRUE is still
  // returned as before - confirm whether callers rely on that.
  return TRUE;

#else

  PPipeChannel cmd;
  int code = -1;
  if (!cmd.Open(cmdLine, PPipeChannel::ReadWriteStd)) {
    PTRACE(2, "TTS\tCannot execute command " << cmdLine);
  } else {
    PTRACE(2, "TTS\tCreating " << fname << " using " << cmdLine);
    cmd.Execute();
    code = cmd.WaitForTermination();
    if (code >= 0) {
      PTRACE(2, "TTS\tdata generated");
    } else {
      PTRACE(2, "TTS\tgeneration failed");
    }
  }

  return code == 0;

#endif
}

// End Of File ///////////////////////////////////////////////////////////////

// Generated by  Doxygen 1.6.0   Back to index